- You'll hone your pandas skills by learning how to organize, reshape, and aggregate multiple data sets to answer your specific questions. 
- DataFrame: Two-dimensional, size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. It is the primary pandas data structure.

##### Pandas is capable of many tasks including:

- Reading/writing many different data formats 
- Selecting subsets of data
- Calculating across rows and down columns 
- Finding and filling missing data 
- Applying operations to independent groups within the data 
- Reshaping data into different forms 
- Combining multiple datasets together 
- Advanced time-series functionality 
- Visualization through matplotlib and seaborn 

Although pandas is very capable, it does not provide functionality for the entire data science pipeline. 
Pandas is typically the intermediate tool, used for data exploration and cleaning; it sits between data capture and storage on one side and data modeling and prediction on the other.

reference: https://pandas.pydata.org/pandas-docs/stable/overview.html

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Each purchase is a Series keyed by field name; the three Series become the
# rows of the DataFrame, labelled by the store in which the purchase was made.
purchase_1, purchase_2, purchase_3 = (
    pd.Series(dict(zip(('Name', 'Item Purchased', 'Cost'), record)))
    for record in (
        ('Chris', 'Dog Food', 22.50),
        ('Kevyn', 'Kitty Litter', 2.50),
        ('Vinod', 'Bird Seed', 5.00),
    )
)
store_labels = ['Store 1', 'Store 1', 'Store 2']
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=store_labels)
df.head()



In [None]:
df.iloc[2]

In [None]:
df.T.loc['Cost']

In [None]:
df.loc['Store 1','Cost']

In [None]:
df

In [None]:
df.loc[['Store 1'],'Cost']

In [None]:
df.drop('Store 1') # returns a new DataFrame without the 'Store 1' rows; df itself is not modified

In [None]:
df

In [None]:
copy_df = df.copy()
copy_df = copy_df.drop('Store 1')
copy_df

In [None]:
copy_df.drop

In [None]:
#del copy_df['Name']
df

In [None]:
dates = pd.date_range('20130101', periods=6)
dates

In [None]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

In [None]:
# Build a frame whose columns each come from a different kind of value;
# the scalar entries ('A', 'B', 'F') are repeated for every row.
n_rows = 4
column_values = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(n_rows)), dtype='float32'),
    'D': np.array([3] * n_rows, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
}
df2 = pd.DataFrame(column_values)

In [None]:
df2.dtypes

In [None]:
df2.describe()

In [None]:
df.sort_index(axis=1, ascending=False)

In [None]:
df.sort_values(by='B',ascending=False)

In [None]:
df.loc[dates[0]]


In [None]:
df.loc['20130102':'20130104',['A','B']]

In [None]:
 df.loc['20130102',['A','B']]

In [None]:
df.loc[dates[0],'A'] # select a single scalar value


In [None]:
df.iloc[[1,2,4],[0,2]] # iloc selects by integer position; loc selects by index label

In [None]:
# boolean indexing
df[df.A>0]

In [None]:
df[df<0] # select based on values

In [None]:
df2 = df.copy()
df2['E'] = ['one','one','two','three','four','three']
df2[df2['E'].isin(['two','four'])] # isin() for filter

In [None]:
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
s1

In [None]:
df['F']=s1 # add column( index should be matched )
df

In [None]:
df.at[dates[0],'A']=0 # update value by label
df

In [None]:
df.iat[0,1]=1 # setting value by position
df

In [None]:
df.loc[:,'D'] = np.array([5]*len(df)) # use numpy array setting values 
df

In [None]:
# Overwrite the four original columns of the 2013-01-04 row.
# df also carries the 'F' column added earlier, so the four values are aimed
# at A-D explicitly; assigning them to the whole row (':') would not match
# the row's width and would raise.
df.loc['2013-01-04', ['A', 'B', 'C', 'D']] = np.array([1, 2, 3, 4])
df

In [None]:
df2 = df.copy()
df2[df2>0] = -df2 #  use where to update the value
df2

In [None]:
##Missing Data
#Reindexing allows you to change/add/delete the index on a specified axis. This returns a copy of the data.


df1 = df.reindex(index=dates[0:4],columns = list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1],'E'] = 1
df1

In [None]:
#To drop any rows that have missing data. not really
df2 = df1.dropna(how='any')
df2

In [None]:
df2 = df1.fillna(value= 5)
df2

In [None]:
df2[df2>0] = 0
df2

In [None]:
df2.loc['20130103',:] = None


In [None]:
pd.isna(df1)

In [None]:
# Operations

df.mean()

In [None]:
df.mean(1) # the positional argument is the axis to reduce along: 0 or 1

In [None]:
s=pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2)
#.shift(n) Shift index by desired number of periods with an optional time freq
s

In [None]:
df.sub(s,axis='index')

In [None]:
df.apply(np.cumsum) #np.cumsum Return the cumulative sum of the elements along a given axis.


In [None]:
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
df.head()
purchase_1

查看训练数据

In [None]:
df = pd.read_csv('house-prices-advanced-regression-techniques/train.csv')
df.head()

In [None]:
df.columns

In [None]:
df['SalePrice']>0

In [None]:
only_SalePrice = df.where(df['SalePrice']>0)
only_SalePrice.head()
#only_SalePrice['SalePrice'].count()

In [None]:
df.index

In [None]:
df2=df.loc[:,['Id','SalePrice','Street']]

In [None]:
df2.where(df2['SalePrice']>100000).dropna()


In [None]:
len(df[(df['SalePrice'] > 0) | (df['SalePrice'] < 0)])

In [None]:
df[(df['SalePrice'] > 0) & (df['SalePrice'] == 0)]

In [None]:
### indexing DataFrames
#df['SalePrice']=df.index #make that column to be serial numbers 
df = df.set_index('SalePrice') # just choose column to be index column
df.head()


In [None]:
#df = df.reset_index() #恢复主键
df.head()

In [None]:
df = pd.read_csv('titanic/train.csv')
df.head()

In [None]:
df['Age'].unique()

In [None]:
df = df[(df['Age']==50)]
df.head()

In [None]:
### 3.2 Pandas: Missing Values
df = pd.read_csv('titanic/train.csv')
df.head()

In [None]:
#df= df.set_index('PassengerId')
#df = df.sort_index()
df.head()

In [None]:
df = df.reset_index()
df = df.set_index(['PassengerId','Survived'])# multi fields index 
df.head()

In [None]:
# Forward-fill missing values: each gap takes the previous non-missing value.
# fillna(method='ffill') is deprecated in recent pandas; ffill() is the
# direct equivalent.
df = df.ffill()

# Notes on df.fillna:
#   - value: a replacement value can be given directly, or as a dict,
#     e.g. {0: 10, 1: 20, 2: 30}.
#   - method: one of {'pad', 'ffill', 'backfill', 'bfill', None}, default None.
#       pad / ffill      -> fill a gap with the previous non-missing value
#       backfill / bfill -> fill a gap with the next non-missing value
#   - limit: maximum number of values to fill.
#   - axis: 0 fills along columns, 1 fills along rows.
df.head()


In [280]:
### 3-3 Pandas : Merging Dataframes

df = pd.DataFrame([{'Name': 'MJ', 'Item Purchased': 'Sponge', 'Cost': 22.50},
                   {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
                   {'Name': 'Filip', 'Item Purchased': 'Spoon', 'Cost': 5.00}],
                  index=['Store 1', 'Store 1', 'Store 2'])
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Sponge,MJ
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Spoon,Filip


In [281]:
df['Date'] = ['December 1', 'January 1', 'mid-May']
df

Unnamed: 0,Cost,Item Purchased,Name,Date
Store 1,22.5,Sponge,MJ,December 1
Store 1,2.5,Kitty Litter,Kevyn,January 1
Store 2,5.0,Spoon,Filip,mid-May


In [285]:
df['Delivered']= True
df['Feedback'] = ['Positive', None, 'Negative']
df

Unnamed: 0,Cost,Item Purchased,Name,Date,Delivered,Feedback
Store 1,22.5,Sponge,MJ,December 1,True,Positive
Store 1,2.5,Kitty Litter,Kevyn,January 1,True,
Store 2,5.0,Spoon,Filip,mid-May,True,Negative


In [292]:
adf = df.reset_index()
adf['Date'] = pd.Series({0:'December 1', 2: 'mid-May'}) # a Series is one-dimensional, a DataFrame is two-dimensional
adf

Unnamed: 0,index,Cost,Item Purchased,Name,Date,Delivered,Feedback
0,Store 1,22.5,Sponge,MJ,December 1,True,Positive
1,Store 1,2.5,Kitty Litter,Kevyn,,True,
2,Store 2,5.0,Spoon,Filip,mid-May,True,Negative


In [293]:
staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},
                         {'Name': 'Sally', 'Role': 'Course liasion'},
                         {'Name': 'James', 'Role': 'Grader'}])
staff_df = staff_df.set_index('Name')
student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},
                           {'Name': 'Mike', 'School': 'Law'},
                           {'Name': 'Sally', 'School': 'Engineering'}])
student_df = student_df.set_index('Name')
print(staff_df.head())
print()
print(student_df.head())

                 Role
Name                 
Kelly  Director of HR
Sally  Course liasion
James          Grader

            School
Name              
James     Business
Mike           Law
Sally  Engineering


#### pd.merge
https://blog.csdn.net/brucewong0516/article/details/82707492

In [303]:
pd_merge = pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)
# left_index : use index in the left table to link. 


In [306]:
pd.merge(staff_df,student_df, how ='inner', left_index = True, right_index = True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Sally,Course liasion,Engineering
James,Grader,Business


In [307]:
pd.merge(staff_df,student_df, how ='left', left_index = True, right_index = True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Kelly,Director of HR,
Sally,Course liasion,Engineering
James,Grader,Business


In [308]:
staff_df = staff_df.reset_index()
student_df= student_df.reset_index()
pd.merge(staff_df,student_df,how = 'left',left_on = 'Name',right_on='Name')
# left_on: which column used to link from the left table

Unnamed: 0,Name,Role,School
0,Kelly,Director of HR,
1,Sally,Course liasion,Engineering
2,James,Grader,Business


In [309]:
staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', 'Location': 'State Street'},
                         {'Name': 'Sally', 'Role': 'Course liasion', 'Location': 'Washington Avenue'},
                         {'Name': 'James', 'Role': 'Grader', 'Location': 'Washington Avenue'}])
student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business', 'Location': '1024 Billiard Avenue'},
                           {'Name': 'Mike', 'School': 'Law', 'Location': 'Fraternity House #22'},
                           {'Name': 'Sally', 'School': 'Engineering', 'Location': '512 Wilson Crescent'}])
pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')

Unnamed: 0,Location_x,Name,Role,Location_y,School
0,State Street,Kelly,Director of HR,,
1,Washington Avenue,Sally,Course liasion,512 Wilson Crescent,Engineering
2,Washington Avenue,James,Grader,1024 Billiard Avenue,Business


In [313]:
staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},
                         {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},
                         {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])
student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},
                           {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},
                           {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])
staff_df


Unnamed: 0,First Name,Last Name,Role
0,Kelly,Desjardins,Director of HR
1,Sally,Brooks,Course liasion
2,James,Wilde,Grader


In [315]:
student_df
pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])

Unnamed: 0,First Name,Last Name,Role,School
0,Sally,Brooks,Course liasion,Engineering


In [326]:
##3-4 Idiomatic Pandas: Making Code Pandorable
df = pd.read_csv('titanic/train.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [322]:
df =df[df['Age']==50]
df.set_index(['PassengerId','Survived'], inplace =True)
df.rename(columns = {'Pclass':'pclass'})

Unnamed: 0_level_0,Unnamed: 1_level_0,pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Survived,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
178,0,1,"Isham, Miss. Ann Elizabeth",female,50.0,0,0,PC 17595,28.7125,C49,C
260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50.0,0,1,230433,26.0,,S
300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0,0,1,PC 17558,247.5208,B58 B60,C
435,0,1,"Silvey, Mr. William Baird",male,50.0,1,0,13507,55.9,E44,S
459,1,2,"Toomey, Miss. Ellen",female,50.0,0,0,F.C.C. 13531,10.5,,S
483,0,3,"Rouse, Mr. Richard Henry",male,50.0,0,0,A/5 3594,8.05,,S
527,1,2,"Ridsdale, Miss. Lucy",female,50.0,0,0,W./C. 14258,10.5,,S
545,0,1,"Douglas, Mr. Walter Donald",male,50.0,1,0,PC 17761,106.425,C86,C
661,1,1,"Frauenthal, Dr. Henry William",male,50.0,2,0,PC 17611,133.65,,S
724,0,2,"Hodges, Mr. Henry Price",male,50.0,0,0,250643,13.0,,S


In [331]:
df=df.reset_index()
df=df.set_index(['PassengerId','Survived'])
df = df[df['Age'] == 50]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Survived,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
178,0,1,"Isham, Miss. Ann Elizabeth",female,50.0,0,0,PC 17595,28.7125,C49,C
260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50.0,0,1,230433,26.0,,S
300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0,0,1,PC 17558,247.5208,B58 B60,C
435,0,1,"Silvey, Mr. William Baird",male,50.0,1,0,13507,55.9,E44,S
459,1,2,"Toomey, Miss. Ellen",female,50.0,0,0,F.C.C. 13531,10.5,,S
483,0,3,"Rouse, Mr. Richard Henry",male,50.0,0,0,A/5 3594,8.05,,S
527,1,2,"Ridsdale, Miss. Lucy",female,50.0,0,0,W./C. 14258,10.5,,S
545,0,1,"Douglas, Mr. Walter Donald",male,50.0,1,0,PC 17761,106.425,C86,C
661,1,1,"Frauenthal, Dr. Henry William",male,50.0,2,0,PC 17611,133.65,,S
724,0,2,"Hodges, Mr. Henry Price",male,50.0,0,0,250643,13.0,,S


In [333]:
# 3-5 Pandas :Group by
#DataFrame.groupby(by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, **kwargs)[source]

df = pd.read_csv('titanic/train.csv')
df = df[df['Age']==50]
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
177,178,0,1,"Isham, Miss. Ann Elizabeth",female,50.0,0,0,PC 17595,28.7125,C49,C
259,260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50.0,0,1,230433,26.0,,S
299,300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0,0,1,PC 17558,247.5208,B58 B60,C
434,435,0,1,"Silvey, Mr. William Baird",male,50.0,1,0,13507,55.9,E44,S
458,459,1,2,"Toomey, Miss. Ellen",female,50.0,0,0,F.C.C. 13531,10.5,,S
482,483,0,3,"Rouse, Mr. Richard Henry",male,50.0,0,0,A/5 3594,8.05,,S
526,527,1,2,"Ridsdale, Miss. Lucy",female,50.0,0,0,W./C. 14258,10.5,,S
544,545,0,1,"Douglas, Mr. Walter Donald",male,50.0,1,0,PC 17761,106.425,C86,C
660,661,1,1,"Frauenthal, Dr. Henry William",male,50.0,2,0,PC 17611,133.65,,S
723,724,0,2,"Hodges, Mr. Henry Price",male,50.0,0,0,250643,13.0,,S


https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html

In [344]:
df = pd.read_csv('titanic/train.csv')
# Count the non-null values of every column within each (Survived, Sex) group.
# The `axis` keyword of groupby is deprecated and 0 was its default, so it is
# simply omitted.
df.groupby(['Survived', 'Sex']).count()


Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,female,81,81,81,64,81,81,81,81,6,81
0,male,468,468,468,360,468,468,468,468,62,468
1,female,233,233,233,197,233,233,233,233,91,231
1,male,109,109,109,93,109,109,109,109,45,109


In [345]:
staff_df

Unnamed: 0,First Name,Last Name,Role
0,Kelly,Desjardins,Director of HR
1,Sally,Brooks,Course liasion
2,James,Wilde,Grader


In [352]:
student_df

Unnamed: 0,First Name,Last Name,School
0,James,Hammond,Business
1,Mike,Smith,Law
2,Sally,Brooks,Engineering


In [358]:
# Label arrays for a two-level index: the first list holds the outer level
# labels, the second the inner level labels, paired position by position.
arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
          ['Capitve', 'Wild', 'Capitve', 'Wild']]

In [361]:
index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]},index=index)

In [368]:
# Mean of each column per label of the first index level (level=0).
# The result is not stored; only the last expression of the cell (df) is displayed.
df.groupby(level=0).mean()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Max Speed
Animal,Type,Unnamed: 2_level_1
Falcon,Capitve,390.0
Falcon,Wild,350.0
Parrot,Capitve,30.0
Parrot,Wild,20.0


In [369]:
##3-6 pandas : Scales

# One letter grade per row, indexed by the quality band it belongs to.
grade_labels = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D']
quality_bands = ['excellent'] * 3 + ['good'] * 3 + ['ok'] * 3 + ['poor'] * 2
# The single unnamed column (0) is given the name 'Grades'.
df = pd.DataFrame(grade_labels, index=quality_bands).rename(columns={0: 'Grades'})
df

Unnamed: 0,Grades
excellent,A+
excellent,A
excellent,A-
good,B+
good,B
good,B-
ok,C+
ok,C
ok,C-
poor,D+


In [371]:
#astype  fields datatype conversion。
df['Grades'].astype('category')

excellent    A+
excellent     A
excellent    A-
good         B+
good          B
good         B-
ok           C+
ok            C
ok           C-
poor         D+
poor          D
Name: Grades, dtype: category
Categories (11, object): [A, A+, A-, B, ..., C+, C-, D, D+]

In [376]:
# Give the grade scale an explicit order, from lowest ('D') to highest ('A+').
# Passing categories=/ordered= straight to astype('category', ...) was
# deprecated and later removed from pandas; the order is now carried by a
# CategoricalDtype handed to astype.
grade_scale = pd.api.types.CategoricalDtype(
    categories=['D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'],
    ordered=True,
)
grades = df['Grades'].astype(grade_scale)
grades.head()

  after removing the cwd from sys.path.


excellent    A+
excellent     A
excellent    A-
good         B+
good          B
Name: Grades, dtype: category
Categories (11, object): [D < D+ < C- < C ... B+ < A- < A < A+]

In [377]:
grades>'C'

excellent     True
excellent     True
excellent     True
good          True
good          True
good          True
ok            True
ok           False
ok           False
poor         False
poor         False
Name: Grades, dtype: bool

In [379]:
#3-6-1 pandas: Select
df.loc[df['Grades']=='A']

Unnamed: 0,Grades
excellent,A


In [395]:
# To select rows whose column value is in an iterable of values, use isin.
df_test = pd.DataFrame({'A':[1,2,3],'B':[1,4,7]})
# Series.isin expects the candidate values themselves, so column 'A' is
# checked against [1, 3]; '~' negates the boolean mask to keep the rows whose
# 'A' value is NOT in the list.
# A dict of per-column values ({'A': [...], 'B': [...]}) belongs to
# DataFrame.isin, which returns a boolean DataFrame; reduce that with
# .any(axis=1) or .all(axis=1) before using it as a row mask.
not_in_a = df_test.loc[~df_test['A'].isin([1, 3])]
df_test

Unnamed: 0,A,B
0,1,1
1,2,4
2,3,7


In [387]:
df.loc[(df['Grades']=='A')|(df['Grades']<'B')] # & | !=
df.loc[(df['Grades']!='B+')]

Unnamed: 0,Grades
excellent,A+
excellent,A
excellent,A-
good,B
good,B-
ok,C+
ok,C
ok,C-
poor,D+
poor,D


In [399]:
## 3-7 Pandas:Date Functionality
pd.Timestamp('9/1/2016 10:05AM') 

Timestamp('2016-09-01 10:05:00')

In [401]:
pd.Period('1/2016')

Period('2016-01', 'M')

In [402]:
pd.Period('3/5/2016')

Period('2016-03-05', 'D')

In [403]:
t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])
t1

2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [404]:
type(t1.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [412]:
d1 = ['2 June 2013', 'Aug 29, 2014', '2015/3/21', '7/12/16','16/6/12']
ts3 = pd.DataFrame(np.random.randint(10, 100, (5,2)), index=d1, columns=list('ab'))
ts3

Unnamed: 0,a,b
2 June 2013,46,96
"Aug 29, 2014",90,73
2015/3/21,22,91
7/12/16,21,54
16/6/12,98,19


In [421]:
ts3.index = pd.to_datetime(ts3.index)
ts3
ts3.mean()

a    55.4
b    66.6
dtype: float64

In [416]:
pd.to_datetime('4.7.12', yearfirst=True)

Timestamp('2004-07-12 00:00:00')

In [417]:
pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016')
# timestamp calculation

Timedelta('2 days 00:00:00')

In [424]:
#3-8-2 Working with Dates in a Dataframe

dates = pd.date_range('10-01-2016', periods=9, freq='2W-WED') # freq works like a recurrence rule in an Outlook calendar; '2W-WED' gives every second Wednesday
dates

DatetimeIndex(['2016-10-05', '2016-10-19', '2016-11-02', '2016-11-16',
               '2016-11-30', '2016-12-14', '2016-12-28', '2017-01-11',
               '2017-01-25'],
              dtype='datetime64[ns]', freq='2W-WED')

In [430]:
# Get the index labels as a flat one-dimensional NumPy array.
# to_numpy() is used instead of Index.ravel(), whose return type differs
# between pandas versions (ndarray in older releases, Index in newer ones).
df.index.to_numpy()

array(['excellent', 'excellent', 'excellent', 'good', 'good', 'good',
       'ok', 'ok', 'ok', 'poor', 'poor'], dtype=object)