# Object creation

In [1]:
# Imports live in the first cell so the notebook runs from a fresh kernel.
import numpy as np
import pandas as pd

# Build a Series from a plain list; np.nan marks the missing entry, and the
# integers are shown as floats (dtype float64) in the output.
s = pd.Series([1, 3, 5, np.nan, 6, 9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    9.0
dtype: float64

In [2]:
# Six consecutive calendar days starting on 2020-06-22; used as the row index below.
dates = pd.date_range(start="20200622", periods=6, freq="D")
dates

DatetimeIndex(['2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-27'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Seed a dedicated generator so the random frame (and every output derived
# from it) is identical on each Restart & Run All.
rng = np.random.default_rng(seed=42)

# 6 rows indexed by `dates`, 4 standard-normal columns labelled A-D.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2020-06-22,0.825774,0.109317,-0.330521,-0.552342
2020-06-23,0.294168,-0.47744,0.274235,-1.579116
2020-06-24,-1.089297,0.211316,-0.326994,1.858009
2020-06-25,1.427319,0.034961,-1.870035,0.795084
2020-06-26,1.235706,-2.473946,-0.690439,0.166659
2020-06-27,-0.440443,0.222631,-0.264885,-0.02637


In [4]:
# Build a frame from a mapping of column name -> values. Scalars are repeated
# on every row; the array-like columns fix the length at four rows.
df2 = pd.DataFrame(
    dict(
        a=1.0,
        b=pd.Timestamp("20200622"),
        c=pd.Series(1, index=list(range(4)), dtype="float32"),
        d=np.full(4, 3, dtype="int32"),
        e=pd.Categorical(["test", "train"] * 2),
        f="foo",
    )
)
df2

Unnamed: 0,a,b,c,d,e,f
0,1.0,2020-06-22,1.0,3,test,foo
1,1.0,2020-06-22,1.0,3,train,foo
2,1.0,2020-06-22,1.0,3,test,foo
3,1.0,2020-06-22,1.0,3,train,foo


In [5]:
# Per-column dtypes: each column keeps the type it was constructed with.
df2.dtypes

a           float64
b    datetime64[ns]
c           float32
d             int32
e          category
f            object
dtype: object

# Viewing data

In [6]:
# First rows of the frame; with no argument, head() shows five.
df.head()

Unnamed: 0,A,B,C,D
2020-06-22,0.480312,0.571861,1.125708,-0.838538
2020-06-23,-0.45642,0.159586,-0.335346,0.346654
2020-06-24,1.166765,0.746158,-1.990102,0.081215
2020-06-25,0.400445,1.692777,0.163886,-0.079004
2020-06-26,0.33602,1.618398,1.383444,-1.207175


In [7]:
# Last two rows of the frame.
df.tail(2)

Unnamed: 0,A,B,C,D
2020-06-26,0.33602,1.618398,1.383444,-1.207175
2020-06-27,-1.581854,0.040083,-0.682903,0.561686


In [8]:
# Row labels: the DatetimeIndex the frame was built with.
df.index

DatetimeIndex(['2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
               '2020-06-26', '2020-06-27'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# Convert to a NumPy array. df2's columns have different dtypes, so the
# result is a single array of dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2020-06-22 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [13]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.057544,0.80481,-0.055886,-0.189193
std,0.95457,0.708412,1.243268,0.691975
min,-1.581854,0.040083,-1.990102,-1.207175
25%,-0.25831,0.262654,-0.596014,-0.648654
50%,0.368232,0.659009,-0.08573,0.001106
75%,0.460345,1.400338,0.885252,0.280295
max,1.166765,1.692777,1.383444,0.561686


In [16]:
# Transpose: the dates become columns and A-D become the rows.
df.T

Unnamed: 0,2020-06-22,2020-06-23,2020-06-24,2020-06-25,2020-06-26,2020-06-27
A,0.480312,-0.45642,1.166765,0.400445,0.33602,-1.581854
B,0.571861,0.159586,0.746158,1.692777,1.618398,0.040083
C,1.125708,-0.335346,-1.990102,0.163886,1.383444,-0.682903
D,-0.838538,0.346654,0.081215,-0.079004,-1.207175,0.561686


In [25]:
# Order the columns by their labels, descending (D, C, B, A); rows are untouched.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2020-06-22,-0.838538,1.125708,0.571861,0.480312
2020-06-23,0.346654,-0.335346,0.159586,-0.45642
2020-06-24,0.081215,-1.990102,0.746158,1.166765
2020-06-25,-0.079004,0.163886,1.692777,0.400445
2020-06-26,-1.207175,1.383444,1.618398,0.33602
2020-06-27,0.561686,-0.682903,0.040083,-1.581854


In [41]:
# Order the rows by the values in column A, ascending.
df.sort_values(by="A")

Unnamed: 0,A,B,C,D
2020-06-27,-1.581854,0.040083,-0.682903,0.561686
2020-06-23,-0.45642,0.159586,-0.335346,0.346654
2020-06-26,0.33602,1.618398,1.383444,-1.207175
2020-06-25,0.400445,1.692777,0.163886,-0.079004
2020-06-22,0.480312,0.571861,1.125708,-0.838538
2020-06-24,1.166765,0.746158,-1.990102,0.081215


In [42]:
# Sorting returned a new frame; df itself still has its original row order.
df

Unnamed: 0,A,B,C,D
2020-06-22,0.480312,0.571861,1.125708,-0.838538
2020-06-23,-0.45642,0.159586,-0.335346,0.346654
2020-06-24,1.166765,0.746158,-1.990102,0.081215
2020-06-25,0.400445,1.692777,0.163886,-0.079004
2020-06-26,0.33602,1.618398,1.383444,-1.207175
2020-06-27,-1.581854,0.040083,-0.682903,0.561686


In [43]:
# Positional row slice: the end position is excluded, so this is rows 0, 1 and 2.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2020-06-22,0.480312,0.571861,1.125708,-0.838538
2020-06-23,-0.45642,0.159586,-0.335346,0.346654
2020-06-24,1.166765,0.746158,-1.990102,0.081215


In [44]:
# Label-based row slice: unlike positional slicing, the end label is included.
df.loc["2020-06-22":"2020-06-25"]

Unnamed: 0,A,B,C,D
2020-06-22,0.480312,0.571861,1.125708,-0.838538
2020-06-23,-0.45642,0.159586,-0.335346,0.346654
2020-06-24,1.166765,0.746158,-1.990102,0.081215
2020-06-25,0.400445,1.692777,0.163886,-0.079004


# Selection by label

In [47]:
# Select a label range of rows (end included) and an explicit list of columns.
df.loc["2020-06-22":"2020-06-24", ["B", "C"]]

Unnamed: 0,B,C
2020-06-22,0.571861,1.125708
2020-06-23,0.159586,-0.335346
2020-06-24,0.746158,-1.990102


In [51]:
# Scalar lookup by row label and column label.
df.loc[dates[0], "A"]

0.48031153340019483

In [52]:
# .at is the scalar accessor; it returns the same value as the .loc lookup.
df.at[dates[0], "A"]

0.48031153340019483

# Selection by position

In [53]:
# Row at integer position 3 (the fourth row), returned as a Series.
df.iloc[3]

A    0.400445
B    1.692777
C    0.163886
D   -0.079004
Name: 2020-06-25 00:00:00, dtype: float64

In [55]:
# Label-based counterpart of df.iloc[3]: dates[3] is the fourth index label.
df.loc[dates[3]]

A    0.400445
B    1.692777
C    0.163886
D   -0.079004
Name: 2020-06-25 00:00:00, dtype: float64

In [60]:
# Positional slices exclude the end: rows 1-2 and columns 1-2 (B and C).
df.iloc[1:3, 1:3]

Unnamed: 0,B,C
2020-06-23,0.159586,-0.335346
2020-06-24,0.746158,-1.990102


In [61]:
# Pick specific rows and columns by explicit lists of integer positions.
row_positions = [1, 2, 3]
col_positions = [0, 2]
df.iloc[row_positions, col_positions]

Unnamed: 0,A,C
2020-06-23,-0.45642,-0.335346
2020-06-24,1.166765,-1.990102
2020-06-25,0.400445,0.163886


# Boolean indexing

In [5]:
# Display the full frame that the boolean masks in this section are applied to.
df

Unnamed: 0,A,B,C,D
2020-06-22,0.825774,0.109317,-0.330521,-0.552342
2020-06-23,0.294168,-0.47744,0.274235,-1.579116
2020-06-24,-1.089297,0.211316,-0.326994,1.858009
2020-06-25,1.427319,0.034961,-1.870035,0.795084
2020-06-26,1.235706,-2.473946,-0.690439,0.166659
2020-06-27,-0.440443,0.222631,-0.264885,-0.02637


In [4]:
# Keep only the rows whose value in column A is positive.
df.loc[df["A"] > 0]

Unnamed: 0,A,B,C,D
2020-06-22,0.825774,0.109317,-0.330521,-0.552342
2020-06-23,0.294168,-0.47744,0.274235,-1.579116
2020-06-25,1.427319,0.034961,-1.870035,0.795084
2020-06-26,1.235706,-2.473946,-0.690439,0.166659


In [6]:
# Element-wise mask: positive values are kept, everything else becomes NaN;
# the shape of the frame is preserved.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2020-06-22,0.825774,0.109317,,
2020-06-23,0.294168,,0.274235,
2020-06-24,,0.211316,,1.858009
2020-06-25,1.427319,0.034961,,0.795084
2020-06-26,1.235706,,,0.166659
2020-06-27,,0.222631,,


In [8]:
# Derive a separate frame with an extra label column E; assign() returns a
# new object, so df itself is left unchanged.
df2 = df.assign(E=["one", "one", "two", "three", "four", "three"])
df2

Unnamed: 0,A,B,C,D,E
2020-06-22,0.825774,0.109317,-0.330521,-0.552342,one
2020-06-23,0.294168,-0.47744,0.274235,-1.579116,one
2020-06-24,-1.089297,0.211316,-0.326994,1.858009,two
2020-06-25,1.427319,0.034961,-1.870035,0.795084,three
2020-06-26,1.235706,-2.473946,-0.690439,0.166659,four
2020-06-27,-0.440443,0.222631,-0.264885,-0.02637,three


In [12]:
# Boolean Series: True where column E is one of the listed labels.
df2["E"].isin(["one", "two"])

2020-06-22     True
2020-06-23     True
2020-06-24     True
2020-06-25    False
2020-06-26    False
2020-06-27    False
Freq: D, Name: E, dtype: bool

In [14]:
# Use the isin() mask to keep only the rows labelled "two" or "four".
df2.loc[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2020-06-24,-1.089297,0.211316,-0.326994,1.858009,two
2020-06-26,1.235706,-2.473946,-0.690439,0.166659,four


In [16]:
# The mask behind the row filter: True only for the rows labelled "two" or "four".
df2["E"].isin(["two", "four"])

2020-06-22    False
2020-06-23    False
2020-06-24     True
2020-06-25    False
2020-06-26     True
2020-06-27    False
Freq: D, Name: E, dtype: bool

# Setting

In [None]:
%time : 