In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series built from a plain list; the np.nan entry makes the dtype float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal draws, rows labelled by `dates`, columns A-D.
# NOTE(review): no random seed is set in the visible cells, so a re-run will
# not reproduce the values shown in the outputs.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.703547,0.25307,-0.829037,0.189186
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383
2013-01-04,0.028016,-0.222023,0.907866,1.076811
2013-01-05,0.337149,-0.9667,-0.129067,1.287065
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748


In [5]:
# Build a frame from per-column specs of different kinds. The scalar entries
# (A, B, F) are repeated to match the four rows given by C, D and E.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp('20130102'),
        C=pd.Series(1, index=list(range(4)), dtype='float32'),
        D=np.full(4, 3, dtype='int32'),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F='foo',
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# Each column of df2 keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
# Preview the top of the frame; with no argument head() shows five rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.703547,0.25307,-0.829037,0.189186
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383
2013-01-04,0.028016,-0.222023,0.907866,1.076811
2013-01-05,0.337149,-0.9667,-0.129067,1.287065


In [9]:
# Preview the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.028016,-0.222023,0.907866,1.076811
2013-01-05,0.337149,-0.9667,-0.129067,1.287065
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748


In [10]:
# Row labels of the frame (the DatetimeIndex passed in at construction).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [12]:
# Values as a NumPy array; every column is float, so the array is float too.
df.to_numpy()

array([[-0.70354697,  0.25307005, -0.82903732,  0.18918633],
       [ 0.44899786,  0.20452565, -0.45979561,  1.11570153],
       [ 0.91948712,  0.63228421,  0.02458921,  1.03638289],
       [ 0.02801563, -0.22202282,  0.90786619,  1.0768111 ],
       [ 0.33714892, -0.96669987, -0.12906711,  1.28706542],
       [-0.02029317, -1.17250215,  0.26615245, -0.73774759]])

In [13]:
# df2 mixes column dtypes, so the resulting array has dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.168302,-0.211891,-0.036549,0.661233
std,0.545334,0.720394,0.600503,0.786336
min,-0.703547,-1.172502,-0.829037,-0.737748
25%,-0.008216,-0.780531,-0.377113,0.400985
50%,0.182582,-0.008749,-0.052239,1.056597
75%,0.421036,0.240934,0.205762,1.105979
max,0.919487,0.632284,0.907866,1.287065


In [15]:
# Transpose: the columns A-D become rows and the dates become columns.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.703547,0.448998,0.919487,0.028016,0.337149,-0.020293
B,0.25307,0.204526,0.632284,-0.222023,-0.9667,-1.172502
C,-0.829037,-0.459796,0.024589,0.907866,-0.129067,0.266152
D,0.189186,1.115702,1.036383,1.076811,1.287065,-0.737748


In [16]:
# Sort by the column labels (axis=1) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.189186,-0.829037,0.25307,-0.703547
2013-01-02,1.115702,-0.459796,0.204526,0.448998
2013-01-03,1.036383,0.024589,0.632284,0.919487
2013-01-04,1.076811,0.907866,-0.222023,0.028016
2013-01-05,1.287065,-0.129067,-0.9667,0.337149
2013-01-06,-0.737748,0.266152,-1.172502,-0.020293


In [17]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748
2013-01-05,0.337149,-0.9667,-0.129067,1.287065
2013-01-04,0.028016,-0.222023,0.907866,1.076811
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-01,-0.703547,0.25307,-0.829037,0.189186
2013-01-03,0.919487,0.632284,0.024589,1.036383


In [18]:
# Select a single column by name; the result is a Series named A.
df['A']

2013-01-01   -0.703547
2013-01-02    0.448998
2013-01-03    0.919487
2013-01-04    0.028016
2013-01-05    0.337149
2013-01-06   -0.020293
Freq: D, Name: A, dtype: float64

In [19]:
 df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.703547,0.25307,-0.829037,0.189186
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383


In [20]:
# Select one row by its label; the result is a Series indexed by column name.
df.loc[dates[0]]

A   -0.703547
B    0.253070
C   -0.829037
D    0.189186
Name: 2013-01-01 00:00:00, dtype: float64

In [21]:
# All rows, columns A and B, selected by label.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-0.703547,0.25307
2013-01-02,0.448998,0.204526
2013-01-03,0.919487,0.632284
2013-01-04,0.028016,-0.222023
2013-01-05,0.337149,-0.9667
2013-01-06,-0.020293,-1.172502


In [22]:
# Label-based row slice: both endpoints (01-02 and 01-04) are included.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,0.448998,0.204526
2013-01-03,0.919487,0.632284
2013-01-04,0.028016,-0.222023


In [23]:
# A single row label with a column list reduces the result to a Series.
df.loc['20130102', ['A', 'B']]

A    0.448998
B    0.204526
Name: 2013-01-02 00:00:00, dtype: float64

In [24]:
# Row label plus column label yields a single scalar.
df.loc[dates[0], 'A']

-0.7035469670285542

In [25]:
# Same scalar as the .loc lookup, fetched through the .at accessor.
df.at[dates[0], 'A']

-0.7035469670285542

In [26]:
# Row at integer position 3 (the fourth row, 2013-01-04).
df.iloc[3]

A    0.028016
B   -0.222023
C    0.907866
D    1.076811
Name: 2013-01-04 00:00:00, dtype: float64

In [27]:
# Positional slices exclude the end point: rows 3-4 and columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.028016,-0.222023
2013-01-05,0.337149,-0.9667


In [28]:
# Explicit lists of integer positions for both rows and columns.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,0.448998,-0.459796
2013-01-03,0.919487,0.024589
2013-01-05,0.337149,-0.129067


In [29]:
 df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383


In [30]:
# Every row, columns at positions 1 and 2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,0.25307,-0.829037
2013-01-02,0.204526,-0.459796
2013-01-03,0.632284,0.024589
2013-01-04,-0.222023,0.907866
2013-01-05,-0.9667,-0.129067
2013-01-06,-1.172502,0.266152


In [31]:
# Scalar at row position 1, column position 1.
df.iloc[1, 1]

0.20452565327397879

In [32]:
# Boolean mask built from one column: keep the rows where A is positive.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-02,0.448998,0.204526,-0.459796,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383
2013-01-04,0.028016,-0.222023,0.907866,1.076811
2013-01-05,0.337149,-0.9667,-0.129067,1.287065


In [33]:
# Element-wise mask: entries that are not positive come back as NaN.
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,,0.25307,,0.189186
2013-01-02,0.448998,0.204526,,1.115702
2013-01-03,0.919487,0.632284,0.024589,1.036383
2013-01-04,0.028016,,0.907866,1.076811
2013-01-05,0.337149,,,1.287065
2013-01-06,,,0.266152,


In [34]:
# Work on a copy so df itself is untouched, then add a string column E.
# Note: this rebinds df2, replacing the frame created in an earlier cell.
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.703547,0.25307,-0.829037,0.189186,one
2013-01-02,0.448998,0.204526,-0.459796,1.115702,one
2013-01-03,0.919487,0.632284,0.024589,1.036383,two
2013-01-04,0.028016,-0.222023,0.907866,1.076811,three
2013-01-05,0.337149,-0.9667,-0.129067,1.287065,four
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748,three


In [35]:
# isin() builds a membership mask; keep rows whose E is 'two' or 'four'.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,0.919487,0.632284,0.024589,1.036383,two
2013-01-05,0.337149,-0.9667,-0.129067,1.287065,four


In [36]:
# The integers 1..6 labelled with six daily dates starting on 2013-01-02.
s1 = pd.Series(range(1, 7), index=pd.date_range(start='20130102', periods=6))
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [37]:
# Assigning a Series aligns it on the frame's index: 2013-01-01 has no entry
# in s1 and becomes NaN, while s1's 2013-01-07 value has no row to land in.
# This modifies df in place.
df['F'] = s1
df

Unnamed: 0,A,B,C,D,F
2013-01-01,-0.703547,0.25307,-0.829037,0.189186,
2013-01-02,0.448998,0.204526,-0.459796,1.115702,1.0
2013-01-03,0.919487,0.632284,0.024589,1.036383,2.0
2013-01-04,0.028016,-0.222023,0.907866,1.076811,3.0
2013-01-05,0.337149,-0.9667,-0.129067,1.287065,4.0
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748,5.0


In [39]:
# Set a single cell by label (first date, column A). Modifies df in place.
df.at[dates[0], 'A'] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.25307,-0.829037,0.189186,
2013-01-02,0.448998,0.204526,-0.459796,1.115702,1.0
2013-01-03,0.919487,0.632284,0.024589,1.036383,2.0
2013-01-04,0.028016,-0.222023,0.907866,1.076811,3.0
2013-01-05,0.337149,-0.9667,-0.129067,1.287065,4.0
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748,5.0


In [41]:
# Set a single cell by position (row 0, column 1 = B). Modifies df in place.
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.829037,0.189186,
2013-01-02,0.448998,0.204526,-0.459796,1.115702,1.0
2013-01-03,0.919487,0.632284,0.024589,1.036383,2.0
2013-01-04,0.028016,-0.222023,0.907866,1.076811,3.0
2013-01-05,0.337149,-0.9667,-0.129067,1.287065,4.0
2013-01-06,-0.020293,-1.172502,0.266152,-0.737748,5.0


In [42]:
# Overwrite all of column D with an array of 5s, one per row of df.
df.loc[:, 'D'] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.829037,5,
2013-01-02,0.448998,0.204526,-0.459796,5,1.0
2013-01-03,0.919487,0.632284,0.024589,5,2.0
2013-01-04,0.028016,-0.222023,0.907866,5,3.0
2013-01-05,0.337149,-0.9667,-0.129067,5,4.0
2013-01-06,-0.020293,-1.172502,0.266152,5,5.0


In [43]:
# On a fresh copy of df, replace each positive entry with its negation;
# zeros, negative values and NaN are left as they are.
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.829037,-5,
2013-01-02,-0.448998,-0.204526,-0.459796,-5,-1.0
2013-01-03,-0.919487,-0.632284,-0.024589,-5,-2.0
2013-01-04,-0.028016,-0.222023,-0.907866,-5,-3.0
2013-01-05,-0.337149,-0.9667,-0.129067,-5,-4.0
2013-01-06,-0.020293,-1.172502,-0.266152,-5,-5.0


In [44]:
# reindex returns a new frame limited to the first four dates, with an extra
# column E that starts out all-NaN; E is then set to 1 for the first two
# dates (the label slice includes both endpoints).
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-0.829037,5,,1.0
2013-01-02,0.448998,0.204526,-0.459796,5,1.0,1.0
2013-01-03,0.919487,0.632284,0.024589,5,2.0,
2013-01-04,0.028016,-0.222023,0.907866,5,3.0,


In [45]:
# Drop every row that contains at least one missing value.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2013-01-02,0.448998,0.204526,-0.459796,5,1.0,1.0


In [46]:
# Replace every missing value with 5.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-0.829037,5,5.0,1.0
2013-01-02,0.448998,0.204526,-0.459796,5,1.0,1.0
2013-01-03,0.919487,0.632284,0.024589,5,2.0,5.0
2013-01-04,0.028016,-0.222023,0.907866,5,3.0,5.0


In [47]:
# Boolean frame that is True wherever df1 holds a missing value.
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,True,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


In [48]:
# Mean of each column; missing values are skipped (F averages 1..5 -> 3.0).
df.mean()

A    0.285559
B   -0.254069
C   -0.036549
D    5.000000
F    3.000000
dtype: float64

In [49]:
# Row-wise mean: axis=1 averages across the columns of each row.
df.mean(axis=1)

2013-01-01    1.042741
2013-01-02    1.238746
2013-01-03    1.715272
2013-01-04    1.742772
2013-01-05    1.648276
2013-01-06    1.814671
Freq: D, dtype: float64

In [50]:
# Values labelled by `dates`, then moved two positions down the index;
# the two vacated leading slots are filled with NaN.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [51]:
# Subtract s from every column, matching on the row labels; rows where s is
# NaN come out as NaN in every column.
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2013-01-01,,,,,
2013-01-02,,,,,
2013-01-03,-0.080513,-0.367716,-0.975411,4.0,1.0
2013-01-04,-2.971984,-3.222023,-2.092134,2.0,0.0
2013-01-05,-4.662851,-5.9667,-5.129067,0.0,-1.0
2013-01-06,,,,,


In [52]:
# Apply a function to each column: here a running (cumulative) sum.
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.829037,5,
2013-01-02,0.448998,0.204526,-1.288833,10,1.0
2013-01-03,1.368485,0.83681,-1.264244,15,3.0
2013-01-04,1.396501,0.614787,-0.356378,20,6.0
2013-01-05,1.73365,-0.351913,-0.485445,25,10.0
2013-01-06,1.713356,-1.524415,-0.219292,30,15.0


In [53]:
# Per-column spread: largest value minus smallest value.
df.apply(lambda x: x.max() - x.min())

A    0.939780
B    1.804786
C    1.736904
D    0.000000
F    4.000000
dtype: float64

In [54]:
# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(low=0, high=7, size=10))
s

0    5
1    1
2    1
3    5
4    4
5    6
6    5
7    5
8    2
9    6
dtype: int64

In [55]:
# How often each distinct value occurs, most frequent first.
s.value_counts()

5    4
6    2
1    2
4    1
2    1
dtype: int64

In [56]:
# String methods are reached through .str and applied element by element;
# the missing entry stays NaN.
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [57]:
# 10x4 frame of standard-normal draws with default integer row and column
# labels. This rebinds df, replacing the date-indexed frame used so far.
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.708102,0.000872,-1.737251,0.374913
1,0.671368,-0.730353,0.089911,-3.115603
2,0.823441,0.018183,-1.729339,-0.181451
3,1.123623,-0.666838,1.332576,-1.332617
4,2.167167,-0.886741,0.912715,-0.452777
5,0.681622,1.113386,-1.625962,-0.455837
6,0.076029,0.081195,0.90801,0.777336
7,-0.215571,-0.001353,-0.79277,0.001637
8,0.347391,-0.8624,-0.570021,1.580609
9,-0.295899,-1.333027,-0.843398,0.922782


In [58]:
# Split the frame into three row chunks: rows 0-2, rows 3-6 and rows 7-9.
pieces = [df[:3], df[3:7], df[7:]]
pieces

[          0         1         2         3
 0  0.708102  0.000872 -1.737251  0.374913
 1  0.671368 -0.730353  0.089911 -3.115603
 2  0.823441  0.018183 -1.729339 -0.181451,
           0         1         2         3
 3  1.123623 -0.666838  1.332576 -1.332617
 4  2.167167 -0.886741  0.912715 -0.452777
 5  0.681622  1.113386 -1.625962 -0.455837
 6  0.076029  0.081195  0.908010  0.777336,
           0         1         2         3
 7 -0.215571 -0.001353 -0.792770  0.001637
 8  0.347391 -0.862400 -0.570021  1.580609
 9 -0.295899 -1.333027 -0.843398  0.922782]

In [59]:
# Stack the chunks back together along the rows.
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.708102,0.000872,-1.737251,0.374913
1,0.671368,-0.730353,0.089911,-3.115603
2,0.823441,0.018183,-1.729339,-0.181451
3,1.123623,-0.666838,1.332576,-1.332617
4,2.167167,-0.886741,0.912715,-0.452777
5,0.681622,1.113386,-1.625962,-0.455837
6,0.076029,0.081195,0.90801,0.777336
7,-0.215571,-0.001353,-0.79277,0.001637
8,0.347391,-0.8624,-0.570021,1.580609
9,-0.295899,-1.333027,-0.843398,0.922782


In [60]:
# Two small frames sharing a 'key' column; the key 'foo' appears twice in each.
left = pd.DataFrame(data={'key': ['foo'] * 2, 'lval': [1, 2]})
right = pd.DataFrame(data={'key': ['foo'] * 2, 'rval': [4, 5]})

In [61]:
# Display the left-hand frame.
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [62]:
# Display the right-hand frame.
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [63]:
# Join on 'key'. Because 'foo' is duplicated on both sides, every left row is
# paired with every right row (2 x 2 = 4 rows).
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [64]:
# Same join with distinct keys: each key matches exactly once, giving one
# output row per key.
left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


In [65]:
# 8x4 frame of standard-normal draws with columns A-D and default integer row
# labels. This rebinds df again.
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,1.140636,-0.850345,-0.902052,-2.123226
1,-0.597307,-1.210971,1.159433,-0.178428
2,-0.259563,0.200929,-0.742308,0.3382
3,-1.544126,0.63737,2.438659,1.318495
4,-1.171289,-0.117978,-1.274127,-0.180696
5,0.169389,1.439261,0.93152,0.34337
6,-1.252818,1.454879,-0.25764,0.444247
7,0.512334,-1.677709,-0.656556,-0.388788


In [66]:
# Add a copy of row 3 to the end of the frame, renumbering the rows 0..8.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# row (a Series) is turned into a one-row frame and concatenated instead.
s = df.iloc[3]
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,1.140636,-0.850345,-0.902052,-2.123226
1,-0.597307,-1.210971,1.159433,-0.178428
2,-0.259563,0.200929,-0.742308,0.3382
3,-1.544126,0.63737,2.438659,1.318495
4,-1.171289,-0.117978,-1.274127,-0.180696
5,0.169389,1.439261,0.93152,0.34337
6,-1.252818,1.454879,-0.25764,0.444247
7,0.512334,-1.677709,-0.656556,-0.388788
8,-1.544126,0.63737,2.438659,1.318495
