# 10 minutes to pandas

https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html

In [1]:
import numpy as np
import pandas as pd

In [3]:
# A Series built from a plain list gets a default integer index; the NaN
# entry makes the whole Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [16]:
# Six consecutive calendar days starting 2022-01-01; used as the row index
# of the main example frame.
dates = pd.date_range(start='20220101', periods=6, freq='D')
dates

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06'],
              dtype='datetime64[ns]', freq='D')

In [21]:
# Main example frame: 6 rows (one per entry in `dates`) x 4 columns A-D of
# standard-normal samples.
# A seeded Generator is used instead of the global unseeded RNG so that this
# frame, and every output derived from it, is reproducible under
# Restart Kernel -> Run All.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2022-01-01,-0.371699,0.0975,0.346212,-1.007007
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-03,-0.596252,-2.072392,0.10669,1.461795
2022-01-04,0.548311,0.504753,-0.865933,1.380846
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082
2022-01-06,1.102424,0.391288,0.476677,0.203918


In [29]:
# Build a frame from a dict of column-like objects. Scalars ('A', 'B', 'F')
# are repeated for every row; the Series, array and Categorical each supply
# four values and fix the row count. Every column keeps its own dtype.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [30]:
# Each column of df2 carries its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [32]:
# Attribute-style access to column D (same column as df2['D']).
df2.D

0    3
1    3
2    3
3    3
Name: D, dtype: int32

In [35]:
# Column E is categorical; its categories are 'test' and 'train'.
df2.E

0     test
1    train
2     test
3    train
Name: E, dtype: category
Categories (2, object): ['test', 'train']

In [36]:
# First rows of the frame (head() shows 5 rows when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2022-01-01,-0.371699,0.0975,0.346212,-1.007007
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-03,-0.596252,-2.072392,0.10669,1.461795
2022-01-04,0.548311,0.504753,-0.865933,1.380846
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082


In [38]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2022-01-04,0.548311,0.504753,-0.865933,1.380846
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082
2022-01-06,1.102424,0.391288,0.476677,0.203918


In [39]:
# Row labels: the DatetimeIndex the frame was built with.
df.index

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06'],
              dtype='datetime64[ns]', freq='D')

In [40]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [41]:
# NumPy view of the data; index and column labels are not part of the result.
df.to_numpy()

array([[-0.37169868,  0.09750025,  0.3462124 , -1.00700747],
       [ 0.96497659, -1.469004  , -2.14059539, -0.95090795],
       [-0.59625235, -2.07239248,  0.10668954,  1.46179503],
       [ 0.54831107,  0.50475347, -0.86593313,  1.38084572],
       [-0.70873349, -0.25580338, -1.29175681,  0.16408243],
       [ 1.10242382,  0.39128805,  0.47667663,  0.20391754]])

In [42]:
# df2 mixes several column dtypes, so the combined array has dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [43]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.156504,-0.467276,-0.561451,0.208788
std,0.811934,1.06071,1.045727,1.074134
min,-0.708733,-2.072392,-2.140595,-1.007007
25%,-0.540114,-1.165704,-1.185301,-0.67216
50%,0.088306,-0.079152,-0.379622,0.184
75%,0.86081,0.317841,0.286332,1.086614
max,1.102424,0.504753,0.476677,1.461795


In [44]:
# Transpose: column labels become the index and dates become the columns.
df.T

Unnamed: 0,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06
A,-0.371699,0.964977,-0.596252,0.548311,-0.708733,1.102424
B,0.0975,-1.469004,-2.072392,0.504753,-0.255803,0.391288
C,0.346212,-2.140595,0.10669,-0.865933,-1.291757,0.476677
D,-1.007007,-0.950908,1.461795,1.380846,0.164082,0.203918


In [45]:
# Sort along axis=1, i.e. by column label, in descending order (D, C, B, A).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-01-01,-1.007007,0.346212,0.0975,-0.371699
2022-01-02,-0.950908,-2.140595,-1.469004,0.964977
2022-01-03,1.461795,0.10669,-2.072392,-0.596252
2022-01-04,1.380846,-0.865933,0.504753,0.548311
2022-01-05,0.164082,-1.291757,-0.255803,-0.708733
2022-01-06,0.203918,0.476677,0.391288,1.102424


In [46]:
# Reorder rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2022-01-03,-0.596252,-2.072392,0.10669,1.461795
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082
2022-01-01,-0.371699,0.0975,0.346212,-1.007007
2022-01-06,1.102424,0.391288,0.476677,0.203918
2022-01-04,0.548311,0.504753,-0.865933,1.380846


In [47]:
# Selecting a single column label returns a Series.
df['A']

2022-01-01   -0.371699
2022-01-02    0.964977
2022-01-03   -0.596252
2022-01-04    0.548311
2022-01-05   -0.708733
2022-01-06    1.102424
Freq: D, Name: A, dtype: float64

In [49]:
# An integer slice inside [] selects rows by position: here the first two.
df[0:2]

Unnamed: 0,A,B,C,D
2022-01-01,-0.371699,0.0975,0.346212,-1.007007
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908


In [51]:
# Slicing rows by date label; unlike positional slices, both endpoints are included.
df['20220102':'20220104']

Unnamed: 0,A,B,C,D
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-03,-0.596252,-2.072392,0.10669,1.461795
2022-01-04,0.548311,0.504753,-0.865933,1.380846


### Selection by label

In [52]:
# Row for the first date label, returned as a Series indexed by column name.
df.loc[dates[0]]

A   -0.371699
B    0.097500
C    0.346212
D   -1.007007
Name: 2022-01-01 00:00:00, dtype: float64

In [53]:
# All rows, columns A and B, selected by label.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2022-01-01,-0.371699,0.0975
2022-01-02,0.964977,-1.469004
2022-01-03,-0.596252,-2.072392
2022-01-04,0.548311,0.504753
2022-01-05,-0.708733,-0.255803
2022-01-06,1.102424,0.391288


In [54]:
# Label slice on rows (both endpoints included) combined with a column list.
df.loc['20220102':'20220104', ['A', 'B']]

Unnamed: 0,A,B
2022-01-02,0.964977,-1.469004
2022-01-03,-0.596252,-2.072392
2022-01-04,0.548311,0.504753


In [56]:
# A single row label reduces the result to a Series over the chosen columns.
df.loc['20220102', ['A', 'B']]

A    0.964977
B   -1.469004
Name: 2022-01-02 00:00:00, dtype: float64

In [60]:
# Row label plus column label yields a scalar.
df.loc[dates[0], 'A']

-0.37169867809571827

In [61]:
# Same scalar via .at, the accessor meant for single-value label lookups.
df.at[dates[0], 'A']

-0.37169867809571827

### Selection by position

In [62]:
# Row at integer position 3 (the fourth row), returned as a Series.
df.iloc[3]

A    0.548311
B    0.504753
C   -0.865933
D    1.380846
Name: 2022-01-04 00:00:00, dtype: float64

In [63]:
# Positional slices exclude the end: rows 3-4 and columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2022-01-04,0.548311,0.504753
2022-01-05,-0.708733,-0.255803


In [64]:
# Explicit lists of integer positions for both rows and columns.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2022-01-02,0.964977,-2.140595
2022-01-03,-0.596252,0.10669
2022-01-05,-0.708733,-1.291757


In [65]:
# Rows at positions 1-2, every column.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-03,-0.596252,-2.072392,0.10669,1.461795


In [66]:
# Every row, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2022-01-01,0.0975,0.346212
2022-01-02,-1.469004,-2.140595
2022-01-03,-2.072392,0.10669
2022-01-04,0.504753,-0.865933
2022-01-05,-0.255803,-1.291757
2022-01-06,0.391288,0.476677


In [67]:
# Scalar at row position 1, column position 1.
df.iloc[1, 1]

-1.4690040018508685

In [68]:
# Same scalar via .iat, the accessor meant for single-value positional lookups.
df.iat[1, 1]

-1.4690040018508685

### Boolean indexing

In [69]:
# Boolean Series as a row filter: keep rows where column A is positive.
df[df['A'] > 0]

Unnamed: 0,A,B,C,D
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908
2022-01-04,0.548311,0.504753,-0.865933,1.380846
2022-01-06,1.102424,0.391288,0.476677,0.203918


In [70]:
# Element-wise mask: the shape is kept and entries that are not > 0 become NaN.
df[df > 0]

Unnamed: 0,A,B,C,D
2022-01-01,,0.0975,0.346212,
2022-01-02,0.964977,,,
2022-01-03,,,0.10669,1.461795
2022-01-04,0.548311,0.504753,,1.380846
2022-01-05,,,,0.164082
2022-01-06,1.102424,0.391288,0.476677,0.203918


In [71]:
# Work on a copy so the column added next does not touch df.
# NOTE: this rebinds df2, replacing the mixed-dtype frame built earlier.
df2 = df.copy()

In [74]:
# Add a string column E (one value per row), then display the result.
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2022-01-01,-0.371699,0.0975,0.346212,-1.007007,one
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908,one
2022-01-03,-0.596252,-2.072392,0.10669,1.461795,two
2022-01-04,0.548311,0.504753,-0.865933,1.380846,three
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082,four
2022-01-06,1.102424,0.391288,0.476677,0.203918,three


In [73]:
# isin() builds a boolean mask: keep rows whose E value is 'two' or 'four'.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2022-01-03,-0.596252,-2.072392,0.10669,1.461795,two
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082,four


### Setting

In [84]:
# Integers 1..6 on a daily index that starts one day after df's first date.
s1 = pd.Series(
    data=range(1, 7),
    index=pd.date_range(start='20220102', periods=6, freq='D'),
)
s1

2022-01-02    1
2022-01-03    2
2022-01-04    3
2022-01-05    4
2022-01-06    5
2022-01-07    6
Freq: D, dtype: int64

In [85]:
# Assigning a Series aligns it on the index: 2022-01-01 is missing from s1 and
# becomes NaN, while s1's 2022-01-07 entry has no matching row and is dropped.
# This mutates df in place.
df['F'] = s1

In [86]:
# Display df with the new column F.
df

Unnamed: 0,A,B,C,D,F
2022-01-01,-0.371699,0.0975,0.346212,-1.007007,
2022-01-02,0.964977,-1.469004,-2.140595,-0.950908,1.0
2022-01-03,-0.596252,-2.072392,0.10669,1.461795,2.0
2022-01-04,0.548311,0.504753,-0.865933,1.380846,3.0
2022-01-05,-0.708733,-0.255803,-1.291757,0.164082,4.0
2022-01-06,1.102424,0.391288,0.476677,0.203918,5.0


In [87]:
# Set a single value by label (first date, column A). Mutates df in place.
df.at[dates[0], 'A'] = 0

In [88]:
# Set a single value by position: row 0, column 1 (column B). Mutates df in place.
df.iat[0, 1] = 0

In [89]:
# Replace column D with an integer array of 5s (one per row).
# Plain column assignment swaps in the new array, so D ends up as an integer
# column on every pandas version. `df.loc[:, 'D'] = ...` instead writes into
# the existing float64 column on pandas >= 2.0 and would leave 5.0 values.
df['D'] = np.array([5] * len(df))

In [90]:
# Display df after the three in-place updates to A, B and D.
df

Unnamed: 0,A,B,C,D,F
2022-01-01,0.0,0.0,0.346212,5,
2022-01-02,0.964977,-1.469004,-2.140595,5,1.0
2022-01-03,-0.596252,-2.072392,0.10669,5,2.0
2022-01-04,0.548311,0.504753,-0.865933,5,3.0
2022-01-05,-0.708733,-0.255803,-1.291757,5,4.0
2022-01-06,1.102424,0.391288,0.476677,5,5.0


In [91]:
# Fresh copy of the updated df, so the masked assignment that follows leaves df
# itself unchanged. NOTE: this rebinds df2 once more, discarding the frame
# that carried column E.
df2 = df.copy()

In [92]:
# Masked assignment: every positive entry is replaced by its negation, so no
# positive values remain. NaN entries fail the `> 0` test and are untouched.
df2[df2 > 0] = -df2