# Pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
s = pd.Series([1,3,5,np.nan,7,8,9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    7.0
5    8.0
6    9.0
dtype: float64

In [3]:
dates = pd.date_range("20130101",periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739


In [5]:
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [6]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498


In [7]:
df.tail()

Unnamed: 0,A,B,C,D
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739


In [8]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
df.to_numpy()

array([[ 0.3960925 , -0.79188915,  1.60315884, -0.52822135],
       [-1.34825845,  0.08687973,  0.4759378 , -1.72427169],
       [ 0.00210654,  0.23948393, -0.35618182, -1.53331941],
       [ 0.62619107,  1.36285566,  0.43023099, -0.06159507],
       [-0.01976961, -0.81180456, -0.00275721,  0.19949831],
       [-1.5212008 , -1.16737816,  0.27671509, -1.55573895]])

In [10]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.310806,-0.180309,0.404517,-0.867275
std,0.905721,0.935531,0.663833,0.843104
min,-1.521201,-1.167378,-0.356182,-1.724272
25%,-1.016136,-0.806826,0.067111,-1.550134
50%,-0.008832,-0.352505,0.353473,-1.03077
75%,0.297596,0.201333,0.464511,-0.178252
max,0.626191,1.362856,1.603159,0.199498


In [11]:
#to transpose
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.396092,-1.348258,0.002107,0.626191,-0.01977,-1.521201
B,-0.791889,0.08688,0.239484,1.362856,-0.811805,-1.167378
C,1.603159,0.475938,-0.356182,0.430231,-0.002757,0.276715
D,-0.528221,-1.724272,-1.533319,-0.061595,0.199498,-1.555739


In [12]:
#to sort
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739


In [13]:
#sort based on values 
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595


In [14]:
df["A"],["B"]

(2013-01-01    0.396092
 2013-01-02   -1.348258
 2013-01-03    0.002107
 2013-01-04    0.626191
 2013-01-05   -0.019770
 2013-01-06   -1.521201
 Freq: D, Name: A, dtype: float64,
 ['B'])

In [15]:
#rows wise selection
df[0:10]

Unnamed: 0,A,B,C,D
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-02,-1.348258,0.08688,0.475938,-1.724272
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739


In [16]:
df.loc[dates[0]]

A    0.396092
B   -0.791889
C    1.603159
D   -0.528221
Name: 2013-01-01 00:00:00, dtype: float64

In [17]:
df.loc[:,["A", "B"]]

Unnamed: 0,A,B
2013-01-01,0.396092,-0.791889
2013-01-02,-1.348258,0.08688
2013-01-03,0.002107,0.239484
2013-01-04,0.626191,1.362856
2013-01-05,-0.01977,-0.811805
2013-01-06,-1.521201,-1.167378


In [18]:
df.loc["20220102":"20220104",["A", "B"]]

Unnamed: 0,A,B


In [19]:
#to get value of specfic date
df.at[dates[0],"A"] 

0.39609249957400966

In [20]:
df.at[dates[5],"A"] 

-1.5212007984566667

In [21]:
#to get value of specfic position
df.iloc[3:10]

Unnamed: 0,A,B,C,D
2013-01-04,0.626191,1.362856,0.430231,-0.061595
2013-01-05,-0.01977,-0.811805,-0.002757,0.199498
2013-01-06,-1.521201,-1.167378,0.276715,-1.555739


In [22]:
df[df["A"]>0]

Unnamed: 0,A,B,C,D
2013-01-01,0.396092,-0.791889,1.603159,-0.528221
2013-01-03,0.002107,0.239484,-0.356182,-1.533319
2013-01-04,0.626191,1.362856,0.430231,-0.061595
