# Pandas in Python

In [1]:
import numpy as np
import pandas as pd

* **1. Series:** Creating a Series by passing a list of values, letting pandas create a default integer index:

In [2]:
# Four-element Series; no index is given, so pandas assigns the labels 0..3.
s = pd.Series(data=[1, 2, 3, 4])

In [3]:
s

0    1
1    2
2    3
3    4
dtype: int64

* **2. DataFrame:** Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns:

In [4]:
# Six consecutive calendar days starting on 2021-02-17 (daily frequency is the default).
dates = pd.date_range(start='20210217', periods=6, freq='D')

In [5]:
dates

DatetimeIndex(['2021-02-17', '2021-02-18', '2021-02-19', '2021-02-20',
               '2021-02-21', '2021-02-22'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# 6x4 frame of standard-normal samples, indexed by `dates`, with columns A-D.
# A seeded, local RandomState makes the values identical on every
# Restart & Run All and leaves NumPy's global random state untouched.
# Re-run the notebook once to refresh the saved outputs for this seed.
df = pd.DataFrame(
    np.random.RandomState(0).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [7]:
df

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445


In [8]:
# Data type of each column; all four random columns hold float64 values.
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

* **3. Viewing Data:**

In [9]:
# First rows of the frame (head() returns 5 rows when no count is given).
# The last rows are shown with tail() in the next cell.
df.head()

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [10]:
# Last rows of the frame (tail() also returns 5 rows when no count is given).
df.tail()

Unnamed: 0,A,B,C,D
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445


In [11]:
# Row labels of the frame: the DatetimeIndex that was passed in as `dates`.
df.index

DatetimeIndex(['2021-02-17', '2021-02-18', '2021-02-19', '2021-02-20',
               '2021-02-21', '2021-02-22'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# Underlying data as a 2-D NumPy array; the row and column labels are dropped.
# NOTE(review): newer pandas documentation recommends DataFrame.to_numpy()
# over .values — confirm the installed version supports it before switching.
df.values

array([[ 0.86374917, -1.23203501, -0.08280196,  0.11070992],
       [ 0.35172911,  1.37404976,  0.42982569, -0.82368712],
       [ 0.25352269, -0.03088654,  0.61111106,  0.78640587],
       [ 0.86221736, -0.54356902,  0.17499813, -0.10642625],
       [ 1.36321036, -1.96823633, -0.86266874,  1.24181317],
       [-0.26478268,  0.78227872, -0.22739694, -0.35944452]])

In [14]:
# Quick statistical summary per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.571608,-0.269733,0.007178,0.141562
std,0.573674,1.245128,0.527501,0.757831
min,-0.264783,-1.968236,-0.862669,-0.823687
25%,0.278074,-1.059919,-0.191248,-0.29619
50%,0.606973,-0.287228,0.046098,0.002142
75%,0.863366,0.578987,0.366119,0.617482
max,1.36321,1.37405,0.611111,1.241813


In [15]:
# Transpose: the column labels become the row index and the dates become the columns.
df.T

Unnamed: 0,2021-02-17,2021-02-18,2021-02-19,2021-02-20,2021-02-21,2021-02-22
A,0.863749,0.351729,0.253523,0.862217,1.36321,-0.264783
B,-1.232035,1.37405,-0.030887,-0.543569,-1.968236,0.782279
C,-0.082802,0.429826,0.611111,0.174998,-0.862669,-0.227397
D,0.11071,-0.823687,0.786406,-0.106426,1.241813,-0.359445


In [16]:
# Sort the rows by index label (axis=0), oldest date first.
# The frame is already in this order, so the result looks the same as df.
df.sort_index(axis=0,ascending=True)

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445


In [17]:
# Sort the rows by index label (axis=0) in descending order, newest date first.
df.sort_index(axis=0,ascending=False)

Unnamed: 0,A,B,C,D
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-17,0.863749,-1.232035,-0.082802,0.11071


In [18]:
# Sort along axis=1: order the columns by label, descending (D, C, B, A).
df.sort_index(axis=1,ascending=False)

Unnamed: 0,D,C,B,A
2021-02-17,0.11071,-0.082802,-1.232035,0.863749
2021-02-18,-0.823687,0.429826,1.37405,0.351729
2021-02-19,0.786406,0.611111,-0.030887,0.253523
2021-02-20,-0.106426,0.174998,-0.543569,0.862217
2021-02-21,1.241813,-0.862669,-1.968236,1.36321
2021-02-22,-0.359445,-0.227397,0.782279,-0.264783


In [19]:
# Sort the rows by the values in column A, smallest first.
df.sort_values(by='A')

Unnamed: 0,A,B,C,D
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [20]:
# Sort the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445
2021-02-18,0.351729,1.37405,0.429826,-0.823687


In [21]:
# Select a single column by label; the result is a Series named 'A'.
df['A']

2021-02-17    0.863749
2021-02-18    0.351729
2021-02-19    0.253523
2021-02-20    0.862217
2021-02-21    1.363210
2021-02-22   -0.264783
Freq: D, Name: A, dtype: float64

In [22]:
# Confirm that single-column selection yields a pandas Series, not a DataFrame.
type(df['A'])

pandas.core.series.Series

In [23]:
# Positional row slice with []: rows 0, 1 and 2 (the stop position 3 is excluded).
df[0:3]

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406


In [24]:
# First five rows (positions 0 through 4).
df[:5]

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [25]:
# Every second row, starting from the first.
df[::2]

Unnamed: 0,A,B,C,D
2021-02-17,0.863749,-1.232035,-0.082802,0.11071
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [26]:
# All rows in reverse order (step of -1).
df[::-1]

Unnamed: 0,A,B,C,D
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-17,0.863749,-1.232035,-0.082802,0.11071


In [27]:
# Label-based lookup of one row: the cross-section for the first date, returned as a Series.
df.loc[dates[0]]

A    0.863749
B   -1.232035
C   -0.082802
D    0.110710
Name: 2021-02-17 00:00:00, dtype: float64

In [28]:
# Select on both axes by label: all rows, columns A and B.
df.loc[:,['A','B']]

Unnamed: 0,A,B
2021-02-17,0.863749,-1.232035
2021-02-18,0.351729,1.37405
2021-02-19,0.253523,-0.030887
2021-02-20,0.862217,-0.543569
2021-02-21,1.36321,-1.968236
2021-02-22,-0.264783,0.782279


In [29]:
# Label slice on the rows plus a column list.
# Unlike positional slices, both end labels (02-18 and 02-22) are included.
df.loc['20210218':'20210222',['A','B']]

Unnamed: 0,A,B
2021-02-18,0.351729,1.37405
2021-02-19,0.253523,-0.030887
2021-02-20,0.862217,-0.543569
2021-02-21,1.36321,-1.968236
2021-02-22,-0.264783,0.782279


In [30]:
# Scalar lookup by row label and column label.
df.loc[dates[0],'A']

0.8637491658417635

In [31]:
# Same scalar via .at, the accessor dedicated to single label-based values.
df.at[dates[0],'A']

0.8637491658417635

In [32]:
# Overwrite one cell by label: column A on the first date becomes 12.
# This mutates df in place, so the cells below see the new value.
df.at[dates[0],'A']=12
df

Unnamed: 0,A,B,C,D
2021-02-17,12.0,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445


In [33]:
# Positional lookup of one row: position 2 (the third row), returned as a Series.
df.iloc[2]

A    0.253523
B   -0.030887
C    0.611111
D    0.786406
Name: 2021-02-19 00:00:00, dtype: float64

In [34]:
# Select the rows at positions 2 and 4 by passing a list of integer positions.
df.iloc[[2,4]]

Unnamed: 0,A,B,C,D
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [35]:
# Positional slices on both axes: rows 1-3 and columns 2-3 (stop positions are excluded).
df.iloc[1:4,2:4]

Unnamed: 0,C,D
2021-02-18,0.429826,-0.823687
2021-02-19,0.611111,0.786406
2021-02-20,0.174998,-0.106426


In [36]:
# Rows in reverse order, columns at positions 1-3 (B, C, D).
# The explicit step of 1 in 1:4:1 is the default and could be omitted.
df.iloc[::-1,1:4:1]

Unnamed: 0,B,C,D
2021-02-22,0.782279,-0.227397,-0.359445
2021-02-21,-1.968236,-0.862669,1.241813
2021-02-20,-0.543569,0.174998,-0.106426
2021-02-19,-0.030887,0.611111,0.786406
2021-02-18,1.37405,0.429826,-0.823687
2021-02-17,-1.232035,-0.082802,0.11071


In [37]:
# Rows at positions 1 and 2, all columns.
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406


In [38]:
# Scalar lookup by integer position: row 1, column 2 (column C on the second date).
df.iat[1,2]

0.42982569146288274

In [39]:
# Small 3x3 integer frame with a default 0..2 row index, used to illustrate .iat.
df1 = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    columns=['A', 'B', 'C'],
)

In [40]:
df1

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [41]:
# Position (1, 2) of the small integer frame: second row, third column.
df1.iat[1,2]

6

In [42]:
# Position (1, 1) of the small integer frame: second row, second column.
df1.iat[1,1]

5

In [43]:
# Boolean indexing: keep only the rows where column A is greater than 1.
df[df.A>1]

Unnamed: 0,A,B,C,D
2021-02-17,12.0,-1.232035,-0.082802,0.11071
2021-02-21,1.36321,-1.968236,-0.862669,1.241813


In [44]:
# Element-wise mask: values that are not greater than 0 come back as missing (NaN).
# The shape of the frame is unchanged.
df[df>0]

Unnamed: 0,A,B,C,D
2021-02-17,12.0,,,0.11071
2021-02-18,0.351729,1.37405,0.429826,
2021-02-19,0.253523,,0.611111,0.786406
2021-02-20,0.862217,,0.174998,
2021-02-21,1.36321,,,1.241813
2021-02-22,,0.782279,,


In [45]:
df

Unnamed: 0,A,B,C,D
2021-02-17,12.0,-1.232035,-0.082802,0.11071
2021-02-18,0.351729,1.37405,0.429826,-0.823687
2021-02-19,0.253523,-0.030887,0.611111,0.786406
2021-02-20,0.862217,-0.543569,0.174998,-0.106426
2021-02-21,1.36321,-1.968236,-0.862669,1.241813
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445


In [46]:
# Add a string column E; the list supplies one label per row, in row order.
df['E']=['one','one','two','three','four','three']

In [47]:
df

Unnamed: 0,A,B,C,D,E
2021-02-17,12.0,-1.232035,-0.082802,0.11071,one
2021-02-18,0.351729,1.37405,0.429826,-0.823687,one
2021-02-19,0.253523,-0.030887,0.611111,0.786406,two
2021-02-20,0.862217,-0.543569,0.174998,-0.106426,three
2021-02-21,1.36321,-1.968236,-0.862669,1.241813,four
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445,three


In [48]:
# Keep the rows whose E value is one of the listed labels.
df[df['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2021-02-19,0.253523,-0.030887,0.611111,0.786406,two
2021-02-21,1.36321,-1.968236,-0.862669,1.241813,four


In [49]:
# Integers 1..6 labelled with six daily dates starting on 2021-02-17.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start='20210217', periods=6),
)

In [50]:
s1

2021-02-17    1
2021-02-18    2
2021-02-19    3
2021-02-20    4
2021-02-21    5
2021-02-22    6
Freq: D, dtype: int64

In [51]:
# Add the Series as column F. pandas matches the Series to the frame by index
# label; s1 was built over the same six dates as df, so every row gets a value.
df['F']=s1

In [52]:
df

Unnamed: 0,A,B,C,D,E,F
2021-02-17,12.0,-1.232035,-0.082802,0.11071,one,1
2021-02-18,0.351729,1.37405,0.429826,-0.823687,one,2
2021-02-19,0.253523,-0.030887,0.611111,0.786406,two,3
2021-02-20,0.862217,-0.543569,0.174998,-0.106426,three,4
2021-02-21,1.36321,-1.968236,-0.862669,1.241813,four,5
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445,three,6


In [53]:
# Set one cell by label: column A on the first date becomes 0.
df.at[dates[0],'A']=0

In [54]:
df

Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,-1.232035,-0.082802,0.11071,one,1
2021-02-18,0.351729,1.37405,0.429826,-0.823687,one,2
2021-02-19,0.253523,-0.030887,0.611111,0.786406,two,3
2021-02-20,0.862217,-0.543569,0.174998,-0.106426,three,4
2021-02-21,1.36321,-1.968236,-0.862669,1.241813,four,5
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445,three,6


In [55]:
# Set one cell by position: row 0, column 1 (column B) becomes 0.
df.iat[0,1]=0

In [56]:
df

Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,0.0,-0.082802,0.11071,one,1
2021-02-18,0.351729,1.37405,0.429826,-0.823687,one,2
2021-02-19,0.253523,-0.030887,0.611111,0.786406,two,3
2021-02-20,0.862217,-0.543569,0.174998,-0.106426,three,4
2021-02-21,1.36321,-1.968236,-0.862669,1.241813,four,5
2021-02-22,-0.264783,0.782279,-0.227397,-0.359445,three,6


In [57]:
# Overwrite every value of column D with 5, using a NumPy array with one entry per row.
# NOTE(review): the saved output shows D as integers. Newer pandas releases may
# write into the existing float64 column instead, showing 5.0 — confirm against
# the installed version.
df.loc[:,'D']=np.array([5]*len(df))

In [58]:
df


Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,0.0,-0.082802,5,one,1
2021-02-18,0.351729,1.37405,0.429826,5,one,2
2021-02-19,0.253523,-0.030887,0.611111,5,two,3
2021-02-20,0.862217,-0.543569,0.174998,5,three,4
2021-02-21,1.36321,-1.968236,-0.862669,5,four,5
2021-02-22,-0.264783,0.782279,-0.227397,5,three,6


In [59]:
df

Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,0.0,-0.082802,5,one,1
2021-02-18,0.351729,1.37405,0.429826,5,one,2
2021-02-19,0.253523,-0.030887,0.611111,5,two,3
2021-02-20,0.862217,-0.543569,0.174998,5,three,4
2021-02-21,1.36321,-1.968236,-0.862669,5,four,5
2021-02-22,-0.264783,0.782279,-0.227397,5,three,6


In [60]:
# Re-index df to its first four dates and add a new column G, which starts out empty (NaN).
# NOTE: this rebinds df1, which earlier held the 3x3 integer frame.
df1=df.reindex(index=dates[0:4],columns=list(df.columns)+['G'])

In [61]:
# Fill G for the first two dates only (label slices include both ends).
# The remaining rows stay missing.
df1.loc[dates[0]:dates[1],'G']=1

In [62]:
df1

Unnamed: 0,A,B,C,D,E,F,G
2021-02-17,0.0,0.0,-0.082802,5,one,1,1.0
2021-02-18,0.351729,1.37405,0.429826,5,one,2,1.0
2021-02-19,0.253523,-0.030887,0.611111,5,two,3,
2021-02-20,0.862217,-0.543569,0.174998,5,three,4,


In [63]:
# Drop every row that contains at least one missing value.
# The result is a new frame; df1 itself still has all four rows afterwards.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,E,F,G
2021-02-17,0.0,0.0,-0.082802,5,one,1,1.0
2021-02-18,0.351729,1.37405,0.429826,5,one,2,1.0


In [64]:
# Show df1 with its missing values replaced by 5 (here only column G has any).
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,E,F,G
2021-02-17,0.0,0.0,-0.082802,5,one,1,1.0
2021-02-18,0.351729,1.37405,0.429826,5,one,2,1.0
2021-02-19,0.253523,-0.030887,0.611111,5,two,3,5.0
2021-02-20,0.862217,-0.543569,0.174998,5,three,4,5.0


In [65]:
# Cumulative sum down each column.
# The string column E is "summed" too, which concatenates its labels (see the output).
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,0.0,-0.082802,5,one,1
2021-02-18,0.351729,1.37405,0.347024,10,oneone,3
2021-02-19,0.605252,1.343163,0.958135,15,oneonetwo,6
2021-02-20,1.467469,0.799594,1.133133,20,oneonetwothree,10
2021-02-21,2.83068,-1.168642,0.270464,25,oneonetwothreefour,15
2021-02-22,2.565897,-0.386363,0.043067,30,oneonetwothreefourthree,21


In [66]:
# Keep only the rows whose E label starts with 'one'.
# The vectorised .str accessor replaces the per-element lambda.
# na=False treats a missing label as "no match" instead of raising.
df[df['E'].str.startswith('one', na=False)]

Unnamed: 0,A,B,C,D,E,F
2021-02-17,0.0,0.0,-0.082802,5,one,1
2021-02-18,0.351729,1.37405,0.429826,5,one,2


In [67]:
# NOTE(review): Series.apply calls the lambda once per element, and np.max of a
# single number is that number, so this returns column F unchanged (see the output).
# For the column maximum use df['F'].max(), as in the next cell.
df['F'].apply(lambda x:np.max(x))

2021-02-17    1
2021-02-18    2
2021-02-19    3
2021-02-20    4
2021-02-21    5
2021-02-22    6
Freq: D, Name: F, dtype: int64

In [68]:
# Maximum of column F, reduced to a single scalar.
df['F'].max()

6

In [69]:
# Add a column G of mixed digit/letter strings, one per row.
df['G']=['21k','987hj','098k','764lk','7658hgk','908lkim']

In [70]:
df

Unnamed: 0,A,B,C,D,E,F,G
2021-02-17,0.0,0.0,-0.082802,5,one,1,21k
2021-02-18,0.351729,1.37405,0.429826,5,one,2,987hj
2021-02-19,0.253523,-0.030887,0.611111,5,two,3,098k
2021-02-20,0.862217,-0.543569,0.174998,5,three,4,764lk
2021-02-21,1.36321,-1.968236,-0.862669,5,four,5,7658hgk
2021-02-22,-0.264783,0.782279,-0.227397,5,three,6,908lkim


In [71]:
# Concise summary: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2021-02-17 to 2021-02-22
Freq: D
Data columns (total 7 columns):
A    6 non-null float64
B    6 non-null float64
C    6 non-null float64
D    6 non-null int32
E    6 non-null object
F    6 non-null int64
G    6 non-null object
dtypes: float64(3), int32(1), int64(1), object(2)
memory usage: 520.0+ bytes
