## *Pandas Library*

In [1]:
# Importing Pandas Library
import pandas as pd

In [2]:
import numpy as np

In [3]:
# Object creation: a Series is a one-dimensional labelled array (a single column).
# The NaN entry means the integer inputs are stored as float64.
s = pd.Series(data=[2, 5, np.nan, 7, 8, 9])
s

0    2.0
1    5.0
2    NaN
3    7.0
4    8.0
5    9.0
dtype: float64

In [4]:
# Date index: ten consecutive calendar days beginning on 2022-03-15.

date = pd.date_range(start="20220315", periods=10, freq="D")
date

DatetimeIndex(['2022-03-15', '2022-03-16', '2022-03-17', '2022-03-18',
               '2022-03-19', '2022-03-20', '2022-03-21', '2022-03-22',
               '2022-03-23', '2022-03-24'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# DataFrame: one row per entry of `date`, four columns (A-D) of standard-normal draws.
# A seeded Generator replaces the unseeded np.random.randn call so that
# "Restart Kernel -> Run All" produces the same values every time.
# NOTE: outputs recorded from an earlier unseeded run will differ until the
# notebook is re-executed.

df = pd.DataFrame(
    np.random.default_rng(seed=42).standard_normal((10, 4)),
    index=date,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [6]:
# 5x5 frame of standard-normal draws with integer row labels 1..5 and columns A-E.
# The Generator is seeded so the values are reproducible across kernel restarts
# (outputs recorded from an earlier unseeded run will differ until re-executed).
df2 = pd.DataFrame(
    np.random.default_rng(seed=7).standard_normal((5, 5)),
    index=[1, 2, 3, 4, 5],
    columns=["A", "B", "C", "D", "E"],
)
df2

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [7]:
# Frame built from a dict of column specs with different types.
# Scalar entries (A, B, F) are broadcast to the four rows defined by the
# array-like columns (C, D, E).
df3 = pd.DataFrame(
    dict(
        A=1,
        B=pd.Timestamp("20220315"),
        C=pd.Series(1, index=[0, 1, 2, 3]),
        D=np.full(4, 3, dtype="float32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)

df3

Unnamed: 0,A,B,C,D,E,F
0,1,2022-03-15,1,3.0,test,foo
1,1,2022-03-15,1,3.0,train,foo
2,1,2022-03-15,1,3.0,test,foo
3,1,2022-03-15,1,3.0,train,foo


In [8]:
# Checking types: `dtypes` lists the dtype of every column. df3 mixes integer,
# datetime, float32, categorical and object columns in one frame.
df3.dtypes

A             int64
B    datetime64[ns]
C             int64
D           float32
E          category
F            object
dtype: object

In [9]:
# head(n) returns the first n rows - here the first three dates.
df.head(3)

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227


In [10]:
# tail(n) returns the last n rows - here the final two dates.
df.tail(2)

Unnamed: 0,A,B,C,D
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [11]:
# to_numpy() method: returns the frame's values as a 2-D NumPy array;
# the row and column labels are not part of the result.
df.to_numpy()

array([[ 1.27036634e+00, -9.79546265e-02, -6.23386664e-01,
        -6.81878527e-01],
       [ 9.32715757e-04,  4.91226243e-01,  2.40821075e-01,
        -1.17782416e+00],
       [-1.00315186e+00,  5.92850911e-01, -3.96003560e-01,
         1.02267185e-02],
       [-1.14175497e+00,  2.58841757e-01,  5.53137242e-01,
         6.89823468e-01],
       [-6.49102463e-02, -2.99149270e-01,  4.52813010e-02,
         1.80486104e+00],
       [ 1.29281646e+00, -1.97897739e-01,  1.73689181e-03,
         1.42218215e+00],
       [ 2.49615741e+00,  2.65613790e+00,  5.60879858e-01,
        -1.80676933e+00],
       [ 1.17890090e+00, -9.51549482e-01,  1.47942918e+00,
         1.10473231e+00],
       [ 1.39510332e+00, -1.61233973e-01,  2.23578994e+00,
        -1.59376089e-01],
       [-2.41580501e-02, -1.06806618e+00, -3.73056176e-01,
         2.24781452e-01]])

In [12]:
# Same conversion for df2: a 5x5 float array without the index/column labels.
df2.to_numpy()

array([[-1.59547628,  0.55918288,  0.70590682, -0.55948863, -0.78656042],
       [ 0.69985839, -0.11147842,  0.21811296, -1.87814579, -0.05506702],
       [ 0.56156761, -1.47979971, -1.53561235,  0.73498174, -1.32288768],
       [ 0.84343626, -1.47013416, -1.1236277 , -0.96207612,  0.59151647],
       [-1.32232883,  0.86571774, -0.43242258,  1.00282326,  1.00808942]])

In [13]:
# describe(): per-column summary statistics (count, mean, std, min, quartiles, max).

df.describe()

Unnamed: 0,A,B,C,D
count,10.0,10.0,10.0,10.0
mean,0.54003,0.122321,0.372463,0.143076
std,1.166691,1.042203,0.892274,1.152986
min,-1.141755,-1.068066,-0.623387,-1.806769
25%,-0.054722,-0.273836,-0.279358,-0.551253
50%,0.589917,-0.129594,0.143051,0.117504
75%,1.287204,0.43313,0.558944,1.001005
max,2.496157,2.656138,2.23579,1.804861


In [14]:
# transpose(): swaps rows and columns, so the dates become the column labels
# and A-D become the row labels. The result is a new object.

df.transpose()

Unnamed: 0,2022-03-15,2022-03-16,2022-03-17,2022-03-18,2022-03-19,2022-03-20,2022-03-21,2022-03-22,2022-03-23,2022-03-24
A,1.270366,0.000933,-1.003152,-1.141755,-0.06491,1.292816,2.496157,1.178901,1.395103,-0.024158
B,-0.097955,0.491226,0.592851,0.258842,-0.299149,-0.197898,2.656138,-0.951549,-0.161234,-1.068066
C,-0.623387,0.240821,-0.396004,0.553137,0.045281,0.001737,0.56088,1.479429,2.23579,-0.373056
D,-0.681879,-1.177824,0.010227,0.689823,1.804861,1.422182,-1.806769,1.104732,-0.159376,0.224781


In [15]:
# df itself is unchanged by the transpose() call above.
df

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [16]:
# Sort by column labels (axis=1) in ascending order. The columns are already
# in A-D order, so the result looks identical to df.
df.sort_index(axis=1,ascending=True)

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [17]:
# Redisplay df for comparison with the sorted views that follow.
df

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [18]:
# Sort by row labels (axis=0) in descending order: the latest date comes first.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879


In [19]:
# Column labels ascending (repeats the earlier ascending column sort), shown
# for contrast with the descending column sort in the next cell.
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [20]:
# Column labels in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-03-15,-0.681879,-0.623387,-0.097955,1.270366
2022-03-16,-1.177824,0.240821,0.491226,0.000933
2022-03-17,0.010227,-0.396004,0.592851,-1.003152
2022-03-18,0.689823,0.553137,0.258842,-1.141755
2022-03-19,1.804861,0.045281,-0.299149,-0.06491
2022-03-20,1.422182,0.001737,-0.197898,1.292816
2022-03-21,-1.806769,0.56088,2.656138,2.496157
2022-03-22,1.104732,1.479429,-0.951549,1.178901
2022-03-23,-0.159376,2.23579,-0.161234,1.395103
2022-03-24,0.224781,-0.373056,-1.068066,-0.024158


In [21]:
# sort_index returned new frames each time; df keeps its original row and column order.
df

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [22]:
# Sort by values: order the rows by column B, smallest value first.
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-21,2.496157,2.656138,0.56088,-1.806769


In [23]:
# Same sort on column B, largest value first.
df.sort_values(by="B", ascending=False)

Unnamed: 0,A,B,C,D
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [24]:
# Selecting a single column with [] returns a Series indexed by the dates.
df["B"]

2022-03-15   -0.097955
2022-03-16    0.491226
2022-03-17    0.592851
2022-03-18    0.258842
2022-03-19   -0.299149
2022-03-20   -0.197898
2022-03-21    2.656138
2022-03-22   -0.951549
2022-03-23   -0.161234
2022-03-24   -1.068066
Freq: D, Name: B, dtype: float64

In [25]:
# Slicing with [] selects rows: 0:2 gives the first two rows (stop is excluded).
df[0:2]

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824


In [26]:
# Redisplay df2 for reference.
df2

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [27]:
# Redisplay df for reference.
df

Unnamed: 0,A,B,C,D
2022-03-15,1.270366,-0.097955,-0.623387,-0.681879
2022-03-16,0.000933,0.491226,0.240821,-1.177824
2022-03-17,-1.003152,0.592851,-0.396004,0.010227
2022-03-18,-1.141755,0.258842,0.553137,0.689823
2022-03-19,-0.06491,-0.299149,0.045281,1.804861
2022-03-20,1.292816,-0.197898,0.001737,1.422182
2022-03-21,2.496157,2.656138,0.56088,-1.806769
2022-03-22,1.178901,-0.951549,1.479429,1.104732
2022-03-23,1.395103,-0.161234,2.23579,-0.159376
2022-03-24,-0.024158,-1.068066,-0.373056,0.224781


In [28]:
# Redisplay df3 for reference.
df3

Unnamed: 0,A,B,C,D,E,F
0,1,2022-03-15,1,3.0,test,foo
1,1,2022-03-15,1,3.0,train,foo
2,1,2022-03-15,1,3.0,test,foo
3,1,2022-03-15,1,3.0,train,foo


In [29]:
# df2 (row labels 1-5, columns A-E) is the frame used in the .loc examples below.
df2

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [30]:
# .loc selects by label: all rows (:), columns A and B.
df2.loc[:,["A", "B"]]

Unnamed: 0,A,B
1,-1.595476,0.559183
2,0.699858,-0.111478
3,0.561568,-1.4798
4,0.843436,-1.470134
5,-1.322329,0.865718


In [31]:
# Label slice 1:3 on the rows: with .loc both endpoints are included (rows 1, 2 and 3).
df2.loc[1:3, ["A", "B"]]

Unnamed: 0,A,B
1,-1.595476,0.559183
2,0.699858,-0.111478
3,0.561568,-1.4798


In [32]:
# A list of row labels picks exactly those rows (labels 1 and 4).
df2.loc[[1, 4], ["A", "B"]]

Unnamed: 0,A,B
1,-1.595476,0.559183
4,0.843436,-1.470134


In [33]:
# A single row label returns a Series whose index is the selected column labels.
df2.loc[1, ["A", "B"]]

A   -1.595476
B    0.559183
Name: 1, dtype: float64

In [34]:
# Column label slice "A":"D" is inclusive of both endpoints (A, B, C and D).
df2.loc[1,"A":"D"]

A   -1.595476
B    0.559183
C    0.705907
D   -0.559489
Name: 1, dtype: float64

In [35]:
# Redisplay df2 for reference.
df2

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [36]:
# .at fetches a single scalar by row label and column label.
df2.at[5,"A"]

-1.322328829241098

In [37]:
# df2 again, as the reference for the position-based .iloc examples below.
df2

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [38]:
df2.iloc[2:4]   # .iloc is position-based: positions 2 and 3 (stop excluded) -> row labels 3 and 4

Unnamed: 0,A,B,C,D,E
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516


In [39]:
# Rows at positions 1-2 and columns at positions 2-3 (row labels 2, 3; columns C, D).
df2.iloc[1:3, 2:4]

Unnamed: 0,C,D
2,0.218113,-1.878146
3,-1.535612,0.734982


In [40]:
# Rows at positions 1-3, all columns.
df2.iloc[1:4, : ]

Unnamed: 0,A,B,C,D,E
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516


In [41]:
# All rows, columns at positions 1-3 (B, C and D).
df2.iloc[ : , 1:4 ]

Unnamed: 0,B,C,D
1,0.559183,0.705907,-0.559489
2,-0.111478,0.218113,-1.878146
3,-1.4798,-1.535612,0.734982
4,-1.470134,-1.123628,-0.962076
5,0.865718,-0.432423,1.002823


In [42]:
# Boolean mask: values greater than 2 are kept, every other cell becomes NaN.
df[df > 2]

Unnamed: 0,A,B,C,D
2022-03-15,,,,
2022-03-16,,,,
2022-03-17,,,,
2022-03-18,,,,
2022-03-19,,,,
2022-03-20,,,,
2022-03-21,2.496157,2.656138,,
2022-03-22,,,,
2022-03-23,,,2.23579,
2022-03-24,,,,


In [43]:
# copy() returns a new DataFrame (a deep copy by default), so changes made to
# df4 afterwards are not applied to df2.
df4 = df2.copy()
df4

Unnamed: 0,A,B,C,D,E
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516
5,-1.322329,0.865718,-0.432423,1.002823,1.008089


In [44]:
# Add a new column F, one value per row (the list length must match the 5 rows).
# NOTE(review): the values are the strings "False"/"True", not booleans -
# confirm this is intended before using F as a boolean mask.
df4["F"] = ["False", "True", "False", "True", "False"]

In [45]:
# df4 now carries the extra column F alongside A-E.
df4

Unnamed: 0,A,B,C,D,E,F
1,-1.595476,0.559183,0.705907,-0.559489,-0.78656,False
2,0.699858,-0.111478,0.218113,-1.878146,-0.055067,True
3,0.561568,-1.4798,-1.535612,0.734982,-1.322888,False
4,0.843436,-1.470134,-1.123628,-0.962076,0.591516,True
5,-1.322329,0.865718,-0.432423,1.002823,1.008089,False
