## *Pandas Library*

In [1]:
# Importing Pandas Library
import pandas as pd

In [2]:
import numpy as np

In [3]:
# Object creation
# A Series is a one-dimensional labelled array - effectively a single column.
# The NaN entry makes pandas store the values as float64.
s = pd.Series(data=[2, 5, np.nan, 7, 8, 9])
s

0    2.0
1    5.0
2    NaN
3    7.0
4    8.0
5    9.0
dtype: float64

In [4]:
# Date index: ten consecutive calendar days starting on 2022-03-15.

date = pd.date_range(start="20220315", periods=10, freq="D")
date

DatetimeIndex(['2022-03-15', '2022-03-16', '2022-03-17', '2022-03-18',
               '2022-03-19', '2022-03-20', '2022-03-21', '2022-03-22',
               '2022-03-23', '2022-03-24'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# DataFrame: standard-normal values, one row per entry of `date`, columns A-D.
# A seeded Generator is used so that Restart & Run All reproduces the same frame
# (an unseeded np.random.randn call yields different numbers on every run).
# The row count follows len(date) so the data and the index cannot fall out of step.
# NOTE: outputs stored before seeding show the old values until the notebook is re-run.

df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((len(date), 4)),
    index=date,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [6]:
# 5x5 frame of standard-normal values with an explicit integer index 1..5 and columns A-E.
# The Generator is seeded so the values are identical on every run of the notebook
# (an unseeded np.random.randn call would change them each time).
# NOTE: outputs stored before seeding show the old values until the notebook is re-run.
df2 = pd.DataFrame(
    np.random.default_rng(seed=1).standard_normal((5, 5)),
    index=[1, 2, 3, 4, 5],
    columns=["A", "B", "C", "D", "E"],
)
df2

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [7]:
# Build a frame from a dict: each key becomes a column. The scalar entries
# ("A", "B", "F") are broadcast to the four rows defined by the Series,
# array and Categorical entries.
df3 = pd.DataFrame(
    {
        "A": 1,
        "B": pd.Timestamp("20220315"),
        "C": pd.Series(1, index=[0, 1, 2, 3]),
        "D": np.full(4, 3, dtype="float32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

df3

Unnamed: 0,A,B,C,D,E,F
0,1,2022-03-15,1,3.0,test,foo
1,1,2022-03-15,1,3.0,train,foo
2,1,2022-03-15,1,3.0,test,foo
3,1,2022-03-15,1,3.0,train,foo


In [8]:
# Checking types: show the dtype pandas assigned to each column of df3.
df3.dtypes

A             int64
B    datetime64[ns]
C             int64
D           float32
E          category
F            object
dtype: object

In [9]:
# First three rows of df.
df.head(3)

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483


In [10]:
# Last two rows of df.
df.tail(2)

Unnamed: 0,A,B,C,D
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [11]:
# to_numpy(): return the frame's values as a NumPy array (index and column labels are dropped).
df.to_numpy()

array([[ 0.18837368,  0.08516448, -0.38347869, -0.23461272],
       [ 1.56360495,  1.2511007 , -0.23079481, -0.44803035],
       [ 1.3988835 , -0.1736834 ,  2.00912398,  0.16848275],
       [ 1.04014845,  0.11581393, -1.01121647, -0.21579883],
       [-0.68402   , -1.34662141,  0.60158979,  0.81666566],
       [ 1.07890559,  0.04149244,  0.38101303,  0.17168729],
       [-0.58691695,  0.0972527 , -1.63132208,  2.25829182],
       [ 0.36239643,  0.33842571,  0.30213116, -1.0888263 ],
       [ 2.12126811, -0.27928802,  1.91131913,  1.15600191],
       [ 1.18671705, -0.44188337, -2.26059832, -1.35041633]])

In [12]:
# Same conversion for the 5x5 frame df2.
df2.to_numpy()

array([[-5.15353256e-01, -1.42653087e+00,  9.57923038e-01,
        -5.98860577e-01,  9.94438453e-01],
       [ 2.26375859e-01, -1.02136138e+00,  2.47730547e-01,
        -8.84024136e-01,  2.56106614e+00],
       [ 1.18545348e+00, -9.70615031e-01,  2.19004502e-01,
        -3.62993192e-01,  9.15286989e-01],
       [ 1.88328026e-03,  2.50374126e-01,  6.93248741e-01,
         1.33926888e-01,  7.38878007e-01],
       [ 1.14099169e+00,  1.69652995e+00, -2.50950897e-01,
        -1.97049768e+00, -6.65104129e-01]])

In [13]:
# describe(): per-column summary statistics (count, mean, std, min, quartiles, max).

df.describe()

Unnamed: 0,A,B,C,D
count,10.0,10.0,10.0,10.0
mean,0.766936,-0.031223,-0.031223,0.123344
std,0.922136,0.651383,1.385349,1.070846
min,-0.68402,-1.346621,-2.260598,-1.350416
25%,0.231879,-0.252887,-0.854282,-0.394676
50%,1.059527,0.063328,0.035668,-0.023658
75%,1.345842,0.111174,0.546446,0.655421
max,2.121268,1.251101,2.009124,2.258292


In [14]:
# transpose(): swap rows and columns, so the dates become the column labels.

df.transpose()

Unnamed: 0,2022-03-15,2022-03-16,2022-03-17,2022-03-18,2022-03-19,2022-03-20,2022-03-21,2022-03-22,2022-03-23,2022-03-24
A,0.188374,1.563605,1.398884,1.040148,-0.68402,1.078906,-0.586917,0.362396,2.121268,1.186717
B,0.085164,1.251101,-0.173683,0.115814,-1.346621,0.041492,0.097253,0.338426,-0.279288,-0.441883
C,-0.383479,-0.230795,2.009124,-1.011216,0.60159,0.381013,-1.631322,0.302131,1.911319,-2.260598
D,-0.234613,-0.44803,0.168483,-0.215799,0.816666,0.171687,2.258292,-1.088826,1.156002,-1.350416


In [15]:
# df itself is unchanged: transpose() returned a new frame.
df

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [16]:
# Sort by column labels (axis=1), ascending; the columns are already A..D, so the order is unchanged.
df.sort_index(axis=1,ascending=True)

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [17]:
# Redisplay df; sort_index() returns a new frame rather than modifying df.
df

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [18]:
# Sort by row labels (axis=0), descending: latest date first.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-15,0.188374,0.085164,-0.383479,-0.234613


In [19]:
# Sort by column labels (axis=1), ascending: A, B, C, D.
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [20]:
# Sort by column labels (axis=1), descending: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-03-15,-0.234613,-0.383479,0.085164,0.188374
2022-03-16,-0.44803,-0.230795,1.251101,1.563605
2022-03-17,0.168483,2.009124,-0.173683,1.398884
2022-03-18,-0.215799,-1.011216,0.115814,1.040148
2022-03-19,0.816666,0.60159,-1.346621,-0.68402
2022-03-20,0.171687,0.381013,0.041492,1.078906
2022-03-21,2.258292,-1.631322,0.097253,-0.586917
2022-03-22,-1.088826,0.302131,0.338426,0.362396
2022-03-23,1.156002,1.911319,-0.279288,2.121268
2022-03-24,-1.350416,-2.260598,-0.441883,1.186717


In [21]:
# Redisplay df in its original order for comparison with the sorted views.
df

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [22]:
# Sort by values: order the rows by column B, smallest first.
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-16,1.563605,1.251101,-0.230795,-0.44803


In [23]:
# Order the rows by column B, largest first.
df.sort_values(by="B", ascending=False)

Unnamed: 0,A,B,C,D
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416
2022-03-19,-0.68402,-1.346621,0.60159,0.816666


In [24]:
# Select a single column by label; the result is a Series named "B".
df["B"]

2022-03-15    0.085164
2022-03-16    1.251101
2022-03-17   -0.173683
2022-03-18    0.115814
2022-03-19   -1.346621
2022-03-20    0.041492
2022-03-21    0.097253
2022-03-22    0.338426
2022-03-23   -0.279288
2022-03-24   -0.441883
Freq: D, Name: B, dtype: float64

In [25]:
# Slicing with [] selects rows by position: the first two rows (the end is exclusive).
df[0:2]

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803


In [26]:
# Redisplay df2 for reference.
df2

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [27]:
# Redisplay df for reference.
df

Unnamed: 0,A,B,C,D
2022-03-15,0.188374,0.085164,-0.383479,-0.234613
2022-03-16,1.563605,1.251101,-0.230795,-0.44803
2022-03-17,1.398884,-0.173683,2.009124,0.168483
2022-03-18,1.040148,0.115814,-1.011216,-0.215799
2022-03-19,-0.68402,-1.346621,0.60159,0.816666
2022-03-20,1.078906,0.041492,0.381013,0.171687
2022-03-21,-0.586917,0.097253,-1.631322,2.258292
2022-03-22,0.362396,0.338426,0.302131,-1.088826
2022-03-23,2.121268,-0.279288,1.911319,1.156002
2022-03-24,1.186717,-0.441883,-2.260598,-1.350416


In [28]:
# Redisplay df3 for reference.
df3

Unnamed: 0,A,B,C,D,E,F
0,1,2022-03-15,1,3.0,test,foo
1,1,2022-03-15,1,3.0,train,foo
2,1,2022-03-15,1,3.0,test,foo
3,1,2022-03-15,1,3.0,train,foo


In [29]:
# Redisplay df2 before the label-based (.loc) selections.
df2

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [30]:
# .loc selects by label: all rows, columns A and B.
df2.loc[:,["A", "B"]]

Unnamed: 0,A,B
1,-0.515353,-1.426531
2,0.226376,-1.021361
3,1.185453,-0.970615
4,0.001883,0.250374
5,1.140992,1.69653


In [31]:
# A label slice includes both endpoints: rows labelled 1, 2 and 3.
df2.loc[1:3, ["A", "B"]]

Unnamed: 0,A,B
1,-0.515353,-1.426531
2,0.226376,-1.021361
3,1.185453,-0.970615


In [32]:
# A list of row labels picks exactly those rows (1 and 4).
df2.loc[[1, 4], ["A", "B"]]

Unnamed: 0,A,B
1,-0.515353,-1.426531
4,0.001883,0.250374


In [33]:
# A single row label returns a Series indexed by the chosen columns.
df2.loc[1, ["A", "B"]]

A   -0.515353
B   -1.426531
Name: 1, dtype: float64

In [34]:
# Column label slices are inclusive as well: "A":"D" includes D.
df2.loc[1,"A":"D"]

A   -0.515353
B   -1.426531
C    0.957923
D   -0.598861
Name: 1, dtype: float64

In [35]:
# Redisplay df2 for reference.
df2

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [36]:
# .at reads a single scalar by row label and column label.
df2.at[5,"A"]

1.1409916865582295

In [37]:
# Redisplay df2 before the position-based (.iloc) selections.
df2

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [44]:
# .iloc selects by integer position; 2:4 is end-exclusive, giving the rows labelled 3 and 4.
df2.iloc[2:4]   

Unnamed: 0,A,B,C,D,E
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878


In [45]:
# Positional rows 1-2 and positional columns 2-3 (columns C and D).
df2.iloc[1:3, 2:4]

Unnamed: 0,C,D
2,0.247731,-0.884024
3,0.219005,-0.362993


In [46]:
# Positional rows 1-3, all columns.
df2.iloc[1:4, : ]

Unnamed: 0,A,B,C,D,E
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878


In [47]:
# All rows, positional columns 1-3 (columns B, C and D).
df2.iloc[ : , 1:4 ]

Unnamed: 0,B,C,D
1,-1.426531,0.957923,-0.598861
2,-1.021361,0.247731,-0.884024
3,-0.970615,0.219005,-0.362993
4,0.250374,0.693249,0.133927
5,1.69653,-0.250951,-1.970498


In [48]:
# Boolean mask: keep the values greater than 2; every other cell becomes NaN.
df[df > 2]

Unnamed: 0,A,B,C,D
2022-03-15,,,,
2022-03-16,,,,
2022-03-17,,,2.009124,
2022-03-18,,,,
2022-03-19,,,,
2022-03-20,,,,
2022-03-21,,,,2.258292
2022-03-22,,,,
2022-03-23,2.121268,,,
2022-03-24,,,,


In [49]:
# Work on a separate deep copy of df2 (deep=True is the pandas default, written out here).
df4 = df2.copy(deep=True)
df4

Unnamed: 0,A,B,C,D,E
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438
2,0.226376,-1.021361,0.247731,-0.884024,2.561066
3,1.185453,-0.970615,0.219005,-0.362993,0.915287
4,0.001883,0.250374,0.693249,0.133927,0.738878
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104


In [51]:
# Add a new column F to df4, one value per row.
# NOTE(review): the values are the strings "False"/"True", not booleans, so the
# column holds text and cannot be used directly as a boolean mask - confirm this is intended.
df4["F"] = ["False", "True", "False", "True", "False"]

In [52]:
# Show df4 with the new column F.
df4

Unnamed: 0,A,B,C,D,E,F
1,-0.515353,-1.426531,0.957923,-0.598861,0.994438,False
2,0.226376,-1.021361,0.247731,-0.884024,2.561066,True
3,1.185453,-0.970615,0.219005,-0.362993,0.915287,False
4,0.001883,0.250374,0.693249,0.133927,0.738878,True
5,1.140992,1.69653,-0.250951,-1.970498,-0.665104,False
