# PANDAS TUTORIAL

In [64]:
import pandas as pd

In [65]:
# Print the installed pandas version so the outputs below can be matched to a release
print(pd.__version__)

1.1.3


# Series: create, manipulate, query, and delete

In [66]:
# Build a Series from a plain Python list; pandas supplies a default 0..n-1 integer index.
arr = [1, 2, 3, 4, 5, 6]
s1 = pd.Series(data=arr)
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [67]:
# Reuse the values in arr, but attach an explicit index instead of the default 0..n-1 one.
order = [2, 4, 6, 8, 10, 12]
s2 = pd.Series(data=arr, index=order)
s2

2     1
4     2
6     3
8     4
10    5
12    6
dtype: int64

In [68]:
import numpy as np
# Draw five random floats into a 1-D ndarray. No seed is set, so the values differ on every run.
n = np.random.rand(5)
n

array([0.12316092, 0.78995063, 0.81376957, 0.60722446, 0.4681174 ])

In [69]:
# Label the random values with letters. Note that this rebinds the name s2 used earlier.
ind = ['a', 'b', 'c', 'd', 'e']
s2 = pd.Series(data=n, index=ind)
s2

a    0.123161
b    0.789951
c    0.813770
d    0.607224
e    0.468117
dtype: float64

In [70]:
# Create a Series from a dictionary: the keys become the index, the values become the data.
d = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
S1 = pd.Series(data=d)
S1

a    1
b    2
c    3
d    4
dtype: int64

In [71]:
# Show the Series with its original lowercase labels first ...
print(S1)
# ... then relabel it in place by assigning a new list of labels to .index.
S1.index = ['A', 'B', 'C', 'D']
S1

a    1
b    2
c    3
d    4
dtype: int64


A    1
B    2
C    3
D    4
dtype: int64

In [72]:
# Slicing by position: the first three elements, whatever their labels are.
S1.iloc[:3]

A    1
B    2
C    3
dtype: int64

In [73]:
# Slicing by position: everything from the fourth element onwards.
S1.iloc[3:]

D    4
dtype: int64

In [74]:
# Join the integer-indexed s1 and the letter-indexed S1 end to end into one Series.
# pd.concat is used because Series.append was deprecated in pandas 1.4 and removed in 2.0;
# the result is the same: s1's rows followed by S1's rows, original labels preserved.
S2 = pd.concat([s1, S1])
S2

0    1
1    2
2    3
3    4
4    5
5    6
A    1
B    2
C    3
D    4
dtype: int64

In [75]:
# drop returns a new Series without the label 'D'; S2 itself is left unmodified.
S2.drop(labels='D')


0    1
1    2
2    3
3    4
4    5
5    6
A    1
B    2
C    3
dtype: int64

In [76]:
# S2 still contains 'D': the result of the earlier drop was displayed but never assigned back.
S2

0    1
1    2
2    3
3    4
4    5
5    6
A    1
B    2
C    3
D    4
dtype: int64

# Series Operations


In [77]:
# Two equal-length lists used to demonstrate element-wise Series arithmetic.
# arr1 jumps from 5 to 7, so its last element pairs differently from the rest.
arr1 = [0, 1, 2, 3, 4, 5, 7]
arr2 = list(range(6, 13))

In [78]:
# First operand for the arithmetic examples (default integer index).
s5 = pd.Series(data=arr1)
s5

0    0
1    1
2    2
3    3
4    4
5    5
6    7
dtype: int64

In [79]:
# Second operand for the arithmetic examples (default integer index).
s6 = pd.Series(data=arr2)
s6

0     6
1     7
2     8
3     9
4    10
5    11
6    12
dtype: int64

In [80]:
# Element-wise sum of the two Series, matched up by index label (same result as s5 + s6).
s5.add(s6)

0     6
1     8
2    10
3    12
4    14
5    16
6    19
dtype: int64

In [81]:
# Element-wise difference s5 - s6.
s5.sub(s6)

0   -6
1   -6
2   -6
3   -6
4   -6
5   -6
6   -5
dtype: int64

In [82]:
# Operands reversed: s6 - s5, so every sign flips compared with s5.sub(s6).
s6.sub(s5)

0    6
1    6
2    6
3    6
4    6
5    6
6    5
dtype: int64

In [83]:
# Element-wise product of the two Series.
s5.multiply(s6)

0     0
1     7
2    16
3    27
4    40
5    55
6    84
dtype: int64

In [84]:
# Element-wise true division s6 / s5. The first element of s5 is 0, so the first
# quotient is inf rather than an error, and the result is float64.
s6.divide(s5)

0         inf
1    7.000000
2    4.000000
3    3.000000
4    2.500000
5    2.200000
6    1.714286
dtype: float64

In [85]:
# Scalar reductions over s6: the median is returned as a float, the max stays an integer.
print(s6.median())
print(s6.max())

9.0
12


In [86]:
# Smallest value in s6.
print(s6.min())

6


# CREATE DATAFRAME

In [87]:
# Six consecutive daily timestamps to serve as a row index. 'today' resolves to the
# current date *and* time, so the values (including the time of day) change on every run.
date = pd.date_range(start='today', periods=6)
date

DatetimeIndex(['2021-08-01 13:03:54.235880', '2021-08-02 13:03:54.235880',
               '2021-08-03 13:03:54.235880', '2021-08-04 13:03:54.235880',
               '2021-08-05 13:03:54.235880', '2021-08-06 13:03:54.235880'],
              dtype='datetime64[ns]', freq='D')

In [88]:
# 6 rows x 4 columns of random floats. No seed is set, so the values differ on every run.
num_arr = np.random.rand(6, 4)

In [89]:
# Inspect the generated 6x4 array before wrapping it in a DataFrame.
num_arr

array([[0.47863766, 0.17698279, 0.03390802, 0.66029263],
       [0.58760387, 0.82673123, 0.24567111, 0.77564797],
       [0.92922227, 0.04333618, 0.0187778 , 0.73345055],
       [0.54376472, 0.82277586, 0.03781842, 0.66661632],
       [0.1120068 , 0.28901699, 0.00883329, 0.4952991 ],
       [0.31300995, 0.98356249, 0.40857742, 0.19972841]])

In [90]:
# Column labels: one name per column of the 6x4 random array.
columns = ['A', 'B', 'C', 'D']

In [91]:
# Assemble the frame: random values as data, dates as the row index, letters as column labels.
df1 = pd.DataFrame(
    data=num_arr,
    index=date,
    columns=columns,
)
df1

Unnamed: 0,A,B,C,D
2021-08-01 13:03:54.235880,0.478638,0.176983,0.033908,0.660293
2021-08-02 13:03:54.235880,0.587604,0.826731,0.245671,0.775648
2021-08-03 13:03:54.235880,0.929222,0.043336,0.018778,0.733451
2021-08-04 13:03:54.235880,0.543765,0.822776,0.037818,0.666616
2021-08-05 13:03:54.235880,0.112007,0.289017,0.008833,0.495299
2021-08-06 13:03:54.235880,0.31301,0.983562,0.408577,0.199728


In [92]:
# Create a DataFrame from a dictionary of equal-length lists: each key becomes a column.
# np.nan marks the one missing Age; labels supplies the row index.
labels = ['a', 'b', 'c', 'd', 'e', 'f']
data = {
    'Animal': ['dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
    'Age': [2.0, 1.5, 2, 5, np.nan, 1.0],
    'visits': [1, 3, 2, 1, 1, 4],
    'priorities': ['yes', 'no', 'yes', 'yes', 'no', 'no'],
}
df2 = pd.DataFrame(data=data, index=labels)
df2

Unnamed: 0,Animal,Age,visits,priorities
a,dog,2.0,1,yes
b,cat,1.5,3,no
c,snake,2.0,2,yes
d,cat,5.0,1,yes
e,dog,,1,no
f,dog,1.0,4,no


In [93]:
# Show the dtype pandas inferred for each column of the DataFrame
df2.dtypes

Animal         object
Age           float64
visits          int64
priorities     object
dtype: object

In [94]:
# First two rows of df2.
df2.head(2)

Unnamed: 0,Animal,Age,visits,priorities
a,dog,2.0,1,yes
b,cat,1.5,3,no


In [95]:
# Last three rows of df2.
df2.tail(3)

Unnamed: 0,Animal,Age,visits,priorities
d,cat,5.0,1,yes
e,dog,,1,no
f,dog,1.0,4,no


In [96]:
# Row labels of df2.
df2.index

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [97]:
# Column labels of df2.
df2.columns

Index(['Animal', 'Age', 'visits', 'priorities'], dtype='object')

In [98]:
# Underlying data as a NumPy array. The columns mix strings, floats and ints, so the
# array falls back to dtype=object. to_numpy() is the accessor the pandas documentation
# recommends over the older .values attribute; it returns the same array here.
df2.to_numpy()

array([['dog', 2.0, 1, 'yes'],
       ['cat', 1.5, 3, 'no'],
       ['snake', 2.0, 2, 'yes'],
       ['cat', 5.0, 1, 'yes'],
       ['dog', nan, 1, 'no'],
       ['dog', 1.0, 4, 'no']], dtype=object)

In [99]:
# NOTE(review): without call parentheses this evaluates to the bound method object itself
# (its repr embeds the whole frame), not summary statistics. Use df2.describe() to compute them.
df2.describe

<bound method NDFrame.describe of   Animal  Age  visits priorities
a    dog  2.0       1        yes
b    cat  1.5       3         no
c  snake  2.0       2        yes
d    cat  5.0       1        yes
e    dog  NaN       1         no
f    dog  1.0       4         no>

In [100]:
# Summary statistics for the numeric columns (Age, visits). The missing Age is skipped,
# which is why Age reports a count of 5 while visits reports 6.
df2.describe()

Unnamed: 0,Age,visits
count,5.0,6.0
mean,2.3,2.0
std,1.565248,1.264911
min,1.0,1.0
25%,1.5,1.0
50%,2.0,1.5
75%,2.0,2.75
max,5.0,4.0


In [101]:
# Transpose: rows become columns and columns become rows.
df2.T

Unnamed: 0,a,b,c,d,e,f
Animal,dog,cat,snake,cat,dog,dog
Age,2,1.5,2,5,,1
visits,1,3,2,1,1,4
priorities,yes,no,yes,yes,no,no


In [102]:
# Sort rows by Age in ascending order; the row with a missing Age is placed last.
# sort_values returns a new frame, so df2 keeps its original order.
df2.sort_values(by='Age')

Unnamed: 0,Animal,Age,visits,priorities
f,dog,1.0,4,no
b,cat,1.5,3,no
a,dog,2.0,1,yes
c,snake,2.0,2,yes
d,cat,5.0,1,yes
e,dog,,1,no


# Slicing a DataFrame

In [103]:
# Sort by Age, then take the rows at positions 1 and 2 of the sorted result.
df2.sort_values(by='Age').iloc[1:3]

Unnamed: 0,Animal,Age,visits,priorities
b,cat,1.5,3,no
a,dog,2.0,1,yes


In [104]:
#Query dataframe by tag