# Pandas Tutorial

## Series

In [None]:
# Pandas Tutorial 2020 by Derek Banas:- https://www.youtube.com/watch?v=PcvsOaixUh8

In [3]:
import numpy as np
import pandas as pd

In [7]:
list_1 = ['a', 'b', 'c', 'd']
labels = [1, 2, 3, 4]

ser_1 = pd.Series(data=list_1, index=labels)
ser_1

1    a
2    b
3    c
4    d
dtype: object

In [34]:
arr_1 = np.array([1,2,3,4])
ser_2 = pd.Series(arr_1)
ser_2

0    1
1    2
2    3
3    4
dtype: int32

In [31]:
dict_1 = {'f_name':'Harshad', 'l_name':'Shringi', 'age':29}
ser_3 = pd.Series(dict_1)
ser_3

f_name    Harshad
l_name    Shringi
age            29
dtype: object

In [39]:
ser_3.keys()

Index(['f_name', 'l_name', 'age'], dtype='object')

In [22]:
# VV Important  
ser_3.values

array(['Harshad', 'Shringi', 29], dtype=object)

In [40]:
dict_1.values()

dict_values(['Harshad', 'Shringi', 29])

In [30]:
ser_3['f_name'] +' '+ ser_3['l_name'] +' with age: '+ str(ser_3['age'])

'Harshad Shringi with age: 29'

In [41]:
for k,v in dict_1.items():
    print(k,v)

f_name Harshad
l_name Shringi
age 29


In [46]:
ser_2.dtype

dtype('int32')

In [47]:
ser_3.dtype

dtype('O')

In [48]:
# add opertaion on series
# All operations can be performed on series of same dataType
ser_2 + ser_2

0    2
1    4
2    6
3    8
dtype: int32

In [49]:
np.exp(ser_2)

0     2.718282
1     7.389056
2    20.085537
3    54.598150
dtype: float64

In [52]:
ser_4 = pd.Series({4: 5, 5: 6, 6: 7, 7: 8})
ser_2 + ser_4

# does not not align so doesn't get added up.

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
6   NaN
7   NaN
dtype: float64

In [54]:
ser_4 = pd.Series({4: 5, 5: 6, 6: 7, 7: 8}, name='rand_nums')
ser_4.name

'rand_nums'

## Dataframes

### Creating DataFrames

In [112]:
arr_2 = np.random.randint(10, 50, size=(2,3))
arr_2

array([[39, 41, 43],
       [45, 14, 10]])

In [113]:
arr_2 = np.random.randint(10, 50, size=(2,3))
df_1 = pd.DataFrame(arr_2, ['A','B'], ['C','D','E'])
df_1

Unnamed: 0,C,D,E
A,24,26,43
B,17,24,28


In [86]:
print(df_1.shape)

(2, 3)


In [123]:
dict_3 = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 
             'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
df_2 = pd.DataFrame(dict_3)
df_2

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [82]:
dict_4 = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 
             'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
                 'three': pd.Series([1., 2., 3., 4.,5.], index=['a', 'b', 'c', 'd','e'])}
df_3 = pd.DataFrame(dict_4)
df_3

Unnamed: 0,one,two,three
a,1.0,1.0,1.0
b,2.0,2.0,2.0
c,3.0,3.0,3.0
d,,4.0,4.0
e,,,5.0


In [80]:
pd.DataFrame.from_dict(dict([('A', [1,2,3]), ('B', [4,5,6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [81]:
pd.DataFrame.from_dict(dict([('A', [1,2,3]), ('B', [4,5,6])]), 
                      orient='index', columns=['one', 'two', 'three'])

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


### Editing and Retrieving data

In [114]:
print(df_1)

    C   D   E
A  24  26  43
B  17  24  28


In [88]:
df_1['C']

A    19
B    21
Name: C, dtype: int32

In [89]:
df_1[['D', 'E']]

Unnamed: 0,D,E
A,30,28
B,17,25


In [90]:
df_1.loc['A']

C    19
D    30
E    28
Name: A, dtype: int32

In [92]:
df_1.iloc[1]

C    21
D    17
E    25
Name: B, dtype: int32

In [93]:
df_1.loc['A', 'C']

19

In [94]:
df_1.loc[['A','B'],['D','E']]

Unnamed: 0,D,E
A,30,28
B,17,25


In [96]:
df_1['Total'] = df_1['C']+ df_1['D']+ df_1['E']
df_1

Unnamed: 0,C,D,E,Total
A,19,30,28,77
B,21,17,25,63


In [126]:
print(df_2)
df_2['Mul'] = df_2['one']*df_2['two']
df_2

   one  two  Mul
a  1.0  1.0  1.0
b  2.0  2.0  4.0
c  3.0  3.0  9.0
d  NaN  4.0  NaN


Unnamed: 0,one,two,Mul
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [101]:
dict_new = {'C':10, 'D':20, 'E':30}
new_row = pd.Series(dict_new, name='F')
df_1 = df_1.append(new_row)
df_1

Unnamed: 0,C,D,E,Total
A,19.0,30.0,28.0,77.0
B,21.0,17.0,25.0,63.0
F,10.0,20.0,30.0,
F,10.0,20.0,30.0,
F,10.0,20.0,30.0,


In [102]:
df_1.drop('F', axis=0, inplace=True)

In [105]:
df_1.drop('Total', axis=1, inplace=True)

In [111]:
df_1

Unnamed: 0,C,D,E
A,29,36,49
B,41,47,27


In [119]:
# setting new index

df_1['Gender'] = ['Men', 'Woman']
df_1.set_index('Gender', inplace=True)
df_1

Unnamed: 0_level_0,C,D,E
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Men,24,26,43
Woman,17,24,28


In [117]:
df_1.reset_index(inplace=True)

In [127]:
df_2

Unnamed: 0,one,two,Mul
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [128]:
df_2.assign(div=df_2['one'] / df_2['two'])

Unnamed: 0,one,two,Mul,div
a,1.0,1.0,1.0,1.0
b,2.0,2.0,4.0,1.0
c,3.0,3.0,9.0,1.0
d,,4.0,,


In [129]:
df_2.assign(sub= lambda x: (x['two'] - (x['one'])))

Unnamed: 0,one,two,Mul,sub
a,1.0,1.0,1.0,0.0
b,2.0,2.0,4.0,0.0
c,3.0,3.0,9.0,0.0
d,,4.0,,


In [131]:
df_01 = pd.DataFrame({'A': [1., np.nan, 3., np.nan]})
df_02 = pd.DataFrame({'A': [10., 11., 3., 12.]})

df_01.combine_first(df_02)

# NaN values get replaced by other df_02 values

Unnamed: 0,A
0,1.0
1,11.0
2,3.0
3,12.0


### Conditional Selection