# Pandas

In [1]:
import pandas as pd            ### Importing the library Pandas alias as pd

In [2]:
import numpy as np             ### Importing the library Numpy alias as np

## Pandas Series, DataFrame and indexing

In [3]:
### Build a labelled Series: the values 1-4 are indexed by sport name instead of by position
sports1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['cricket', 'football', 'basketball', 'golf'],
)
sports1

cricket       1
football      2
basketball    3
golf          4
dtype: int64

In [4]:
### Label-based lookup: fetch the value stored under the 'football' label of sports1
sports1.loc['football']

2

In [5]:
### Second labelled Series; it has 'baseball' where sports1 has 'basketball'
sports2 = pd.Series(
    data=[11, 13, 12, 5],
    index=['cricket', 'football', 'baseball', 'golf'],
)
sports2

cricket     11
football    13
baseball    12
golf         5
dtype: int64

In [6]:
### Add the two Series label by label (not by position).
### Labels found in only one Series ('baseball', 'basketball') give NaN, so the result is float.
sports1.add(sports2)

baseball       NaN
basketball     NaN
cricket       12.0
football      15.0
golf           9.0
dtype: float64

In [7]:
### Build an 8x5 DataFrame of random scores: rows are labelled A-H, columns Score1-Score5.
### A locally seeded generator keeps the table identical on every Restart & Run All and
### leaves NumPy's global random state untouched. Outputs saved from an earlier unseeded
### run will differ until the notebook is re-executed.
df1 = pd.DataFrame(
    np.random.RandomState(42).rand(8, 5),
    index='A B C D E F G H'.split(),
    columns='Score1 Score2 Score3 Score4 Score5'.split(),
)
df1

Unnamed: 0,Score1,Score2,Score3,Score4,Score5
A,0.537067,0.200956,0.970166,0.456438,0.465402
B,0.647956,0.485658,0.374999,0.638319,0.595193
C,0.888368,0.358738,0.847181,0.975766,0.443798
D,0.307696,0.381271,0.02227,0.811045,0.886657
E,0.35152,0.687466,0.292502,0.349406,0.508295
F,0.105951,0.529553,0.492098,0.15422,0.161655
G,0.152437,0.407193,0.159128,0.76477,0.517145
H,0.168253,0.119337,0.613979,0.974279,0.142106


In [8]:
### Select several columns at once: a list of labels returns a DataFrame with just those columns
df1.loc[:, ['Score2', 'Score3', 'Score4']]

Unnamed: 0,Score2,Score3,Score4
A,0.200956,0.970166,0.456438
B,0.485658,0.374999,0.638319
C,0.358738,0.847181,0.975766
D,0.381271,0.02227,0.811045
E,0.687466,0.292502,0.349406
F,0.529553,0.492098,0.15422
G,0.407193,0.159128,0.76477
H,0.119337,0.613979,0.974279


In [9]:
### Select a single column: one label returns a Series named after that column
df1.loc[:, 'Score2']

A    0.200956
B    0.485658
C    0.358738
D    0.381271
E    0.687466
F    0.529553
G    0.407193
H    0.119337
Name: Score2, dtype: float64

In [10]:
### Add a sixth column, Score6, holding the row-wise sum of Score1 and Score2.
### This modifies df1 itself, so every later cell sees the extra column.
df1['Score6'] = df1['Score1'].add(df1['Score2'])
df1

Unnamed: 0,Score1,Score2,Score3,Score4,Score5,Score6
A,0.537067,0.200956,0.970166,0.456438,0.465402,0.738023
B,0.647956,0.485658,0.374999,0.638319,0.595193,1.133614
C,0.888368,0.358738,0.847181,0.975766,0.443798,1.247107
D,0.307696,0.381271,0.02227,0.811045,0.886657,0.688967
E,0.35152,0.687466,0.292502,0.349406,0.508295,1.038986
F,0.105951,0.529553,0.492098,0.15422,0.161655,0.635504
G,0.152437,0.407193,0.159128,0.76477,0.517145,0.559631
H,0.168253,0.119337,0.613979,0.974279,0.142106,0.28759


In [11]:
### Drop row 'E' (rows are the default axis). drop() returns a new DataFrame; df1 is left unchanged.
df1.drop(labels='E')

Unnamed: 0,Score1,Score2,Score3,Score4,Score5,Score6
A,0.537067,0.200956,0.970166,0.456438,0.465402,0.738023
B,0.647956,0.485658,0.374999,0.638319,0.595193,1.133614
C,0.888368,0.358738,0.847181,0.975766,0.443798,1.247107
D,0.307696,0.381271,0.02227,0.811045,0.886657,0.688967
F,0.105951,0.529553,0.492098,0.15422,0.161655,0.635504
G,0.152437,0.407193,0.159128,0.76477,0.517145,0.559631
H,0.168253,0.119337,0.613979,0.974279,0.142106,0.28759


In [12]:
### Drop the Score1 column: axis='columns' (same as axis=1) makes drop() look for the label among the columns
df1.drop(labels='Score1', axis='columns')

Unnamed: 0,Score2,Score3,Score4,Score5,Score6
A,0.200956,0.970166,0.456438,0.465402,0.738023
B,0.485658,0.374999,0.638319,0.595193,1.133614
C,0.358738,0.847181,0.975766,0.443798,1.247107
D,0.381271,0.02227,0.811045,0.886657,0.688967
E,0.687466,0.292502,0.349406,0.508295,1.038986
F,0.529553,0.492098,0.15422,0.161655,0.635504
G,0.407193,0.159128,0.76477,0.517145,0.559631
H,0.119337,0.613979,0.974279,0.142106,0.28759


In [13]:
### Drop row 'A': axis='index' (same as axis=0) makes drop() look for the label among the rows
df1.drop(labels='A', axis='index')

Unnamed: 0,Score1,Score2,Score3,Score4,Score5,Score6
B,0.647956,0.485658,0.374999,0.638319,0.595193,1.133614
C,0.888368,0.358738,0.847181,0.975766,0.443798,1.247107
D,0.307696,0.381271,0.02227,0.811045,0.886657,0.688967
E,0.35152,0.687466,0.292502,0.349406,0.508295,1.038986
F,0.105951,0.529553,0.492098,0.15422,0.161655,0.635504
G,0.152437,0.407193,0.159128,0.76477,0.517145,0.559631
H,0.168253,0.119337,0.613979,0.974279,0.142106,0.28759


In [14]:
### Drop several columns in one call by passing a list of column labels
df1.drop(columns=['Score1', 'Score2'])

Unnamed: 0,Score3,Score4,Score5,Score6
A,0.970166,0.456438,0.465402,0.738023
B,0.374999,0.638319,0.595193,1.133614
C,0.847181,0.975766,0.443798,1.247107
D,0.02227,0.811045,0.886657,0.688967
E,0.292502,0.349406,0.508295,1.038986
F,0.492098,0.15422,0.161655,0.635504
G,0.159128,0.76477,0.517145,0.559631
H,0.613979,0.974279,0.142106,0.28759


In [15]:
### Build a DataFrame from a dict of equal-length lists: each key becomes a column (ID, Name, Profit)
### and the rows get the default integer index 0-4.
df2 = {
    'ID': [101, 102, 103, 107, 176],
    'Name': ['John', 'Mercy', 'Akash', 'Kavin', 'Lally'],
    'Profit': [20, 54, 56, 87, 123],
}
df = pd.DataFrame(data=df2)
df

Unnamed: 0,ID,Name,Profit
0,101,John,20
1,102,Mercy,54
2,103,Akash,56
3,107,Kavin,87
4,176,Lally,123


In [16]:
### Drop the row whose index label is 3 (Kavin). Labels start at 0, so this is the fourth row by position.
### drop() returns a new DataFrame; df is left unchanged.
df.drop(index=3)

Unnamed: 0,ID,Name,Profit
0,101,John,20
1,102,Mercy,54
2,103,Akash,56
4,176,Lally,123


In [17]:
### Select the Name column as a Series
df.loc[:, 'Name']

0     John
1    Mercy
2    Akash
3    Kavin
4    Lally
Name: Name, dtype: object

In [18]:
### Select the Name and ID columns; the result follows the order given in the list, not the order in df
df.loc[:, ['Name', 'ID']]

Unnamed: 0,Name,ID
0,John,101
1,Mercy,102
2,Akash,103
3,Kavin,107
4,Lally,176


In [20]:
### Assessment
# 1) Create a null (all-zeros) vector of size 10
# 2) Create a null vector of size 10 whose fifth value is 1
# 3) Create a vector with values ranging from 10 to 65 (inclusive)
# 4) Reverse the above vector
# 5) Create a 3x3 matrix with values ranging from 0 to 8

In [21]:
### 1) Null vector: ten float zeros
np.zeros(shape=10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [22]:
### 2) Start from ten zeros, then set the fifth element to one (0-based index 4)
array = np.zeros(shape=10)
array[4] = 1.0
array

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.])

In [23]:
### 3) Integers from 10 to 65 inclusive; arange excludes its stop value, hence the + 1
w = np.arange(10, 65 + 1)
w

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
       44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
       61, 62, 63, 64, 65])

In [24]:
### 4) Reverse the 10..65 vector with a [::-1] slice instead of hand-computing descending bounds.
### The range is rebuilt here so that re-running this cell always yields 65..10
### (w = w[::-1] would flip the order back on every second run).
w = np.arange(10, 66)[::-1]
w

array([65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
       48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
       31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15,
       14, 13, 12, 11, 10])

In [25]:
### 5) The integers 0-8 laid out row by row as a 3x3 matrix
z = np.arange(9).reshape((3, 3))
z

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])