In [1]:
import pandas as pd
import numpy as np
from numpy.random import randint

In [2]:
# Labels for the demo frame: five row labels and four column labels.
indexes = ['A', 'B', 'C', 'D', 'E']
columns = ['W', 'X', 'Y', 'Z']

In [6]:
# Seed NumPy's global generator so the same integers come out on every run.
np.random.seed(101)
# Draw a 5x4 grid of integers from 0 up to (not including) 100 in one call.
# The equivalent two-step form is randint(0, 100, 20) followed by .reshape(5, 4).
data = randint(low=0, high=100, size=(5, 4))
data

array([[95, 11, 81, 70],
       [63, 87, 75,  9],
       [77, 40,  4, 63],
       [40, 60, 92, 64],
       [ 5, 12, 93, 40]])

In [7]:
# Build the demo frame: values from `data`, rows labelled by `indexes`,
# columns labelled by `columns`.
df = pd.DataFrame(data=data, index=indexes, columns=columns)
df

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [9]:
# How to retrieve a column
df['W']
# Selecting a single column by its label returns a Series

A    95
B    63
C    77
D    40
E     5
Name: W, dtype: int32

In [12]:
type(df['W']) # the built-in type() shows that a single-column selection is a Series

pandas.core.series.Series

In [14]:
# What if I want the result to be a DataFrame instead of a Series?
# Pass the column name inside a list
df[['W']]

Unnamed: 0,W
A,95
B,63
C,77
D,40
E,5


In [15]:
# A list of column labels, even with a single entry, selects a DataFrame
type(df[['W']])

pandas.core.frame.DataFrame

In [17]:
df[['W', 'Y']] # a list of labels pulls out several columns at once

Unnamed: 0,W,Y
A,95,81
B,63,75
C,77,4
D,40,92
E,5,93


In [18]:
# The columns come back in the order the labels are listed, not the frame's order
df[['Z', 'X', 'W']]

Unnamed: 0,Z,X,W
A,70,11,95
B,9,87,63
C,63,40,77
D,64,60,40
E,40,12,5


In [21]:
# How do we pull out rows?
df.loc['A'] # a single row label returns a Series
# loc is not called like a function or method: it is not followed by ()
# loc is an indexer attribute: it is subscripted with [] and given row labels

W    95
X    11
Y    81
Z    70
Name: A, dtype: int32

In [22]:
# A list holding one row label returns a DataFrame rather than a Series
df.loc[['A']]

Unnamed: 0,W,X,Y,Z
A,95,11,81,70


In [23]:
# Several rows can be selected by passing a list of row labels
df.loc[['A', 'C', 'E']]

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
C,77,40,4,63
E,5,12,93,40


In [24]:
# How can I select rows and columns together?
df.loc[['A', 'C', 'E'], ['W', 'Y']] # row labels first, then column labels

Unnamed: 0,W,Y
A,95,81
C,77,4
E,5,93


In [26]:
# iloc selects by integer position; positions 0, 2 and 4 are rows A, C and E here
df.iloc[[0, 2, 4]]

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
C,77,40,4,63
E,5,12,93,40


In [31]:
# iloc accepts a (rows, columns) pair just like loc, but both parts must be
# integer positions rather than labels.
# Columns 'W' and 'Y' sit at positions 0 and 2 in this frame, so one iloc call
# selects the rows and the columns together, with no second indexing step.
df.iloc[[0, 2, 4], [0, 2]]

Unnamed: 0,W,Y
A,95,81
C,77,4
E,5,93


In [34]:
# Conditional Selection
df > 50 # element-wise comparison gives a DataFrame of True/False values

Unnamed: 0,W,X,Y,Z
A,True,False,True,True
B,True,True,True,False
C,True,False,False,True
D,False,True,True,True
E,False,False,True,False


In [36]:
df[df > 50]
# Cells where the condition is False are shown as NaN (Not a Number);
# the values that are kept are displayed as floats

Unnamed: 0,W,X,Y,Z
A,95.0,,81.0,70.0
B,63.0,87.0,75.0,
C,77.0,,,63.0
D,,60.0,92.0,64.0
E,,,93.0,


In [37]:
# Comparing a single column gives a boolean Series with one value per row
df['X'] > 50

A    False
B     True
C    False
D     True
E    False
Name: X, dtype: bool

In [38]:
# A boolean Series inside [] keeps only the rows where it is True
df[df['X'] > 50]

Unnamed: 0,W,X,Y,Z
B,63,87,75,9
D,40,60,92,64


In [39]:
# Filter rows and pick columns in a single loc call: the boolean mask selects
# the rows and the list of labels selects the columns. This avoids chaining two
# separate [] lookups one after the other.
df.loc[df['X'] > 50, ['X', 'Y', 'Z']]

Unnamed: 0,X,Y,Z
B,87,75,9
D,60,92,64


In [41]:
# & combines two conditions with AND (both must be True for a row to be kept)
# | combines two conditions with OR (either one being True keeps the row)
# Each comparison is wrapped in parentheses because & and | bind more tightly than > and <
df[(df['X'] > 50) & (df['Z'] < 50)]

Unnamed: 0,W,X,Y,Z
B,63,87,75,9


In [42]:
# OR: keep the rows where at least one of the two conditions is True
df[(df['X'] > 50) | (df['Z'] < 50)]

Unnamed: 0,W,X,Y,Z
B,63,87,75,9
D,40,60,92,64
E,5,12,93,40


In [43]:
# Display the current contents of df
df

Unnamed: 0,W,X,Y,Z
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40


In [44]:
# Five labels, one for each existing row of the frame.
newindex = [
    'Sel', 'Mel', 'Per', 'Ked', 'Joh',
]
newindex

['Sel', 'Mel', 'Per', 'Ked', 'Joh']

In [45]:
# How to create a new column: assign a list (one value per row) to a new column label
df['States'] = newindex
df

Unnamed: 0,W,X,Y,Z,States
A,95,11,81,70,Sel
B,63,87,75,9,Mel
C,77,40,4,63,Per
D,40,60,92,64,Ked
E,5,12,93,40,Joh


In [46]:
# set_index returns a frame indexed by the 'States' values; the result is not
# assigned here, so df itself still has 'States' as an ordinary column
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Sel,95,11,81,70
Mel,63,87,75,9
Per,77,40,4,63
Ked,40,60,92,64
Joh,5,12,93,40


In [49]:
# The column labels of df
df.columns

Index(['W', 'X', 'Y', 'Z', 'States'], dtype='object')

In [53]:
# Rename every column at once by assigning a new list of labels,
# one label per existing column, in the same order
df.columns = ['2019', '2020', '2021', '2022', 'States']
df

Unnamed: 0,2019,2020,2021,2022,States
A,95,11,81,70,Sel
B,63,87,75,9,Mel
C,77,40,4,63,Per
D,40,60,92,64,Ked
E,5,12,93,40,Joh


In [54]:
# Add a new row: assigning through loc to a row label that does not exist yet
# appends that row; the list supplies one value per column
df.loc['F'] = [10, 20, 30, 40, 'Sab']
df

Unnamed: 0,2019,2020,2021,2022,States
A,95,11,81,70,Sel
B,63,87,75,9,Mel
C,77,40,4,63,Per
D,40,60,92,64,Ked
E,5,12,93,40,Joh
F,10,20,30,40,Sab


In [59]:
# By default drop removes rows; to remove a column, name it with columns=...
# (equivalent to passing the label together with axis=1).
# drop returns a new frame and leaves the original untouched, so the result is
# assigned back to df to keep it. Reassignment is used instead of inplace=True
# so the cell does not mutate a frame that earlier cells have already displayed.
# errors='ignore' lets the cell be re-run after 'States' is already gone
# instead of raising KeyError.
df = df.drop(columns='States', errors='ignore')
df

In [61]:
# Remove the row labelled 'F' (rows are what drop removes by default; index=
# makes that explicit). The result is assigned back to df rather than using
# inplace=True, and errors='ignore' keeps the cell safe to re-run once the row
# has already been removed.
df = df.drop(index='F', errors='ignore')
df

In [62]:
# Display df after the 'States' column and the 'F' row have been dropped
df

Unnamed: 0,2019,2020,2021,2022
A,95,11,81,70
B,63,87,75,9
C,77,40,4,63
D,40,60,92,64
E,5,12,93,40
