# Advanced Pandas Operations

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Raw counts per animal type; each list holds one value per shelter (six in total)
data = dict(
    dogs=[23, 17, 5, 12, 1, 5],
    birds=[10, 7, 13, 2, 8, 19],
    cats=[4, 14, 8, 15, 11, 3],
)

In [3]:
# Build a DataFrame from the dictionary: keys become column names,
# and the rows get the default integer index
data_df = pd.DataFrame(data=data)

In [4]:
# Label the rows 'shelter0' .. 'shelter5' instead of using the default integer index
idx_names = [f'shelter{n}' for n in range(6)]
named_data_df = pd.DataFrame(data=data, index=idx_names)
named_data_df

Unnamed: 0,dogs,birds,cats
shelter0,23,10,4
shelter1,17,7,14
shelter2,5,13,8
shelter3,12,2,15
shelter4,1,8,11
shelter5,5,19,3


In [5]:
# Multiply each column by its own constant factor.
# Each line is equivalent to `.apply(lambda x: x * k)` on that column, but the
# arithmetic is vectorized: pandas multiplies the whole column at once instead of
# calling a Python lambda once per element. Values and integer dtype are the same.
# NOTE: this overwrites the columns of named_data_df, so re-running the cell
# multiplies the already-scaled values again.
named_data_df['dogs'] = named_data_df['dogs'] * 17
named_data_df['birds'] = named_data_df['birds'] * 8
named_data_df['cats'] = named_data_df['cats'] * 23
named_data_df

Unnamed: 0,dogs,birds,cats
shelter0,391,80,92
shelter1,289,56,322
shelter2,85,104,184
shelter3,204,16,345
shelter4,17,64,253
shelter5,85,152,69


In [6]:
# Halve every element of the frame.
# Dividing the DataFrame by a scalar is element-wise and vectorized, and it turns
# the integer columns into floats (true division by 2.0).
# The element-wise function form is `DataFrame.applymap(func)`, which is deprecated
# since pandas 2.1 in favour of `DataFrame.map(func)`; plain arithmetic avoids both
# the deprecation warning and a Python call per element.
named_data_df = named_data_df / 2.0
named_data_df

Unnamed: 0,dogs,birds,cats
shelter0,195.5,40.0,46.0
shelter1,144.5,28.0,161.0
shelter2,42.5,52.0,92.0
shelter3,102.0,8.0,172.5
shelter4,8.5,32.0,126.5
shelter5,42.5,76.0,34.5


In [7]:
# Filter a dataframe with brackets: indexing with a boolean frame keeps the
# entries where the mask is True and shows NaN everywhere else.
# The result is only displayed; named_data_df itself is not modified.
below_50 = named_data_df < 50
named_data_df[below_50]

Unnamed: 0,dogs,birds,cats
shelter0,,40.0,46.0
shelter1,,28.0,
shelter2,42.5,,
shelter3,,8.0,
shelter4,8.5,32.0,
shelter5,42.5,,34.5


In [8]:
# Filter a dataframe with the query method: keep only rows whose 'cats' value
# exceeds 30 (with the current data every row qualifies, so nothing is dropped).
# The result is rebound to the name instead of using inplace=True, which keeps the
# call chainable; .copy() makes the filtered frame independent of the original so
# that later column assignments operate on a frame of their own.
named_data_df = named_data_df.query('cats > 30').copy()
named_data_df

Unnamed: 0,dogs,birds,cats
shelter0,195.5,40.0,46.0
shelter1,144.5,28.0,161.0
shelter2,42.5,52.0,92.0
shelter3,102.0,8.0,172.5
shelter4,8.5,32.0,126.5
shelter5,42.5,76.0,34.5


In [9]:
# Total of the 'dogs' column. Summing a single column yields one scalar value,
# so despite its `_df` suffix this name holds a number rather than a DataFrame.
dogs_sum_df = named_data_df.loc[:, 'dogs'].sum()
dogs_sum_df

535.5

In [10]:
# Display the frame again: taking the column sum above did not modify it
named_data_df

Unnamed: 0,dogs,birds,cats
shelter0,195.5,40.0,46.0
shelter1,144.5,28.0,161.0
shelter2,42.5,52.0,92.0
shelter3,102.0,8.0,172.5
shelter4,8.5,32.0,126.5
shelter5,42.5,76.0,34.5


In [11]:
# Reduce along axis=0: the rows are collapsed, leaving ONE TOTAL PER COLUMN
# (the result is indexed by dogs / birds / cats).
# The built-in DataFrame.sum replaces apply(np.sum, ...): same result, no per-column
# Python call. The variable name is kept so later references keep working.
row_sum_data_df = named_data_df.sum(axis=0)
row_sum_data_df

dogs     535.5
birds    236.0
cats     632.5
dtype: float64

In [13]:
# Reduce along axis=1: the columns are collapsed, leaving ONE TOTAL PER ROW
# (the result is indexed by shelter0 .. shelter5).
# The built-in DataFrame.sum replaces apply(np.sum, ...): same result, no per-row
# Python call. The variable name is kept so later references keep working.
column_sum_data_df = named_data_df.sum(axis=1)
column_sum_data_df

shelter0    281.5
shelter1    333.5
shelter2    186.5
shelter3    282.5
shelter4    167.0
shelter5    153.0
dtype: float64

In [14]:
# Add a boolean 'foster' column. The list is assigned by position, so it needs
# exactly one entry per row, in the order the rows are displayed.
new_col_values = [
    True,   # shelter0
    True,   # shelter1
    False,  # shelter2
    True,   # shelter3
    False,  # shelter4
    False,  # shelter5
]
named_data_df['foster'] = new_col_values
named_data_df

Unnamed: 0,dogs,birds,cats,foster
shelter0,195.5,40.0,46.0,True
shelter1,144.5,28.0,161.0,True
shelter2,42.5,52.0,92.0,False
shelter3,102.0,8.0,172.5,True
shelter4,8.5,32.0,126.5,False
shelter5,42.5,76.0,34.5,False


In [15]:
# Group the rows by the 'foster' flag and sum the remaining columns per group:
# the result has one row for False and one for True, indexed by 'foster'
(
    named_data_df
    .groupby(by='foster')
    .sum()
)

Unnamed: 0_level_0,dogs,birds,cats
foster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,93.5,160.0,253.0
True,442.0,76.0,379.5
