## Introduction to the Data

In [23]:
import pandas as pd
import numpy as np


In [24]:
df18 = pd.read_csv('2018.csv', parse_dates=['Date'])
df18.head()

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
0,2018-04-19,HR,Zagreb,pm10,72,12.0,66.0,19.0,1034.64
1,2018-05-03,HR,Zagreb,pm10,72,5.0,46.0,20.0,740.53
2,2018-05-08,HR,Zagreb,pm10,69,7.0,33.0,17.0,286.35
3,2018-05-31,HR,Zagreb,pm10,48,15.0,60.0,25.0,704.61
4,2018-06-22,HR,Zagreb,pm10,62,1.0,60.0,7.0,670.06


In [25]:
df18['Specie'].unique()

array(['pm10', 'temperature', 'wind-speed', 'wind-gust', 'co', 'o3',
       'so2', 'no2', 'humidity', 'pressure', 'pm25', 'precipitation',
       'wd', 'd', 'uvi', 'aqi', 'pol', 'pm1', 'mepaqi'], dtype=object)

## Data Cleaning

### Data Cleaning 2018 DataFrame

In [26]:
df18 = pd.read_csv('2018.csv',parse_dates=['Date'])
df18.head()

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
0,2018-04-19,HR,Zagreb,pm10,72,12.0,66.0,19.0,1034.64
1,2018-05-03,HR,Zagreb,pm10,72,5.0,46.0,20.0,740.53
2,2018-05-08,HR,Zagreb,pm10,69,7.0,33.0,17.0,286.35
3,2018-05-31,HR,Zagreb,pm10,48,15.0,60.0,25.0,704.61
4,2018-06-22,HR,Zagreb,pm10,62,1.0,60.0,7.0,670.06


In [27]:
df18['City'].unique()

array(['Zagreb', 'Rijeka', 'Split', 'Stockholm', 'Göteborg', 'Malmö',
       'Uppsala', 'Pristina', 'São José dos Campos', 'São Paulo',
       'Hamilton', 'Calgary', 'Winnipeg', 'Halifax', 'Kitchener',
       'Edmonton', 'Surrey', 'Mississauga', 'Québec', 'Vancouver',
       'Victoria', 'Montréal', 'Toronto', 'Ottawa', 'London', 'Bogotá',
       'Medellín', 'Copenhagen', 'Las Palmas de Gran Canaria',
       'Salamanca', 'Barcelona', 'Donostia / San Sebastián',
       'Gasteiz / Vitoria', 'Córdoba', 'Valladolid', 'Santander', 'Palma',
       'Málaga', 'Sevilla', 'Bilbao', 'Pamplona', 'Castelló de la Plana',
       'Huelva', 'Granada', 'Madrid', 'Valencia', 'Burgos', 'Murcia',
       'Santa Cruz de Tenerife', 'Oviedo', 'Utrecht', 'Nijmegen',
       'Haarlem', 'Eindhoven', 'Rotterdam', 'Amsterdam', 'Dordrecht',
       'Breda', 'Groningen', 'Maastricht', 'The Hague', 'Abu Dhabi',
       'Dubai', 'Baguio', 'Manila', 'Butuan', 'Zamboanga', 'Cochabamba',
       'Nicosia', 'Limassol', 'Ḩawallī

#### Extracting and Cleaning Data from Chennai

In [28]:
chennai = df18[df18['City'] == 'Chennai']
chennai.head()

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
542590,2018-06-11,IN,Chennai,pressure,72,0.0,1005.0,0.0,2267540.0
542591,2018-05-19,IN,Chennai,pressure,69,0.0,1007.0,0.0,2325260.0
542592,2018-05-29,IN,Chennai,pressure,73,0.0,1003.0,0.0,2241080.0
542593,2018-05-24,IN,Chennai,pressure,71,0.0,1006.0,0.0,2291040.0
542594,2018-05-28,IN,Chennai,pressure,71,0.0,1016.6,0.0,2312180.0


In [29]:
chennai['Specie'].unique()

array(['pressure', 'co', 'o3', 'no2', 'humidity', 'wind-gust', 'so2',
       'pm25', 'wind-speed', 'temperature'], dtype=object)

In [30]:
#For Wind Speed
pm = chennai[chennai['Specie'] == 'co'].copy() #So we don't modify the original
pm.head()

Unnamed: 0,Date,Country,City,Specie,count,min,max,median,variance
542637,2018-01-26,IN,Chennai,co,96,2.8,20.3,7.0,185.1
542638,2018-03-18,IN,Chennai,co,92,2.4,18.8,8.7,178.92
542639,2018-04-07,IN,Chennai,co,48,2.8,9.3,5.1,22.48
542640,2018-04-20,IN,Chennai,co,48,3.1,14.2,6.9,52.57
542641,2018-06-26,IN,Chennai,co,48,2.2,13.4,6.5,61.01


In [31]:
#Pivot table
pm = pd.pivot_table(data=pm, values=['min', 'max', 'median'], index='Date')
pm.head()

Unnamed: 0_level_0,max,median,min
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,21.7,9.3,2.9
2018-01-02,17.6,8.0,3.3
2018-01-03,18.8,10.0,2.4
2018-01-04,20.3,6.7,2.4
2018-01-05,19.6,6.7,2.4


In [32]:
pm.rename(columns={'min':'min_{}'.format('pm10'), 'max':'max_{}'.format('pm10'), 'median':'avg_{}'.format('pm10')}, inplace=True)
pm.head()

Unnamed: 0_level_0,max_pm10,avg_pm10,min_pm10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,21.7,9.3,2.9
2018-01-02,17.6,8.0,3.3
2018-01-03,18.8,10.0,2.4
2018-01-04,20.3,6.7,2.4
2018-01-05,19.6,6.7,2.4


After cleaning and transforming the data, we now know which steps the function has to contain. 

#### Writing a function to ease the cleaning process

In [33]:
def cleaning_function(df, city, parameters):
    #This function will make all the cleaning process
    #df is each DataFrame (df18, df19, df20)
    #City we will use
    #Parameters or environmental variables, we will take from each dataframe based
    #on it's available variables

    #Basic idea:
    #1. Extracts the city
    #2. From the list of parameters, extracts each parameter
    #3. From that parameter makes a pivot, and change columns names
    #4. Takes each parameter df and merges it with an empty df
    #5. Return that new DataFrame

    city_temp = df[df['City'] == city]
    city_temp_parameters = city_temp['Specie'].unique()
    final_df = pd.DataFrame()
    for parameter in parameters:
        if parameter not in city_temp_parameters:
            print("The parameter {} doesn't exist in the dataframe and will be ignored".format(parameter))
            continue
        else:
            temp_df = city_temp[city_temp['Specie'] == parameter].copy()
            temp_df_pivot = pd.pivot_table(data=temp_df, values=['min', 'max', 'median'], index='Date')
            temp_df_pivot.rename(columns={'min':'min_{}'.format(parameter), 'max':'max_{}'.format(parameter), 'median':'avg_{}'.format(parameter)}, inplace=True)
            final_df = pd.concat([final_df, temp_df_pivot], axis=1)
    return final_df

#### Extracting and Cleaning Data from Chennai

In [34]:
parameters = ['pm10', 'pm25', 'pm1', 'co', 'o3', 'so2', 'no2', 'humidity', 'pressure', 'precipitation',
'temperature', 'wind-speed', 'wd', 'uvi']
chennai_18 = cleaning_function(df18, 'Chennai', parameters)
chennai_18.head()

The parameter pm10 doesn't exist in the dataframe and will be ignored
The parameter pm1 doesn't exist in the dataframe and will be ignored
The parameter precipitation doesn't exist in the dataframe and will be ignored
The parameter wd doesn't exist in the dataframe and will be ignored
The parameter uvi doesn't exist in the dataframe and will be ignored


Unnamed: 0_level_0,max_pm25,avg_pm25,min_pm25,max_co,avg_co,min_co,max_o3,avg_o3,min_o3,max_so2,...,min_humidity,max_pressure,avg_pressure,min_pressure,max_temperature,avg_temperature,min_temperature,max_wind-speed,avg_wind-speed,min_wind-speed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,180.0,129.0,56.0,21.7,9.3,2.9,35.4,10.2,1.3,37.5,...,,,,,,,,,,
2018-01-02,834.0,145.0,13.0,17.6,8.0,3.3,33.4,4.5,1.3,37.5,...,,,,,,,,,,
2018-01-03,781.0,135.0,40.0,18.8,10.0,2.4,42.5,3.3,1.3,54.3,...,,,,,,,,,,
2018-01-04,834.0,157.0,111.0,20.3,6.7,2.4,57.4,13.0,1.3,51.5,...,,,,,,,,,,
2018-01-05,327.0,156.0,129.0,19.6,6.7,2.4,32.2,12.4,1.7,32.0,...,,,,,,,,,,


In [35]:
chennai_18.isnull().sum()

max_pm25             0
avg_pm25             0
min_pm25             0
max_co               0
avg_co               0
min_co               0
max_o3               0
avg_o3               0
min_o3               0
max_so2              0
avg_so2              0
min_so2              0
max_no2              0
avg_no2              0
min_no2              0
max_humidity       130
avg_humidity       130
min_humidity       130
max_pressure       135
avg_pressure       135
min_pressure       135
max_temperature    135
avg_temperature    135
min_temperature    135
max_wind-speed     130
avg_wind-speed     130
min_wind-speed     130
dtype: int64

In [36]:
chennai_18.drop(columns=['max_humidity', 'avg_humidity', 'min_humidity','max_pressure', 'avg_pressure', 'min_pressure', 'max_temperature', 'avg_temperature', 'min_temperature', 'max_wind-speed', 'avg_wind-speed', 'min_wind-speed'], inplace=True)

In [37]:
def missingvalue_function(city):
    city_noNan_foward = city.interpolate(method='linear',axis=1, limit_direction='forward')
    city_noNan_all = city_noNan_foward.interpolate(method='linear',axis=1,limit_direction='backward')
    #city_noNan_all.astype(int)
    return city_noNan_all

In [38]:
chennai_18 = missingvalue_function(chennai_18)
chennai_18.isnull().sum()

max_pm25    0
avg_pm25    0
min_pm25    0
max_co      0
avg_co      0
min_co      0
max_o3      0
avg_o3      0
min_o3      0
max_so2     0
avg_so2     0
min_so2     0
max_no2     0
avg_no2     0
min_no2     0
dtype: int64

#### Extracting and Cleaning Data from Mumbai

In [39]:
#Apply the function
#Check for missing data
#Fill missing data
parameters = ['pm10', 'pm25', 'pm1', 'co', 'o3', 'so2', 'no2', 'humidity', 'pressure', 'precipitation',
'temperature', 'wind-speed', 'wd', 'uvi']
mumbai_18 = cleaning_function(df18, 'Mumbai', parameters)
mumbai_18.head()

The parameter pm1 doesn't exist in the dataframe and will be ignored
The parameter precipitation doesn't exist in the dataframe and will be ignored
The parameter wd doesn't exist in the dataframe and will be ignored
The parameter uvi doesn't exist in the dataframe and will be ignored


Unnamed: 0_level_0,max_pm10,avg_pm10,min_pm10,max_pm25,avg_pm25,min_pm25,max_co,avg_co,min_co,max_o3,...,min_humidity,max_pressure,avg_pressure,min_pressure,max_temperature,avg_temperature,min_temperature,max_wind-speed,avg_wind-speed,min_wind-speed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,198.0,104.0,31.0,305.0,166.0,10.0,86.4,13.7,0.8,53.6,...,,,,,,,,,,
2018-01-02,168.0,104.0,42.0,255.0,175.0,100.0,38.4,13.8,0.6,64.7,...,,,,,,,,,,
2018-01-03,271.0,130.0,59.0,364.0,174.0,118.0,57.1,18.1,1.1,68.6,...,,,,,,,,,,
2018-01-04,317.0,122.0,38.0,362.0,180.0,106.0,58.4,22.5,0.2,47.0,...,,,,,,,,,,
2018-01-05,180.0,109.0,40.0,291.0,162.0,82.0,47.7,18.8,1.0,44.8,...,,,,,,,,,,


In [40]:
mumbai_18.isnull().sum()

max_pm10            11
avg_pm10            11
min_pm10            11
max_pm25             5
avg_pm25             5
min_pm25             5
max_co              11
avg_co              11
min_co              11
max_o3              72
avg_o3              72
min_o3              72
max_so2             11
avg_so2             11
min_so2             11
max_no2             72
avg_no2             72
min_no2             72
max_humidity       129
avg_humidity       129
min_humidity       129
max_pressure       129
avg_pressure       129
min_pressure       129
max_temperature    129
avg_temperature    129
min_temperature    129
max_wind-speed     135
avg_wind-speed     135
min_wind-speed     135
dtype: int64

In [41]:
mumbai_18.drop(columns=['max_humidity', 'avg_humidity', 'min_humidity','max_pressure', 'avg_pressure', 'min_pressure', 'max_temperature', 'avg_temperature', 'min_temperature', 'max_wind-speed', 'avg_wind-speed', 'min_wind-speed'], inplace=True)
mumbai_18 = missingvalue_function(mumbai_18)
mumbai_18.isnull().sum()

max_pm10    0
avg_pm10    0
min_pm10    0
max_pm25    0
avg_pm25    0
min_pm25    0
max_co      0
avg_co      0
min_co      0
max_o3      0
avg_o3      0
min_o3      0
max_so2     0
avg_so2     0
min_so2     0
max_no2     0
avg_no2     0
min_no2     0
dtype: int64

#### Extracting and Cleaning Data from Delhi

In [42]:
#Apply the function
#Check for missing data
#Fill missing data
parameters = ['pm10', 'pm25', 'pm1', 'co', 'o3', 'so2', 'no2', 'humidity', 'pressure', 'precipitation',
'temperature', 'wind-speed', 'wd', 'uvi']
Delhi_18 = cleaning_function(df18, 'Delhi', parameters)
Delhi_18.head()

The parameter pm1 doesn't exist in the dataframe and will be ignored
The parameter precipitation doesn't exist in the dataframe and will be ignored
The parameter uvi doesn't exist in the dataframe and will be ignored


Unnamed: 0_level_0,max_pm10,avg_pm10,min_pm10,max_pm25,avg_pm25,min_pm25,max_co,avg_co,min_co,max_o3,...,min_pressure,max_temperature,avg_temperature,min_temperature,max_wind-speed,avg_wind-speed,min_wind-speed,max_wd,avg_wd,min_wd
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,889.0,302.0,40.0,999.0,342.0,81.0,98.5,5.1,0.1,147.3,...,,,,,,,,,,
2018-01-02,999.0,318.0,21.0,588.0,327.0,33.0,118.9,4.0,0.1,149.0,...,,,,,,,,,,
2018-01-03,722.0,192.0,76.0,525.0,253.0,162.0,74.2,5.4,0.1,75.5,...,,,,,,,,,,
2018-01-04,859.0,226.0,20.0,999.0,269.0,53.0,149.7,7.6,0.1,37.2,...,,,,,,,,,,
2018-01-05,863.0,200.0,70.0,999.0,274.0,98.0,85.7,6.8,0.1,39.0,...,,,,,,,,,,


In [43]:
Delhi_18.isnull().sum()

max_pm10             0
avg_pm10             0
min_pm10             0
max_pm25             0
avg_pm25             0
min_pm25             0
max_co               0
avg_co               0
min_co               0
max_o3               0
avg_o3               0
min_o3               0
max_so2              0
avg_so2              0
min_so2              0
max_no2              0
avg_no2              0
min_no2              0
max_humidity       121
avg_humidity       121
min_humidity       121
max_pressure       121
avg_pressure       121
min_pressure       121
max_temperature    121
avg_temperature    121
min_temperature    121
max_wind-speed     120
avg_wind-speed     120
min_wind-speed     120
max_wd             133
avg_wd             133
min_wd             133
dtype: int64

In [44]:
Delhi_18.drop(columns=['max_humidity', 'avg_humidity', 'min_humidity','max_pressure', 'avg_pressure', 'min_pressure', 'max_temperature', 'avg_temperature', 'min_temperature', 'max_wind-speed', 'avg_wind-speed', 'min_wind-speed','max_wd','avg_wd','min_wd'], inplace=True)
Delhi_18 = missingvalue_function(Delhi_18)
Delhi_18.isnull().sum()

max_pm10    0
avg_pm10    0
min_pm10    0
max_pm25    0
avg_pm25    0
min_pm25    0
max_co      0
avg_co      0
min_co      0
max_o3      0
avg_o3      0
min_o3      0
max_so2     0
avg_so2     0
min_so2     0
max_no2     0
avg_no2     0
min_no2     0
dtype: int64

#### Extracting and Cleaning Data from Thiruvananthapuram

In [45]:
#Apply the function
#Check for missing data
#Fill missing data
parameters = ['pm10', 'pm25', 'pm1', 'co', 'o3', 'so2', 'no2', 'humidity', 'pressure', 'precipitation',
'temperature', 'wind-speed', 'wd', 'uvi']
thiru_18 = cleaning_function(df18, 'Thiruvananthapuram', parameters)
thiru_18.head()

The parameter pm1 doesn't exist in the dataframe and will be ignored
The parameter precipitation doesn't exist in the dataframe and will be ignored
The parameter wd doesn't exist in the dataframe and will be ignored
The parameter uvi doesn't exist in the dataframe and will be ignored


Unnamed: 0_level_0,max_pm10,avg_pm10,min_pm10,max_pm25,avg_pm25,min_pm25,max_co,avg_co,min_co,max_o3,...,min_humidity,max_pressure,avg_pressure,min_pressure,max_temperature,avg_temperature,min_temperature,max_wind-speed,avg_wind-speed,min_wind-speed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,80.0,65.0,55.0,164.0,154.0,112.0,14.9,12.0,9.4,43.3,...,,,,,,,,,,
2018-01-02,99.0,67.0,49.0,181.0,158.0,109.0,16.2,12.5,8.5,42.9,...,,,,,,,,,,
2018-01-03,74.0,65.0,50.0,162.0,153.0,97.0,16.3,12.9,10.2,40.7,...,,,,,,,,,,
2018-01-04,86.0,72.0,55.0,168.0,160.0,107.0,21.7,12.2,4.0,35.4,...,,,,,,,,,,
2018-01-05,124.0,82.0,61.0,181.0,163.0,152.0,20.7,11.6,5.8,32.9,...,,,,,,,,,,


In [46]:
thiru_18.isnull().sum()

max_pm10             0
avg_pm10             0
min_pm10             0
max_pm25             0
avg_pm25             0
min_pm25             0
max_co               0
avg_co               0
min_co               0
max_o3               0
avg_o3               0
min_o3               0
max_so2              0
avg_so2              0
min_so2              0
max_no2              0
avg_no2              0
min_no2              0
max_humidity       133
avg_humidity       133
min_humidity       133
max_pressure       133
avg_pressure       133
min_pressure       133
max_temperature    133
avg_temperature    133
min_temperature    133
max_wind-speed     133
avg_wind-speed     133
min_wind-speed     133
dtype: int64

In [47]:
thiru_18.drop(columns=['max_humidity', 'avg_humidity', 'min_humidity','max_pressure', 'avg_pressure', 'min_pressure', 'max_temperature', 'avg_temperature', 'min_temperature', 'max_wind-speed', 'avg_wind-speed', 'min_wind-speed'], inplace=True)


In [48]:
thiru_18 = missingvalue_function(thiru_18)
thiru_18.isnull().sum()

max_pm10    0
avg_pm10    0
min_pm10    0
max_pm25    0
avg_pm25    0
min_pm25    0
max_co      0
avg_co      0
min_co      0
max_o3      0
avg_o3      0
min_o3      0
max_so2     0
avg_so2     0
min_so2     0
max_no2     0
avg_no2     0
min_no2     0
dtype: int64

#### Extracting and Cleaning Data from Bangalore

In [49]:
#Apply the function
#Check for missing data
#Fill missing data
parameters = ['pm10', 'pm25', 'pm1', 'co', 'o3', 'so2', 'no2', 'humidity', 'pressure', 'precipitation',
'temperature', 'wind-speed', 'wd', 'uvi']
bangalore_18 = cleaning_function(df18, 'Bengaluru', parameters)
bangalore_18.head()

The parameter pm1 doesn't exist in the dataframe and will be ignored
The parameter precipitation doesn't exist in the dataframe and will be ignored
The parameter wd doesn't exist in the dataframe and will be ignored
The parameter uvi doesn't exist in the dataframe and will be ignored


Unnamed: 0_level_0,max_pm10,avg_pm10,min_pm10,max_pm25,avg_pm25,min_pm25,max_co,avg_co,min_co,max_o3,...,min_humidity,max_pressure,avg_pressure,min_pressure,max_temperature,avg_temperature,min_temperature,max_wind-speed,avg_wind-speed,min_wind-speed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,113.0,56.0,38.0,158.0,95.0,34.0,34.4,9.4,1.8,131.3,...,,,,,,,,,,
2018-01-02,101.0,57.0,36.0,162.0,95.0,70.0,23.9,9.6,0.2,102.7,...,,,,,,,,,,
2018-01-03,121.0,52.0,12.0,158.0,82.0,53.0,27.7,11.4,0.4,93.1,...,,,,,,,,,,
2018-01-04,128.0,57.0,32.0,157.0,90.0,50.0,22.1,10.0,0.2,89.5,...,,,,,,,,,,
2018-01-05,137.0,60.0,41.0,162.0,87.0,56.0,26.1,10.5,0.2,105.2,...,,,,,,,,,,


In [50]:
bangalore_18.isnull().sum()

max_pm10            72
avg_pm10            72
min_pm10            72
max_pm25             0
avg_pm25             0
min_pm25             0
max_co               0
avg_co               0
min_co               0
max_o3              79
avg_o3              79
min_o3              79
max_so2              0
avg_so2              0
min_so2              0
max_no2              0
avg_no2              0
min_no2              0
max_humidity       130
avg_humidity       130
min_humidity       130
max_pressure       130
avg_pressure       130
min_pressure       130
max_temperature    130
avg_temperature    130
min_temperature    130
max_wind-speed     130
avg_wind-speed     130
min_wind-speed     130
dtype: int64

In [51]:
bangalore_18.drop(columns=['max_humidity', 'avg_humidity', 'min_humidity','max_pressure', 'avg_pressure', 'min_pressure', 'max_temperature', 'avg_temperature', 'min_temperature', 'max_wind-speed', 'avg_wind-speed', 'min_wind-speed'], inplace=True)

In [52]:
bangalore_18 = missingvalue_function(thiru_18)
bangalore_18.isnull().sum()

max_pm10    0
avg_pm10    0
min_pm10    0
max_pm25    0
avg_pm25    0
min_pm25    0
max_co      0
avg_co      0
min_co      0
max_o3      0
avg_o3      0
min_o3      0
max_so2     0
avg_so2     0
min_so2     0
max_no2     0
avg_no2     0
min_no2     0
dtype: int64