In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
from ipywidgets import widgets
import seaborn as sns
import folium
from folium.plugins import TimestampedGeoJson
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.


In [None]:
#Reading Needed files
# One CSV per year; the three frames are kept under their own names as well.
DATA_DIR = "/kaggle/input/air-pollution-dataset-india20162018"
df_2016 = pd.read_csv(DATA_DIR + "/2016_india_withoutNA.csv")
df_2017 = pd.read_csv(DATA_DIR + "/2017_india_withoutNA.csv")
df_2018 = pd.read_csv(DATA_DIR + "/2018_india_withoutNA.csv")
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks all three years in one pass and renumbers the rows 0..n-1.
df = pd.concat([df_2016, df_2017, df_2018], ignore_index=True)

In [None]:
#---For MAP
# Work on a trimmed copy: drop() returns a new frame, so df itself is untouched.
unused_columns = ['local','country','attribution','location','unit']
data_monthly_dropdown = df.drop(unused_columns, axis=1)

# Cut the last 17 characters off each utc string and parse the remainder
# as a year-month timestamp.
month_text = data_monthly_dropdown['utc'].map(lambda stamp: str(stamp)[:-17])
data_monthly_dropdown['utc'] = pd.to_datetime(month_text, format="%Y-%m")

# Aggregate: one row per (month, city, parameter) holding the mean of every
# remaining column. agg_monthly_dropdown is an alias of the trimmed frame.
agg_monthly_dropdown = data_monthly_dropdown
grouped_monthly_dropdown = (
    agg_monthly_dropdown
    .groupby(['utc','city','parameter'])
    .mean()
)

In [None]:
#Adding new columns for ease of work
# Mirror the three index levels (month, city, parameter) as ordinary columns so
# later cells can filter and iterate without touching the MultiIndex.
#
# The values are taken straight from the index in one vectorised step. The
# previous row-by-row chained assignment (frame.date[i] = ...) was slow, is a
# no-op under pandas Copy-on-Write, and seeded the string columns with integer 0.
# Plain column assignment also keeps this cell safe to re-run, whereas
# DataFrame.insert raises if the column already exists.
group_index = grouped_monthly_dropdown.index
grouped_monthly_dropdown['date'] = group_index.get_level_values(0)
grouped_monthly_dropdown['city'] = group_index.get_level_values(1)
grouped_monthly_dropdown['parameter'] = group_index.get_level_values(2)

In [None]:
#Defining the color scheme
# Ten hex colours; color_coding() picks one by the bin index of a reading.
color_scale = np.array([
    '#10ff00', '#99ff00', '#ccff00', '#ffff00', '#ffee00',
    '#FFCC00', '#ff9900', '#ff6600', '#ff0000', '#960018',
])
# Render the palette as a strip of swatches for a quick visual check.
sns.palplot(sns.color_palette(color_scale))


In [None]:
#MAP displaying the varying concentrations of parameters over time

# Per-pollutant metadata, keyed by the value stored in the 'parameter' column.
#   notation  - short label
#   name      - human-readable name (printed by plot_pollutant)
#   bin_edges - 9 ascending thresholds, giving 10 bins to match the 10 colours
#               in color_scale
pollutants = {
    'so2': {
        'notation' : 'SO2',
        'name' :'Sulphur dioxide',
        'bin_edges' : np.array([15,30,45,60,80,100,125,165,250])
    },
    'pm10': {
        'notation' : 'PM10',
        'name' :'Particulate matter < 10 µm',
        'bin_edges' : np.array([10,20,30,40,50,70,100,150,200])
    },
    'o3': {
        'notation' : 'O3',
        'name' :'Ozone',
        'bin_edges' : np.array([30,50,70,90,110,145,180,240,360])
    },
    'no2': {
        'notation' : 'NO2',
        'name' :'Nitrogen dioxide',
        'bin_edges' : np.array([25,45,60,80,110,150,200,270,400])
    },
    'co': {
        'notation' : 'CO',
        'name' :'Carbon monoxide',
        'bin_edges' : np.array([1.4,2.1,2.8,3.6,4.5,5.2,6.6,8.4,13.7])
    },
    'pm25': {
        'notation' : 'PM25',
        # PM2.5 denotes particles smaller than 2.5 µm (not 25 µm).
        'name' :'Particulate matter < 2.5 µm',
        'bin_edges' : np.array([10,20,30,40,50,70,100,150,200])
    }
}

In [None]:
#Defining all the required functions
def load_data(pollutant_ID):
    """Return the rows of ``grouped_monthly_dropdown`` for one pollutant.

    pollutant_ID is compared for equality against the frame's ``parameter``
    column; only matching rows are returned.
    """
    print('> Loading data...')
    is_requested = grouped_monthly_dropdown['parameter'] == pollutant_ID
    return grouped_monthly_dropdown[is_requested]

def color_coding(poll, bin_edges):
    """Look up the ``color_scale`` entry for a concentration.

    With ``right=True`` a value lands in bin ``i`` when
    ``bin_edges[i-1] < poll <= bin_edges[i]``: anything at or below the first
    edge gets index 0 and anything above the last edge gets ``len(bin_edges)``.
    """
    bucket = np.digitize(poll, bin_edges, right=True)
    return color_scale[bucket]


def prepare_data(df, pollutant_ID):
    """Return a copy of ``df`` with a ``color`` column derived from ``value``.

    Parameters
    ----------
    df : DataFrame with a numeric ``value`` column.
    pollutant_ID : key into ``pollutants``; selects which ``bin_edges`` to use.

    Returns
    -------
    A new DataFrame; the frame passed in is left unmodified.
    """
    print('> Preparing data...')
    bin_edges = pollutants[pollutant_ID]['bin_edges']
    # color_coding accepts whole arrays, so the column is coloured in one call.
    colors = color_coding(df['value'].to_numpy(), bin_edges)
    # assign() builds a new frame rather than writing into the filtered slice
    # the caller hands over, which avoids SettingWithCopyWarning and keeps the
    # source data free of side effects.
    return df.assign(color=colors)

def create_geojson_features(df):
    """Convert each row of ``df`` into a GeoJSON point feature.

    Reads the ``longitude``, ``latitude``, ``date`` and ``color`` columns.
    Returns a list with one feature dict per row, in row order.
    """
    print('> Creating GeoJSON features...')

    def to_feature(record):
        # The same colour is used for the outline style and the marker fill.
        marker_color = record['color']
        point = {
            'type': 'Point',
            'coordinates': [record['longitude'], record['latitude']],
        }
        marker_style = {
            'fillColor': marker_color,
            'fillOpacity': 0.8,
            'stroke': 'true',
            'radius': 7,
        }
        return {
            'type': 'Feature',
            'geometry': point,
            'properties': {
                # Calendar date only (YYYY-MM-DD), without the time-of-day part.
                'time': str(record['date'].date()),
                'style': {'color': marker_color},
                'icon': 'circle',
                'iconstyle': marker_style,
            },
        }

    return [to_feature(record) for _, record in df.iterrows()]

def make_map(features):
    """Build a folium map with a time-slider layer showing ``features``.

    features is a list of GeoJSON feature dicts; they are wrapped in a
    FeatureCollection and attached to the map as a TimestampedGeoJson layer
    with a one-month period. Returns the folium map.
    """
    print('> Making map...')
    map_center = [28.65381, 77.22897]
    pollution_map = folium.Map(
        location=map_center,
        control_scale=True,
        zoom_start=8,
    )

    feature_collection = {
        'type': 'FeatureCollection',
        'features': features,
    }
    time_layer = TimestampedGeoJson(
        feature_collection,
        period='P1M',
        add_last_point=True,
        auto_play=False,
        loop=False,
        max_speed=1,
        loop_button=True,
        date_options='YYYY/MM',
        time_slider_drag_update=True,
    )
    time_layer.add_to(pollution_map)

    print('> Done.')
    return pollution_map

def plot_pollutant(pollutant_ID):
    """Run the load -> colour -> GeoJSON -> map pipeline for one pollutant.

    Parameters
    ----------
    pollutant_ID : key into ``pollutants`` (e.g. 'pm25', 'no2').

    Returns
    -------
    (map, frame) : the folium map and the coloured data frame it was built from.
    """
    # The notebook loads the 2016, 2017 and 2018 files, so the banner names
    # that period (it previously said 2013-2015).
    print('Mapping {} pollution in India in 2016-2018'.format(pollutants[pollutant_ID]['name']))
    pollutant_map_df = load_data(pollutant_ID)
    pollutant_map_df = prepare_data(pollutant_map_df, pollutant_ID)
    features = create_geojson_features(pollutant_map_df)
    return make_map(features), pollutant_map_df

#Change the name below to any pollutant you want, e.g. pollution_map, pollutant_map_df = plot_pollutant('no2')
pollution_map, pollutant_map_df = plot_pollutant('pm25')
# Save into the current working directory: results written there are kept as
# notebook output, whereas /kaggle/input is the input-data mount.
pollution_map.save('pollution_pm25.html')
pollution_map



In [None]:
# PM10 map.
pollution_map, pollutant_map_df = plot_pollutant('pm10')
# Save into the current working directory: results written there are kept as
# notebook output, whereas /kaggle/input is the input-data mount.
pollution_map.save('pollution_pm10.html')
pollution_map


In [None]:
# SO2 map.
pollution_map, pollutant_map_df = plot_pollutant('so2')
# Save into the current working directory: results written there are kept as
# notebook output, whereas /kaggle/input is the input-data mount.
pollution_map.save('pollution_so2.html')
pollution_map


In [None]:
# NO2 map.
pollution_map, pollutant_map_df = plot_pollutant('no2')
# Save into the current working directory: results written there are kept as
# notebook output, whereas /kaggle/input is the input-data mount.
pollution_map.save('pollution_no2.html')
pollution_map


In [None]:
# O3 map.
pollution_map, pollutant_map_df = plot_pollutant('o3')
# Save into the current working directory: results written there are kept as
# notebook output, whereas /kaggle/input is the input-data mount.
pollution_map.save('pollution_o3.html')
pollution_map
