In [None]:
import folium   as f
import pandas as pd
import numpy as np
import matplotlib.pyplot   as plt
import matplotlib.ticker   as ticker
import chart_studio.plotly as py
import plotly.graph_objs   as go
import seaborn as sns
from   folium.plugins      import HeatMap
from   pygeocoder          import Geocoder
from plotly.offline        import init_notebook_mode, iplot

# Notebook-wide plotting setup: initialise plotly's offline notebook mode,
# show matplotlib figures inline and apply the "fivethirtyeight" style.
init_notebook_mode(connected=True)
%matplotlib inline
plt.style.use("fivethirtyeight")

In [None]:
# Load the readings of all devices. The path is relative to the notebook's
# working directory; no index column is given, so rows get a default 0..n-1 index.
raw_data = pd.read_csv('../../DataSets/sangamiitmadras/saaf_data.csv')

# Exploratory Data Analysis

In [None]:
sns.set(font_scale=0.8)
plt.figure(figsize=(10,8))

# Count the readings per device once; the (descending) index gives the bar order.
ncount = raw_data.device_id.value_counts()
order = ncount.index
ax = sns.countplot(x="device_id", data=raw_data, order=order)
ax.set_title('Distribution of devices frequencies')
ax.set_xlabel('Device id')
ax.set_ylabel('Count')

# Second y-axis that expresses the same bars as a share of all readings.
ax2 = ax.twinx()

# Percentages are read on the left, absolute counts on the right.
ax2.yaxis.tick_left()
ax.yaxis.tick_right()

ax2.yaxis.set_label_position('left')
ax.yaxis.set_label_position('right')

ax2.set_ylabel('Frequency [%]')

# The twin axis holds no artists of its own, so without explicit limits it
# would show matplotlib's default 0..1 range. Derive its limits from the count
# axis so that every tick on the left is the percentage of the tick on the right.
total_readings = ncount.sum()
if total_readings:
    count_low, count_high = ax.get_ylim()
    ax2.set_ylim(100 * count_low / total_readings, 100 * count_high / total_readings)

# Same number of evenly spaced ticks on both axes keeps them aligned.
ax.yaxis.set_major_locator(ticker.LinearLocator(11))
ax2.yaxis.set_major_locator(ticker.LinearLocator(11))

#plt.savefig('device_frequency.png')

## Removing the outlier locations from all Stationary devices

In [None]:
def getlatlog_S(data=None, devices=None, lon_range=(50, 100), lat_range=(10, 40)):
    """Replace the noisy coordinates of each stationary device by one fixed position.

    For every device the longitude and latitude readings inside the accepted
    range are collected, the mean of the *distinct* accepted values is taken,
    and that mean is written back to every row of the device (outliers
    included), so the frame is modified in place.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with ``device_id``, ``longitude`` and ``latitude`` columns.
        Defaults to the notebook-level ``raw_data``.
    devices : iterable of str, optional
        Device ids to process. Defaults to the nine stationary devices
        S1, S3, S4, S5, S6, S7, S8, S9 and S10.
    lon_range, lat_range : tuple of (low, high)
        Inclusive bounds; readings outside them are treated as outliers.

    Returns
    -------
    dict
        ``{device_id: (latitude, longitude)}`` in the order of ``devices``.

    Raises
    ------
    ValueError
        If a requested device has no rows in the frame.
    """
    frame = raw_data if data is None else data
    if devices is None:
        devices = ['S1','S3','S4','S5','S6','S7','S8','S9','S10']

    # Longitude is handled before latitude, one accepted range per column.
    bounds = {'longitude': lon_range, 'latitude': lat_range}

    latlog = {}
    for device in devices:
        # Row mask of this device, computed once and reused for both columns.
        rows = frame.loc[:, 'device_id'] == device
        if not rows.any():
            raise ValueError('no readings found for device {!r}'.format(device))

        position = {}
        for column, (low, high) in bounds.items():
            readings = frame.loc[rows, column]
            accepted = readings[readings.between(low, high)]
            if len(accepted):
                # Every distinct accepted value counts once, however often it occurs.
                position[column] = accepted.value_counts().index.to_numpy().mean()
            else:
                # Nothing usable for this device: keep NaN rather than a made-up value.
                position[column] = np.nan
            frame.loc[rows, column] = position[column]

        # storing in a dictionary
        latlog[device] = (position['latitude'], position['longitude'])

    return latlog

# Compute the {device_id: (latitude, longitude)} mapping (this also rewrites the
# coordinates of the stationary devices inside raw_data) and display it.
latlog = getlatlog_S()
latlog

## Calculating the addresses of these GeoLocations

In [None]:

#########################################
# My query limit is reached to maximum  #
#########################################

#api = os.environ['GOOGLE_MAPS_API_KEY']  # key redacted: never commit credentials, load them from the environment
#geocoder = Geocoder(api) 
#for i,j in enumerate(df.device_id):
#results = geocoder.reverse_geocode(df.latitude.loc[0], df.longitude.loc[0])
#df['address'] = results.coordinates
#results.city
#results.country
#results.street_address
#results.administrative_area_level_1
#results.formatted_address

In [None]:
# Hand-entered address of each stationary device; the variable number matches
# the device id (s1 -> S1, s3 -> S3, ...).
# NOTE(review): presumably the result of the reverse-geocoding attempt in the
# disabled cell above -- confirm the source of these strings.
s1 = 'Gurugram Sector 48, Gurgaon, Haryana, 122001 Sector 48 Gurugram India'
s3 = 'Gurugram South City 2, Gurgaon, Haryana, 122001 South City 2, Sector 38 Gurugram India'
s4 = '19 Sector 15 I Road, Gurgaon, Haryana, 122001 Sector 15, Part 1 Gurugram India'
s5 ='Gurugram Wapiti Enclave, Gurgaon, Haryana, 122001 Wapiti Enclave, Sector 33 Gurugram India'
s6 = 'Chennai Korattur, Tamil Nadu, 600080 Korattur, Kamaraj Nagar Chennai India'
s7 = 'Menambedu Road, Chennai Mannurpet, Tamil Nadu, 600098 Mannurpet, Korattur Chennai India'
s8 = '5-7/A Spartan Nagar 1st Street, Chennai, Tamil Nadu, 600050 TS Krishna Nagar, Mogappair Chennai India'
s9 = 'Olympic Colony Street, Chennai Padi, Tamil Nadu, 600050 Padi, Olympic Nagar Chennai India'
s10 = 'Chennai Mannurpet, Tamil Nadu, 600098 Mannurpet, Korattur Chennai India'

In [None]:
# One row per stationary device. The row order (S1, S3, S4, S5, S6, S7, S8, S9, S10)
# matters: later cells look an address up by the position of its device.
df_stat = pd.DataFrame({'address':[s1,s3,s4,s5,s6,s7,s8,s9,s10]})

## Plotting the locations of all Gurugram stationary devices

In [None]:
# Stationary devices located in Gurugram; the map is centred on device S5.
gr_dev = ['S1','S3','S4','S5']
init_map = f.Map(title = 'Gurugram Stationary objects', zoom_start=14, location=latlog['S5'])

# latlog and df_stat list the devices in the same order, so the position of a
# device in latlog is also the row label of its address in df_stat.
for position, station in enumerate(latlog):
    if station not in gr_dev:
        continue

    latitude, longitude = latlog[station]

    # One circle marker per station; clicking it opens the address popup.
    marker = f.CircleMarker(
        location=[latitude, longitude],
        radius=10,
        popup=f.Popup(df_stat.address.loc[position], max_width=1000),
        fill=True,
        color='Blue',
        fill_color='Yellow',
        fill_opacity=0.4,
    )
    marker.add_to(init_map)
init_map

## To check address click on the marker

## Plotting the locations of all Chennai stationary devices

In [None]:
# Stationary devices located in Chennai; the map is centred on device S6.
ch_dev = ['S6','S7','S8','S9','S10']

init_map = f.Map(zoom_start=14, width=800, height=400, location=latlog['S6'])

# latlog and df_stat list the devices in the same order, so the position of a
# device in latlog is also the row label of its address in df_stat.
for position, station in enumerate(latlog):
    if station not in ch_dev:
        continue

    latitude, longitude = latlog[station]

    # One circle marker per station; clicking it opens the address popup.
    marker = f.CircleMarker(
        location=[latitude, longitude],
        radius=7,
        popup=f.Popup(df_stat.address.loc[position], max_width=1000),
        fill=True,
        color='Blue',
        fill_color='Yellow',
        fill_opacity=0.6,
    )
    marker.add_to(init_map)
init_map
# To check address click on the marker

## Plotting the temperature curve of gurugram stationary devices

In [None]:
# Temperature over time for stationary device S1 (row 0 of df_stat).
device_rows = raw_data[raw_data.device_id == 'S1']
energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature)

# The plot title uses the part of the address before the first comma.
place = df_stat.address.loc[0].split(',')[0]
layout = go.Layout(
    title='Temperature at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Temperature'},
)

fig = go.Figure(data=[energy_data], layout=layout)
iplot(fig)

In [None]:
# Temperature over time for stationary device S3 (row 1 of df_stat).
device_rows = raw_data[raw_data.device_id == 'S3']
energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature)

# The plot title uses the part of the address before the first comma.
place = df_stat.address.loc[1].split(',')[0]
layout = go.Layout(
    title='Temperature at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Temperature'},
)

fig = go.Figure(data=[energy_data], layout=layout)
iplot(fig)

In [None]:
# Temperature over time for stationary device S4 (row 2 of df_stat).
device_rows = raw_data[raw_data.device_id == 'S4']
energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature)

# The plot title uses the part of the address before the first comma.
place = df_stat.address.loc[2].split(',')[0]
layout = go.Layout(
    title='Temperature at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Temperature'},
)

fig = go.Figure(data=[energy_data], layout=layout)
iplot(fig)


In [None]:
# Temperature over time for stationary device S5 (row 3 of df_stat).
device_rows = raw_data[raw_data.device_id == 'S5']
energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature)

# The plot title uses the part of the address before the first comma.
place = df_stat.address.loc[3].split(',')[0]
layout = go.Layout(
    title='Temperature at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Temperature'},
)

fig = go.Figure(data=[energy_data], layout=layout)
iplot(fig)

## Creating a new dataset from extracting information

In [None]:
# Device ids in the same order as the rows of df_stat, plus the two city groups.
x = ['S1','S3','S4','S5','S6','S7','S8','S9','S10']
x1 = ['S1','S3','S4','S5']
x2 = ['S6','S7','S8','S9','S10']

# Fill df_stat row by row with the device id and its temperature statistics.
for row_label, device in enumerate(x):
    device_temps = raw_data.loc[raw_data.device_id == device, 'temperature']
    df_stat.loc[row_label, 'Device_id'] = device
    summary = (
        ('Temperature_Avg', device_temps.mean()),
        ('Temperature_Min', device_temps.min()),
        ('Temperature_Max', device_temps.max()),
    )
    for column, value in summary:
        df_stat.loc[row_label, column] = value
df_stat

In [None]:
# Temperature statistics of the first four rows of df_stat.
df_stat.loc[:, ['Temperature_Avg', 'Temperature_Min', 'Temperature_Max']].iloc[:4]

In [None]:
# Line plot of the temperature statistics of the first four rows of df_stat;
# the x-axis is the row label of each device.
temperature_columns = ['Temperature_Avg', 'Temperature_Min', 'Temperature_Max']
df_stat.loc[:, temperature_columns].iloc[:4].plot()

plt.title('Gurugram Temperature')
plt.xlabel("device_id's")
plt.ylabel('Temperature')
plt.show()

## Plotting the relation between Temperature and humidity of stationary devices

In [None]:
# Humidity and temperature of device S1 drawn against the same time axis.
device_rows = raw_data[raw_data.device_id == 'S1']

energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.humidity, name='Humidity')
energy_data2 = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature, name='Temperature')

# NOTE(review): title and y-axis only mention humidity although the
# temperature trace shares the same axis -- confirm the intended wording.
place = df_stat.address.loc[0].split(',')[0]
layout = go.Layout(
    title='Humidity at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Humidity'},
)

fig = go.Figure(data=[energy_data, energy_data2], layout=layout)
iplot(fig)

In [None]:
# Humidity and temperature of device S3 drawn against the same time axis.
device_rows = raw_data[raw_data.device_id == 'S3']

energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.humidity, name='Humidity')
energy_data2 = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature, name='Temperature')

# NOTE(review): title and y-axis only mention humidity although the
# temperature trace shares the same axis -- confirm the intended wording.
place = df_stat.address.loc[1].split(',')[0]
layout = go.Layout(
    title='Humidity at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Humidity'},
)

fig = go.Figure(data=[energy_data, energy_data2], layout=layout)
iplot(fig)

### From 8th May 2019 to 15th May 2019 the devices did not send any readings

In [None]:
# Humidity and temperature of device S4 drawn against the same time axis.
device_rows = raw_data[raw_data.device_id == 'S4']

# NOTE(review): only this device's traces pass legendgroup=True; the cells for
# the other devices do not set it -- confirm whether the grouping is intended.
energy_data = go.Scatter(
    x=device_rows.timestamp,
    y=device_rows.humidity,
    legendgroup=True,
    name='Humidity',
)
energy_data2 = go.Scatter(
    x=device_rows.timestamp,
    y=device_rows.temperature,
    legendgroup=True,
    name='Temperature',
)

# The plot title uses the part of the address before the first comma.
place = df_stat.address.loc[2].split(',')[0]
layout = go.Layout(
    title='Humidity at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Humidity'},
)

fig = go.Figure(data=[energy_data, energy_data2], layout=layout)
iplot(fig)

In [None]:
# Humidity and temperature of device S5 drawn against the same time axis.
device_rows = raw_data[raw_data.device_id == 'S5']

energy_data = go.Scatter(x=device_rows.timestamp, y=device_rows.humidity, name='Humidity')
energy_data2 = go.Scatter(x=device_rows.timestamp, y=device_rows.temperature, name='Temperature')

# NOTE(review): title and y-axis only mention humidity although the
# temperature trace shares the same axis -- confirm the intended wording.
place = df_stat.address.loc[3].split(',')[0]
layout = go.Layout(
    title='Humidity at {}'.format(place),
    xaxis={'title': 'Date'},
    yaxis={'title': 'Humidity'},
)

fig = go.Figure(data=[energy_data, energy_data2], layout=layout)
iplot(fig)

## Calculating the location/Position of Mobile devices

In [None]:
def getlatlog_M(data=None, lat_bounds=None, lon_bounds=(50, 100)):
    """Collect the plausible positions recorded by each mobile device.

    A reading is kept only when BOTH its latitude and its longitude lie inside
    the accepted (inclusive) ranges. Filtering the two coordinates with one
    combined mask guarantees that the three returned sequences have the same
    length and that the values at a given position belong to the same row.
    No random sampling is involved, so the result is reproducible.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with ``device_id``, ``latitude`` and ``longitude`` columns.
        Defaults to the notebook-level ``raw_data``.
    lat_bounds : dict, optional
        ``{device_id: (low, high)}`` latitude range per device. Defaults to
        (25, 28.5) for M1, (25, 30) for M2 and (10, 40) for M3-M6.
    lon_bounds : tuple of (low, high)
        Longitude range shared by all devices.

    Returns
    -------
    dict
        ``{device_id: (latitudes, longitudes, row_labels)}`` where the first
        two entries are numpy arrays and the third is the pandas index of the
        kept rows. A device without any valid reading maps to empty sequences.
    """
    frame = raw_data if data is None else data
    if lat_bounds is None:
        lat_bounds = {
            'M1': (25, 28.5),
            'M2': (25, 30),
            'M3': (10, 40),
            'M4': (10, 40),
            'M5': (10, 40),
            'M6': (10, 40),
        }
    lon_low, lon_high = lon_bounds

    latlog_M = {}
    for device, (lat_low, lat_high) in lat_bounds.items():
        positions = frame.loc[frame.loc[:, 'device_id'] == device, ['latitude', 'longitude']]

        # A row is usable only when both of its coordinates are plausible.
        usable = (positions['latitude'].between(lat_low, lat_high)
                  & positions['longitude'].between(lon_low, lon_high))
        kept = positions.loc[usable]

        latlog_M[device] = (kept['latitude'].values,
                            kept['longitude'].values,
                            kept.index)

    return latlog_M

In [None]:
# Positions of the mobile devices: {device_id: (latitudes, longitudes, row labels)}.
# The bare expression below displays the whole dictionary.
latlog_M = getlatlog_M()
latlog_M

# Plotting the HeatMap of Humidity and Temperature

In [None]:
def generateBaseMap(default_location=[28.42, 77.085880], default_zoom_start=13):
    """Return a new folium map with a scale control.

    Parameters
    ----------
    default_location : sequence of two floats
        Centre of the map, passed to ``f.Map`` as ``location``.
    default_zoom_start : int
        Initial zoom level, passed to ``f.Map`` as ``zoom_start``.
    """
    return f.Map(
        zoom_start=default_zoom_start,
        control_scale=True,
        location=default_location,
    )

## Humidity at M1

In [None]:
# latlog_M['M1'] holds (latitudes, longitudes, row labels) of device M1.
latitudes, longitudes, row_labels = latlog_M['M1']

# The third entry comes from `.index`, i.e. it contains index LABELS, so the
# matching rows are selected with label-based .loc rather than positional .iloc.
humidity = raw_data.loc[row_labels, 'humidity'].values
data = list(zip(latitudes, longitudes, humidity))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map

## Temperature at M1

In [None]:
# latlog_M['M1'] holds (latitudes, longitudes, row labels) of device M1.
latitudes, longitudes, row_labels = latlog_M['M1']

# The third entry comes from `.index`, i.e. it contains index LABELS, so the
# matching rows are selected with label-based .loc rather than positional .iloc.
temperature = raw_data.loc[row_labels, 'temperature'].values
data = list(zip(latitudes, longitudes, temperature))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map

## Humidity at M2

In [None]:
# latlog_M['M2'] holds (latitudes, longitudes, row labels) of device M2.
latitudes, longitudes, row_labels = latlog_M['M2']

# The third entry comes from `.index`, i.e. it contains index LABELS, so the
# matching rows are selected with label-based .loc rather than positional .iloc.
humidity = raw_data.loc[row_labels, 'humidity'].values
data = list(zip(latitudes, longitudes, humidity))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map

## Temperature at M2 

In [None]:

# latlog_M['M2'] holds (latitudes, longitudes, row labels) of device M2.
latitudes, longitudes, row_labels = latlog_M['M2']

# The third entry comes from `.index`, i.e. it contains index LABELS, so the
# matching rows are selected with label-based .loc rather than positional .iloc.
temperature = raw_data.loc[row_labels, 'temperature'].values
data = list(zip(latitudes, longitudes, temperature))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map


In [None]:
# Readings of the Gurugram stationary devices, stacked device by device.
# DataFrame.append no longer exists in pandas 2.x, and growing a frame inside a
# loop copies it on every step, so the per-device slices are concatenated once.
tem = pd.concat([raw_data[raw_data.device_id == i] for i in gr_dev])

In [None]:
# Add the readings of mobile devices M1-M4 below the rows already in `tem`.
# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
tem = pd.concat([tem] + [raw_data[raw_data.device_id == i] for i in ['M1','M2','M3','M4']])

# Every row keeps its (unique, default) label from raw_data, so dropping
# repeated labels makes this cell safe to run more than once.
tem = tem.loc[~tem.index.duplicated()]

## PM10 for gurugram

In [None]:
# One (latitude, longitude, pm10) triple per row of `tem`.
heat_columns = [tem[name].tolist() for name in ('latitude', 'longitude', 'pm10')]
data = list(zip(*heat_columns))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map


## PM2.5 for gurugram

In [None]:
# This section is headed "PM2.5 for gurugram", so it plots the PM2.5 readings
# of the same `tem` frame used for the PM10 map (it previously re-plotted the
# M2 temperature, a copy of an earlier cell).
# NOTE(review): the exact PM2.5 column name is not visible in this notebook, so
# it is looked up by normalised name (matches e.g. pm25, pm2_5, PM2.5) --
# replace the lookup with the real column name once confirmed.
pm25_columns = [
    column for column in tem.columns
    if str(column).lower().replace('.', '').replace('_', '').replace(' ', '') == 'pm25'
]
if not pm25_columns:
    raise KeyError('no PM2.5 column found in tem; available columns: {}'.format(list(tem.columns)))

data = list(zip(tem.latitude.tolist(), tem.longitude.tolist(), tem[pm25_columns[0]].tolist()))

base_map = generateBaseMap()
HeatMap(data = data,radius=10, max_zoom=13).add_to(base_map)
base_map