In [None]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib as plt
import statsmodels as stm

In [None]:
# Load the per-observation statistics table and the per-location metadata table.
df = pd.read_csv("data/df_stat.csv")
loc_meta = pd.read_csv("data/airqo_metadata.csv")

In [None]:
# Peek at the first five rows of df.
df.head(n=5)

In [None]:
# List the column labels available in df.
df.columns

In [None]:
# Number of rows for each distinct value of the location column.
df.value_counts(subset='location')

In [None]:
# Summary statistics of the target column.
df['target'].describe()

In [None]:
# Order rows by location so the categorical plots below list locations in sorted order.
# Rebind instead of using inplace=True: the cell stays a pure "input -> output"
# step and no frame is silently mutated across cells.
df = df.sort_values(by='location')

In [None]:
# Display the location metadata table.
loc_meta

In [None]:
# Derived per-location ratios:
#   popkm2                 = popn / km2
#   rel_hh_cook_charcoal   = hh_cook_charcoal / hh
#   rel_hh_cook_firewood   = hh_cook_firewood / hh
#   rel_hh_cook_unknown    = remainder so that the three shares sum to 1
loc_meta = loc_meta.assign(
    popkm2=lambda m: m['popn'] / m['km2'],
    rel_hh_cook_charcoal=lambda m: m['hh_cook_charcoal'] / m['hh'],
    rel_hh_cook_firewood=lambda m: m['hh_cook_firewood'] / m['hh'],
    # Later keyword arguments may reference columns created earlier in the same assign call.
    rel_hh_cook_unknown=lambda m: 1 - m['rel_hh_cook_charcoal'] - m['rel_hh_cook_firewood'],
)
loc_meta

In [None]:
# Plotting style: larger fonts and the 'ticks' axes style for every figure below.
sns.set(style='ticks', font_scale=1.5)

## Temperature

Temperature varies considerably between locations. Location A has a very unstable temperature, whereas locations B and C are more stable.

In [None]:
# Violin plot of mean_temp per location; the labelled figure is written to images/mean_temp.png.
(
    sns.catplot(data=df, x="location", y="mean_temp", kind="violin", height=7, aspect=10/7)
    .set(xlabel='Location', ylabel='Mean Temperature in °C', title='Mean Temperature by Location')
    .savefig('images/mean_temp.png')
);

In [None]:
# Violin plot of min_temp per location.
sns.catplot(data=df, x="location", y="min_temp", kind="violin");

In [None]:
# Violin plot of max_temp per location.
sns.catplot(data=df, x="location", y="max_temp", kind="violin");

In [None]:
# Violin plot of std_temp per location.
sns.catplot(data=df, x="location", y="std_temp", kind="violin");

## Precipitation

There was no rain in location B at all. Most rain was in location C.

In [None]:
# Average mean_precip per location.
# Select the column before aggregating: averaging every column first is
# wasteful and raises a TypeError on pandas >= 2.0 when df contains any
# non-numeric column.
df.groupby('location')['mean_precip'].mean()

In [None]:
# Violin plot of mean_precip per location (y-axis clipped to 0-2.5); saved to images/mean_precip.png.
(
    sns.catplot(data=df, x="location", y="mean_precip", kind="violin", height=7, aspect=10/7)
    .set(ylim=(0, 2.5), xlabel='Location', ylabel='Mean Precipitation in mm', title='Mean Precipitation by Location')
    .savefig('images/mean_precip.png')
);

In [None]:
# Violin plot of max_precip per location (y-axis clipped to 0-55).
(
    sns.catplot(data=df, x="location", y="max_precip", kind='violin')
    .set(ylim=(0, 55), xlabel='Location', ylabel='Maximum Precipitation')
);

## Humidity

Relative humidity is generally high, i.e. the climate is wet.

In [None]:
# Violin plot of mean_rel_humidity per location.
sns.catplot(data=df, x="location", y="mean_rel_humidity", kind="violin");

In [None]:
# Violin plot of min_rel_humidity per location.
sns.catplot(data=df, x="location", y="min_rel_humidity", kind="violin");

## Wind Direction

Wind comes mostly from one direction.

In [None]:
# One point per location: the aspect value from the location metadata.
sns.scatterplot(data=loc_meta, x="location", y="aspect");

In [None]:
# Violin plot of mean_wind_dir per location.
sns.catplot(data=df, x="location", y="mean_wind_dir", kind="violin");

In [None]:
# Violin plot of std_wind_dir per location.
sns.catplot(data=df, x="location", y="std_wind_dir", kind="violin");

## Wind Speed

Locations B and E are somewhat windier than the others. Rare storms occur in location C. Otherwise there is mostly just (very) light wind.

In [None]:
# Violin plot of mean_wind_spd per location.
sns.catplot(data=df, x="location", y="mean_wind_spd", kind="violin");

In [None]:
# Violin plot of max_wind_spd per location.
sns.catplot(data=df, x="location", y="max_wind_spd", kind="violin");

In [None]:
# Violin plot of std_wind_spd per location.
sns.catplot(data=df, x="location", y="std_wind_spd", kind="violin");

## Atmospheric Pressure

Location D lies at a much lower altitude than the other locations; therefore, the atmospheric pressure there is higher.

The atmospheric pressure might also depend on temperature.

In [None]:
# One point per location: the loc_altitude value from the location metadata.
sns.scatterplot(data=loc_meta, x="location", y="loc_altitude");

In [None]:
# Violin plot of mean_atmos_press per location; saved to images/mean_atmos.png.
(
    sns.catplot(data=df, x="location", y="mean_atmos_press", kind="violin", height=7, aspect=10/7)
    .set(xlabel='Location', ylabel='Mean Atmospheric Pressure in kPa', title='Mean Atmospheric Pressure by Location')
    .savefig('images/mean_atmos.png')
);

In [None]:
# Violin plot of min_atmos_press per location.
sns.catplot(data=df, x="location", y="min_atmos_press", kind="violin");

In [None]:
# Violin plot of max_atmos_press per location.
sns.catplot(data=df, x="location", y="max_atmos_press", kind="violin");

In [None]:
# Violin plot of std_atmos_press per location.
sns.catplot(data=df, x="location", y="std_atmos_press", kind="violin");

## Target

In [None]:
# Violin plot of the target per location (y-axis clipped to 0-200); saved to images/mean_target.png.
(
    sns.catplot(data=df, x="location", y="target", kind="violin", height=7, aspect=10/7)
    .set(ylim=(0, 200), xlabel='Location', ylabel='Mean PM2.5 in µg/m³', title='Mean Air Pollution by Location')
    .savefig('images/mean_target.png')
);

In [None]:
# Scatter plus linear fit of target against mean_temp, one panel per location.
sns.lmplot(
    data=df,
    x="mean_temp",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of target against mean_precip, one panel per location.
sns.lmplot(
    data=df,
    x="mean_precip",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of target against mean_rel_humidity, one panel per location.
sns.lmplot(
    data=df,
    x="mean_rel_humidity",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of target against mean_wind_spd, one panel per location.
sns.lmplot(
    data=df,
    x="mean_wind_spd",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of target against mean_wind_dir, one panel per location.
sns.lmplot(
    data=df,
    x="mean_wind_dir",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of target against mean_atmos_press, one panel per location.
sns.lmplot(
    data=df,
    x="mean_atmos_press",
    y="target",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Reshape the road-distance columns to long format (one row per location and road class)
# and draw them as grouped bars; the y-axis is clipped to 0-1000.
roads = loc_meta.melt(
    id_vars='location',
    value_vars=[
        'dist_trunk',
        'dist_primary',
        'dist_secondary',
        'dist_tertiary',
        'dist_unclassified',
        'dist_residential',
    ],
)
sns.barplot(data=roads, x="location", y="value", hue='variable').set(ylim=(0, 1000), ylabel='Distance in m');

In [None]:
# Stacked bars of the household cooking-fuel shares per location.
# The three rel_hh_cook_* columns sum to 1 for every location, so stacking
# shows the full split. Drawing three separate barplots from y=0 on the same
# axes makes the bars overlap, so taller bars hide shorter ones.
(
    loc_meta
    .set_index('location')[['rel_hh_cook_charcoal', 'rel_hh_cook_firewood', 'rel_hh_cook_unknown']]
    .plot.bar(stacked=True, color=['b', 'r', 'g'], rot=0)
    .set(xlabel='location', ylabel='Share of households')
);

## Scatterplots

In [None]:
# Scatter plus linear fit of mean_precip against mean_temp, one panel per location.
sns.lmplot(
    data=df,
    x="mean_temp",
    y="mean_precip",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of mean_atmos_press against mean_temp, one panel per location.
sns.lmplot(
    data=df,
    x="mean_temp",
    y="mean_atmos_press",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus robust linear fit (robust=True) of mean_wind_spd against mean_temp,
# one panel per location.
sns.lmplot(
    data=df,
    x="mean_temp",
    y="mean_wind_spd",
    col='location',
    robust=True,
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of mean_rel_humidity against mean_temp, one panel per location.
sns.lmplot(
    data=df,
    x="mean_temp",
    y="mean_rel_humidity",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of mean_atmos_press against mean_rel_humidity, one panel per location.
sns.lmplot(
    data=df,
    x="mean_rel_humidity",
    y="mean_atmos_press",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);

In [None]:
# Scatter plus linear fit of mean_wind_spd against mean_wind_dir, one panel per location.
sns.lmplot(
    data=df,
    x="mean_wind_dir",
    y="mean_wind_spd",
    col='location',
    scatter_kws={"s": 10},
    line_kws={'color': 'red'},
);