In [4]:
from pathlib import Path
import json
import os
from datosgobmx import client
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [33]:
#Tests the data from aqip compared to cdmx
def t_test(df_aqip, df_mx):
    """Compare two air quality samples with an unequal-variance t test.

    The station data (df_mx) is passed as the first sample and the air
    quality index project data (df_aqip) as the second one.

    Args:
        df_aqip (dataframe): dataframe with concentrations for mexican cities from air quality index project.
        df_mx (dataframe): dataframe with concentrations from stations for mexican cities.

    Returns:
        [tuple]: tuple with t and p values for the compared dataframes.
    """
    # equal_var=False: the two samples are not assumed to share a variance.
    outcome = stats.ttest_ind(df_mx, df_aqip, equal_var=False)

    t_value = outcome[0]
    p_value = outcome[1]
    return (t_value, p_value)

def pollutant(p):
    """Look up a pollutant code by its position in the fixed pollutant list.

    Args:
        p (int): position in the list; 0 to 4 address the five entries in order.

    Returns:
        str: pollutant code ('CO', 'NO2', 'O3', 'PM10' or 'SO2').
    """
    # Pollutant parameters
    codes = ['CO', 'NO2', 'O3', 'PM10', 'SO2']
    selected = codes[p]
    return selected

def aqip_mx(city):
    """Merge AQIP and station daily median concentrations for one city into a csv.

    Reads the station medians from '<processed>/<city>/median_res_2017-2020.csv'
    and the air quality index project data from
    '<processed>/aqip/MX_2015_2020.csv', inner-joins them on pollutant and
    date, and writes the two median columns side by side.

    Args:
        city (str): city code, 'gdl' (Guadalajara) or 'cdmx' (Mexico City).

    Raises:
        KeyError: if city is not a known city code, or the city name is not
            present in the index of the AQIP csv.
    """

    dir_pcs = '../data/processed/'
    dir_pcs_aqip = '../data/processed/aqip/'

    # 'cdmx' is the code used elsewhere in this file; the original misspelled
    # key 'cmdx' is kept as an alias so existing calls keep resolving.
    city_dict = {'gdl': 'Guadalajara', 'cdmx': 'Mexico City', 'cmdx': 'Mexico City'}

    # Resolve the city first so an unknown code fails before touching the disk.
    city_name = city_dict[city]

    dir_pcs_cat = dir_pcs+'aqip_'+city #Directory to save concatenation

    # Create the directory that is being checked for. The original created
    # dir_pcs+city instead, which already has to exist for the read below.
    os.makedirs(dir_pcs_cat, exist_ok=True)

    mx = pd.read_csv(dir_pcs+city+'/'+'median_res_2017-2020.csv')
    aqip = pd.read_csv(dir_pcs_aqip +'MX_2015_2020.csv', index_col=[0])

    # The first csv column is the index, so .loc selects the rows of this city.
    aqip = aqip.loc[city_name]

    compare = pd.merge(aqip, mx, how='inner',
                       left_on=['Specie', 'Date'], right_on=['PARAM', 'FECHA'])

    # Keep only the two medians plus pollutant and date, under common names.
    compare = compare.drop(
        columns=['count', 'min', 'max', 'median', 'variance', 'PARAM', 'FECHA']
    ).rename(columns={'Specie': 'Contaminante',
                      'Date': 'Fecha',
                      'c_median': 'aqip_median',
                      '0': 'mx_median'})

    # NOTE(review): there is no path separator between dir_pcs_cat and the file
    # name, so the file is written next to the directory as
    # 'aqip_<city><city>_AQIP.csv'. Left unchanged because data_valid reads
    # exactly this path.
    compare.to_csv(dir_pcs_cat +city+'_AQIP.csv')

    
def data_valid(city):
    """Print a t test per pollutant comparing station data with AQIP data.

    Loads the merged csv written by aqip_mx for the city and, for each
    pollutant, tests whether the medians from the mexican monitoring stations
    and from the air quality index project are statistically different.

    Args:
        city (str): city code for the city to be analyzed.
    """

    dir_pcs = '../data/processed/'
    dir_pcs_cat = dir_pcs+'aqip_'+city #Directory to save concatenation

    valid_check = pd.read_csv(dir_pcs_cat +city+'_AQIP.csv')

    df_aqip = valid_check[['Contaminante','aqip_median']]
    df_mx = valid_check[['Contaminante','mx_median']]

    # pollutant() holds five entries (indices 0-4); iterating up to 6 raised
    # IndexError after the last pollutant had been printed.
    for i in range(5):
        name = pollutant(i)

        # Keep only this pollutant's rows and leave just the median column.
        aqip_values = df_aqip[df_aqip['Contaminante']==name].drop(columns=['Contaminante'])
        mx_values = df_mx[df_mx['Contaminante']==name].drop(columns=['Contaminante'])

        t,p = t_test(aqip_values, mx_values)

        print ('For: '+name+' t value is: '+str(t)+' and p value is: '+str(p))

def airquality_average(city):
    """Write one csv per pollutant with rolling means for January through April.

    The city's csv is averaged per date and pollutant, restricted to the first
    four months of every available year, and smoothed with a 7-row rolling
    mean. Each pollutant is saved next to the input file as
    '<input name>_<pollutant>.csv'.

    Args:
        city (str): city code; 'cdmx' reads '<city>_2017-2020_filtered.csv',
            any other code reads '<city>_2017-2019.csv'.

    Returns:
        None: the results are only written to disk.
    """
    dir_pcs_mx = '../data/processed/'+city+'/'
    data_csv = dir_pcs_mx+city+'_2017-2019.csv'

    if city == 'cdmx':
        data_csv = '../data/processed/'+city+'/'+city+'_2017-2020_filtered.csv'

    month = [1,2,3,4]

    # Average duplicate (date, pollutant) rows into a single row each.
    data_bydate = pd.read_csv(data_csv).set_index(['FECHA','PARAM']).groupby(level=('FECHA','PARAM')).mean().reset_index()

    data_bydate['FECHA'] = pd.to_datetime(data_bydate['FECHA'])

    # DataFrame.append no longer exists in pandas 2.x; pd.concat over the
    # per-month slices yields the same rows in the same order.
    # NOTE(review): rows end up grouped by month first (every January, then
    # every February, ...), so the rolling window below spans year boundaries
    # - confirm this is intended.
    row_month = data_bydate['FECHA'].dt.month
    filter_month = pd.concat([data_bydate[row_month==m] for m in month])

    for i in range(5):
        name = pollutant(i)

        data_bydateParam = filter_month[filter_month['PARAM']==name].set_index('FECHA')

        # PARAM is a text column; older pandas dropped it silently during the
        # rolling mean, newer versions raise, so it is dropped explicitly.
        data_bydateParam = data_bydateParam.drop(columns=['PARAM'])

        data_bydateParam = data_bydateParam.rolling(7, min_periods=1).mean()

        # Every pollutant except PM10 is scaled by 1000 (presumably a unit
        # conversion - TODO confirm).
        if name != 'PM10':
            data_bydateParam = data_bydateParam*1000

        data_bydateParam.to_csv(data_csv[:-4]+'_'+name+'.csv')

In [34]:
# Write the per-pollutant rolling-mean csv files for city code 'gdl'.
airquality_average('gdl')

In [31]:
# NOTE(review): airquality_average has no active return statement (its return
# line is commented out), so df is None after this call.
df = airquality_average('gdl')

In [32]:
# Scale df by 10000 and display it.
# NOTE(review): this needs df to be a DataFrame; presumably it was run against
# an earlier version of airquality_average that returned one - confirm.
df = df*10000
df

Unnamed: 0_level_0,ATM,AGU,LDO,MIR,CEN,OBL,PIN,TLA,SFE,VAL
FECHA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-01,20.416667,20.916667,16.708333,,18.208333,25.291667,,31.291667,24.583333,15.666667
2017-01-02,25.104167,20.458333,28.083333,,18.354167,29.604167,,35.041667,28.500000,15.250000
2017-01-03,27.013889,21.083333,29.513889,,18.513889,32.236111,,37.013889,31.888889,15.333333
2017-01-04,27.531250,21.843750,27.427083,,18.697917,31.666667,,39.510417,34.322917,16.364583
2017-01-05,28.333333,23.550000,27.891667,,18.841667,33.850000,,40.191667,35.450000,15.700000
...,...,...,...,...,...,...,...,...,...,...
2019-04-26,20.108333,23.857143,13.898810,19.857143,30.038690,20.732143,52.607143,41.809524,39.767857,15.303571
2019-04-27,19.560714,24.363095,13.602743,20.363095,34.252976,19.922619,53.529449,41.029762,39.619963,15.791667
2019-04-28,20.310714,25.172619,11.965580,21.172619,34.806548,18.464286,55.159357,40.797619,41.115201,15.607143
2019-04-29,20.215476,24.958333,11.507246,20.958333,35.122024,18.357143,58.532895,41.488095,41.775916,15.565476
