# 3. Graficar las correlaciones

Los archivos de salida con las correlaciones de 0 a 72 horas se grafican aquí.

In [1]:
# Se importan las bibliotecas necesarias
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import plotly.plotly as py
import plotly.graph_objs as go

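# NOTE(review): a 20-character token that looked like a Plotly API key was removed from this comment.
# If it was a real key, revoke it; keep credentials outside the notebook (Plotly credentials file or environment variables).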

Cada línea de una gráfica representa una variable (un contaminante o una variable meteorológica) con un color distinto. Por ello se construye un diccionario de colores, de modo que cada variable conserve el mismo color en todas las gráficas siguientes.

In [2]:
# Color palette: one fixed hex color per variable, so the same variable is
# drawn with the same color in every chart that follows.
dic_colores = {
    'UVA': '#543005',
    'UVB': '#8c510a',
    'PA': '#bf812d',
    'CO': '#dfc27d',
    'NO': '#a50026',
    'NO2': '#d73027',
    'NOX': '#f46d43',
    'O3': '#fdae61',
    'PM2.5': '#fee090',
    'PMCO': '#d6604d',
    'SO2': '#810f7c',
    'RH': '#abd9e9',
    'TMP': '#74add1',
    'WSP': '#4575b4',
    'WDR': '#313695',
    'PM10': '#081d58',
}

Se crea una función llamada `crear_trazo` que define los parámetros de `plotly` para crear cada línea; el objeto correspondiente en `plotly` se llama `Scatter`. En el eje x se representan las horas, de 0 a 72, y en el eje y la correlación de cada variable con PM10, PM2.5 u O3, según la gráfica. A la función se le pasan el dataframe, el nombre de la columna a graficar, el color (obtenido del diccionario de colores) y, opcionalmente, la columna del eje x y el ancho de la línea.

In [3]:
# Builds one trace, i.e. one line per plotted column
def crear_trazo(df, y, color, x='horas', width=2):
    """Return a ``go.Scatter`` trace for column ``y`` of ``df``.

    Parameters
    ----------
    df : object indexable by column name
        Source of the data; ``df[x]`` and ``df[y]`` are read.
    y : str
        Column supplying the y values; also used as the trace name.
    color : str
        Line color handed to Plotly.
    x : str, optional
        Column supplying the x values (default ``'horas'``).
    width : int, optional
        Line width (default 2).

    Returns
    -------
    The ``go.Scatter`` object describing the line.
    """
    estilo_linea = {'color': color, 'width': width}
    return go.Scatter(x=df[x], y=df[y], name=y, line=estilo_linea)

## 3.1 Correlación entre PM10 y demás contaminantes

In [4]:
# Load the PM10 correlation table from CSV.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df_pm10 = pd.read_csv(
    filepath_or_buffer="/home/pradel/Desktop/Forecast_CDMX_pollution/correlacion/sub_dataset/corr_pm10_72.csv"
)

In [5]:
# Preview the first five rows of the PM10 correlation table
df_pm10.head(n=5)

Unnamed: 0,horas,UVA,UVB,PA,CO,NO,NO2,NOX,O3,PM2.5,PMCO,SO2,RH,TMP,WSP,WDR,PM10
0,0,0.123515,0.08758,0.076545,0.594815,0.423973,0.615966,0.543273,0.163469,0.77954,0.890052,0.328258,-0.400048,0.082995,-0.05705,-0.224941,1.0
1,1,0.080591,0.046393,0.069959,0.571624,0.463028,0.545379,0.551669,0.139422,0.71678,0.807676,0.313125,-0.36605,0.032821,-0.126081,-0.19603,0.910007
2,2,0.059409,0.033295,0.064172,0.491751,0.429516,0.462595,0.497568,0.129903,0.653893,0.69221,0.296926,-0.336801,-0.007755,-0.190461,-0.168067,0.794766
3,3,0.054252,0.04116,0.062331,0.389902,0.351801,0.391407,0.411622,0.123662,0.593826,0.579216,0.283703,-0.309816,-0.041595,-0.237207,-0.147523,0.679227
4,4,0.055614,0.056559,0.064473,0.297361,0.263249,0.349946,0.326656,0.110384,0.544312,0.485117,0.278224,-0.285917,-0.06964,-0.272836,-0.130343,0.581331


In [6]:
# One trace per column of df_pm10 after the first one
# (presumably 'horas', which crear_trazo uses as the x axis — confirm),
# each colored from the shared palette.
data_pm10 = [
    crear_trazo(df_pm10, contaminante, dic_colores[contaminante])
    for contaminante in df_pm10.columns[1:]
]

In [7]:
# Layout: chart title and axis labels
layout = dict(
    title='Correlación de PM10 con otros contaminantes',
    xaxis=dict(title='Horas'),
    yaxis=dict(title='Correlación'),
)

fig = dict(data=data_pm10, layout=layout)
# Use a chart-specific filename: Plotly identifies the hosted file by this
# name, so a name shared between charts would make them overwrite one another.
py.iplot(fig, filename='correlacion-pm10')

## 3.2 Correlación entre O3 y demás contaminantes

In [8]:
# Load the O3 correlation table from CSV.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df_o3 = pd.read_csv(
    filepath_or_buffer="/home/pradel/Desktop/Forecast_CDMX_pollution/correlacion/sub_dataset/corr_o3_72.csv"
)

In [9]:
# Preview the first five rows of the O3 correlation table
df_o3.head(n=5)

Unnamed: 0,horas,UVA,UVB,PA,CO,NO,NO2,NOX,O3,PM2.5,PMCO,SO2,RH,TMP,WSP,WDR,PM10
0,0,0.637874,0.630691,-0.113039,-0.307982,-0.434319,-0.248527,-0.431471,1.0,0.236114,0.157634,-0.100709,-0.713099,0.82826,0.436214,-0.240336,0.163469
1,1,0.765184,0.755169,-0.080621,-0.186266,-0.324123,-0.147059,-0.309432,0.940699,0.339016,0.186329,-0.055048,-0.671763,0.773348,0.310367,-0.302205,0.216989
2,2,0.802512,0.785601,-0.047059,-0.026806,-0.160733,-0.025265,-0.137863,0.797727,0.416824,0.230577,-0.005326,-0.579915,0.655609,0.179123,-0.341926,0.282626
3,3,0.751115,0.72368,-0.017406,0.147656,0.037982,0.092991,0.061027,0.600658,0.463492,0.275256,0.043574,-0.44952,0.490179,0.043942,-0.354822,0.344966
4,4,0.626953,0.588015,0.004878,0.304681,0.238007,0.18746,0.253189,0.380821,0.47535,0.304631,0.084591,-0.296365,0.296833,-0.088036,-0.341385,0.387384


In [10]:
# One trace per column of df_o3 after the first one
# (presumably 'horas', which crear_trazo uses as the x axis — confirm).
# The columns are taken from df_o3 itself, so the traces always match the
# frame being plotted and this cell does not depend on df_pm10 being loaded.
data_o3 = [
    crear_trazo(df_o3, contaminante, dic_colores[contaminante])
    for contaminante in df_o3.columns[1:]
]

In [11]:
# Layout: chart title and axis labels
layout = dict(
    title='Correlación de O3 con otros contaminantes',
    xaxis=dict(title='Horas'),
    yaxis=dict(title='Correlación'),
)

fig = dict(data=data_o3, layout=layout)
# Use a chart-specific filename: Plotly identifies the hosted file by this
# name, so a name shared between charts would make them overwrite one another.
py.iplot(fig, filename='correlacion-o3')

## 3.3 Correlación entre PM2.5 y demás contaminantes

In [12]:
# Load the PM2.5 correlation table from CSV.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df_pm25 = pd.read_csv(
    filepath_or_buffer="/home/pradel/Desktop/Forecast_CDMX_pollution/correlacion/sub_dataset/corr_pm25_72.csv"
)

In [13]:
# Preview the first five rows of the PM2.5 correlation table
df_pm25.head(n=5)

Unnamed: 0,horas,UVA,UVB,PA,CO,NO,NO2,NOX,O3,PM2.5,PMCO,SO2,RH,TMP,WSP,WDR,PM10
0,0,0.275307,0.249868,0.102274,0.528108,0.291985,0.587487,0.427495,0.236114,1.0,0.515379,0.353769,-0.196711,0.068236,-0.297184,-0.254297,0.77954
1,1,0.197185,0.166877,0.109746,0.559618,0.369722,0.57263,0.485358,0.126102,0.916603,0.486001,0.358245,-0.118853,-0.033155,-0.363638,-0.236122,0.741768
2,2,0.104426,0.076262,0.112819,0.563101,0.422031,0.535421,0.515336,0.023779,0.833065,0.447759,0.353504,-0.044447,-0.127693,-0.408396,-0.203778,0.690851
3,3,0.011677,-0.009025,0.112403,0.527447,0.426606,0.49041,0.504264,-0.057861,0.742662,0.396256,0.340543,0.017847,-0.205362,-0.432288,-0.164802,0.621345
4,4,-0.0701,-0.082135,0.109151,0.45856,0.382344,0.447973,0.454684,-0.11615,0.654258,0.336386,0.325674,0.062999,-0.259689,-0.441237,-0.124779,0.542837


In [14]:
# One trace per column of df_pm25 after the first one
# (presumably 'horas', which crear_trazo uses as the x axis — confirm).
# The columns are taken from df_pm25 itself, so the traces always match the
# frame being plotted and this cell does not depend on df_pm10 being loaded.
data_pm25 = [
    crear_trazo(df_pm25, contaminante, dic_colores[contaminante])
    for contaminante in df_pm25.columns[1:]
]

In [15]:
# Plot the correlations between PM2.5 and the other variables
# Layout: chart title and axis labels
layout = dict(
    title='Correlación de PM2.5 con otros contaminantes',
    xaxis=dict(title='Horas'),
    yaxis=dict(title='Correlación'),
)

fig = dict(data=data_pm25, layout=layout)
# Use a chart-specific filename: Plotly identifies the hosted file by this
# name, so a name shared between charts would make them overwrite one another.
py.iplot(fig, filename='correlacion-pm25')