In [18]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [19]:
# Period to download: month first, then year.
mes_a_cargar = 5  # 1 = January, 2 = February, 3 = March, ...
año_a_cargar = 2024  # accepted range: 2019 through 2024

In [20]:
def carga_data_air_quality(año_a_cargar, mes_a_cargar):
    '''
        Download one month of hourly air-quality readings into a DataFrame.

        The CSV is read from
        .../hist/csv/{año_a_cargar}/{mes_a_cargar}/hourlyMonitoring.csv
        on the azdohv2staticweb blob storage host.

        Parameters:
            año_a_cargar: year to load, between 2019 and 2024 (inclusive).
            mes_a_cargar: month to load, between 1 and 12 (inclusive). It is
                inserted into the URL as-is, without zero padding.

        Returns:
            DataFrame with the contents of the requested CSV, as parsed by
            pd.read_csv.

        Raises:
            ValueError: if the year or the month is outside the accepted range.
    '''
    # Fail loudly on bad input. Catching the error here and printing it would
    # leave nothing to return and surface later as an UnboundLocalError.
    if not (2019 <= año_a_cargar <= 2024 and 1 <= mes_a_cargar <= 12):
        raise ValueError("El año o mes no está dentro del rango permitido")

    url = f"https://azdohv2staticweb.blob.core.windows.net/$web/hist/csv/{año_a_cargar}/{mes_a_cargar}/hourlyMonitoring.csv"
    return pd.read_csv(url)

In [21]:
def carga_data_location():
    '''
        Download the monitoring-site location table into a DataFrame.

        The CSV is read from .../hist/csv/location.csv on the
        azdohv2staticweb blob storage host. The file is not split by year or
        month, so this function takes no parameters and does not depend on any
        notebook-level variable.

        Returns:
            DataFrame with the contents of location.csv, as parsed by
            pd.read_csv.
    '''
    url = "https://azdohv2staticweb.blob.core.windows.net/$web/hist/csv/location.csv"
    return pd.read_csv(url)

In [22]:
# Load the site-location table (read from location.csv).
df_location = carga_data_location()

In [23]:
# Display the location table.
df_location

Unnamed: 0,SiteID,Latitude,Longitude,SiteName,Address
0,36061NY08454,40.71651,-73.997004,Manhattan Bridge,Canal St SS1E of Elizabeth St
1,36005NY11790,40.819092,-73.885659,Hunts Point,Southeast Corner of Seneca Ave and Longfellow Ave
2,36081NY08198,40.707113,-73.8734,Glendale,Cooper Rapid Rehousing Center - 78-16 Cooper Ave
3,36081NY09285,40.737107,-73.821556,Queens College,Queens College Reference Site
4,36061NY09734,40.75069,-73.98783,Broadway/35th St,Broadway ES1N of 35th St
5,36061NY08653,40.722288,-73.974651,FDR,Southbound FDR access Road between E 10th St a...
6,36005NY11534,40.806486,-73.922487,Mott Haven,North side of E 135th St between Brown Pl and ...
7,36005NY12387,40.845167,-73.906143,Cross Bronx Expy,Cross Bronx Expressway Service road NS1E of Mo...
8,36081NY07615,40.690155,-73.80908,Van Wyck,Northbound Van Wyck Expy access road between 1...
9,36047NY07974,40.702798,-73.960824,BQE,West side of Williamsburg St W between Bedford...


El dataset `location` contiene datos de ubicaciones de Nueva York. Estas ubicaciones se utilizan para definir dónde se encuentran los sensores de calidad del aire.

In [24]:
# Load the hourly readings for the year and month set in the configuration cell.
df = carga_data_air_quality(año_a_cargar, mes_a_cargar)


In [25]:
# Display the loaded readings.
df

Unnamed: 0,ID,SiteID,ObservationTimeUTC,Value
0,549205,36061NY08552,2024-05-01 00:00:00.000,11.07
1,549206,36005NY12387,2024-05-01 00:00:00.000,9.40
2,549207,36061NY08454,2024-05-01 00:00:00.000,14.51
3,549208,36081NY08198,2024-05-01 00:00:00.000,9.06
4,549209,36081NY07615,2024-05-01 00:00:00.000,8.93
...,...,...,...,...
5967,561822,36061NY08653,2024-05-31 23:00:00.000,9.29
5968,561823,36081NY07615,2024-05-31 23:00:00.000,7.10
5969,561824,36061NY12380,2024-05-31 23:00:00.000,9.47
5970,561825,36085NY04805,2024-05-31 23:00:00.000,6.74


---
1a Tabla a cargar: Location
---
---


In [26]:
# Display the location table again, under its own section heading.
df_location

Unnamed: 0,SiteID,Latitude,Longitude,SiteName,Address
0,36061NY08454,40.71651,-73.997004,Manhattan Bridge,Canal St SS1E of Elizabeth St
1,36005NY11790,40.819092,-73.885659,Hunts Point,Southeast Corner of Seneca Ave and Longfellow Ave
2,36081NY08198,40.707113,-73.8734,Glendale,Cooper Rapid Rehousing Center - 78-16 Cooper Ave
3,36081NY09285,40.737107,-73.821556,Queens College,Queens College Reference Site
4,36061NY09734,40.75069,-73.98783,Broadway/35th St,Broadway ES1N of 35th St
5,36061NY08653,40.722288,-73.974651,FDR,Southbound FDR access Road between E 10th St a...
6,36005NY11534,40.806486,-73.922487,Mott Haven,North side of E 135th St between Brown Pl and ...
7,36005NY12387,40.845167,-73.906143,Cross Bronx Expy,Cross Bronx Expressway Service road NS1E of Mo...
8,36081NY07615,40.690155,-73.80908,Van Wyck,Northbound Van Wyck Expy access road between 1...
9,36047NY07974,40.702798,-73.960824,BQE,West side of Williamsburg St W between Bedford...


---
2a Tabla a cargar: Air Quality mes de mayo
---
---

In [27]:
# Bind the frame loaded earlier to a month-specific name (same object, not a copy).
# NOTE(review): `df` holds whichever month `mes_a_cargar` selects, so the "mayo"
# label is only accurate while mes_a_cargar == 5.
df_air_quality_mayo = df

In [28]:
# Display the May readings.
df_air_quality_mayo

Unnamed: 0,ID,SiteID,ObservationTimeUTC,Value
0,549205,36061NY08552,2024-05-01 00:00:00.000,11.07
1,549206,36005NY12387,2024-05-01 00:00:00.000,9.40
2,549207,36061NY08454,2024-05-01 00:00:00.000,14.51
3,549208,36081NY08198,2024-05-01 00:00:00.000,9.06
4,549209,36081NY07615,2024-05-01 00:00:00.000,8.93
...,...,...,...,...
5967,561822,36061NY08653,2024-05-31 23:00:00.000,9.29
5968,561823,36081NY07615,2024-05-31 23:00:00.000,7.10
5969,561824,36061NY12380,2024-05-31 23:00:00.000,9.47
5970,561825,36085NY04805,2024-05-31 23:00:00.000,6.74


---
3a Tabla a cargar: Air Quality mes de junio
---
---

In [29]:
# Load June of the configured year; the month (6) is hard-coded here rather
# than taken from mes_a_cargar.
df_air_quality_junio = carga_data_air_quality(año_a_cargar, 6)

In [30]:
# Display the June readings.
df_air_quality_junio

Unnamed: 0,ID,SiteID,ObservationTimeUTC,Value
0,561837,36061NY08552,2024-06-01 00:00:00.000,11.79
1,561838,36061NY08454,2024-06-01 00:00:00.000,11.01
2,561839,36081NY08198,2024-06-01 00:00:00.000,6.09
3,561840,36005NY11534,2024-06-01 00:00:00.000,6.92
4,561841,36061NY08653,2024-06-01 00:00:00.000,10.99
...,...,...,...,...
6915,575908,36061NY08653,2024-06-30 23:00:00.000,9.15
6916,575909,36081NY07615,2024-06-30 23:00:00.000,5.72
6917,575920,36005NY12387,2024-06-30 23:00:00.000,30.58
6918,575921,36061NY09734,2024-06-30 23:00:00.000,10.44


---
Para este archivo es conveniente cargar 3 tablas importantes: 
1. `df_location`, 
2. `df_air_quality_junio`, 
3. `df_air_quality_mayo`

Se guardan los datos limpios en archivos CSV (uno por tabla) dentro de la carpeta Clean Data, ubicada en la carpeta Datasets.

In [32]:
# Write the three tables to CSV files, omitting the DataFrame index.
CLEAN_DATA_DIR = Path('../Datasets/Clean Data')
# Create the output folder when it is missing (e.g. on a fresh checkout);
# to_csv does not create parent directories and would fail otherwise.
CLEAN_DATA_DIR.mkdir(parents=True, exist_ok=True)

df_location.to_csv(CLEAN_DATA_DIR / 'locations.csv', index=False)
df_air_quality_junio.to_csv(CLEAN_DATA_DIR / 'air_quality_junio.csv', index=False)
df_air_quality_mayo.to_csv(CLEAN_DATA_DIR / 'air_quality_mayo.csv', index=False)