## **API Weather Readings**

In [1]:
# Main libraries

import requests 
import os 
import pandas as pd  
import numpy as np
from datetime import datetime
from meteostat import Point, Daily, Hourly
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
# Cap DataFrame displays at 15 rows so cell outputs stay short.
pd.options.display.max_rows = 15

**Method**: Meteostat Python Library 

In [5]:
# Data cleaning for a given variable in a dataframe

def cleansing(dframe, variable, z_limit=3):
    """Fill missing values and outliers of one column with the inlier mean.

    Outliers are detected with the z-score method: a value is an outlier
    when the absolute value of ``(value - mean) / std`` exceeds ``z_limit``.
    Both the missing values and the outliers are replaced by the mean of the
    remaining (non-missing, non-outlier) values.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame holding the column to clean. It is left unmodified.
    variable : str
        Name of the column to clean.
    z_limit : float, optional
        Absolute z-score above which a value counts as an outlier
        (default 3).

    Returns
    -------
    pandas.DataFrame
        A new single-column frame (only ``variable``) with the same index
        as ``dframe``.

    Raises
    ------
    KeyError
        If ``variable`` is not a column of ``dframe``.
    """
    # Work on a copy so the caller's frame is never altered.
    cleaned = dframe[[variable]].copy()
    values = cleaned[variable]

    # z-score method for detecting outliers
    z_scores = (values - values.mean()) / values.std()

    # A NaN z-score (missing value, constant column, or fewer than two
    # observations) compares as False here, so such rows are never outliers.
    is_outlier = z_scores.abs() > z_limit

    # Mean of the trustworthy values only; .mean() skips the NaN entries.
    inlier_mean = values[~is_outlier].mean()

    # Blank out the outliers, then fill every gap with the same inlier mean.
    cleaned[variable] = values.mask(is_outlier).fillna(inlier_mean)
    return cleaned


In [None]:
#Location = Point(20.61, -100.43)
# Function that keeps, for every year in the range, the daily measures recorded on the same calendar day
# Available variables: tavg, tmin, tmax, prcp, snow, wdir, wspd, wpgt, pres, tsun
def hist_by_same_day(latitude, longitude, variable,
                     start=datetime(1994, 1, 1), end=datetime(2024, 10, 28),
                     month_day='01-10'):
    """Return one daily variable for the same calendar day of every year.

    Fetches the Meteostat daily series for the given point between ``start``
    and ``end``, keeps only the rows whose index falls on ``month_day`` and
    passes them through ``cleansing``.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates handed to ``Point``.
    variable : str
        Column of the daily frame to return (e.g. ``'tavg'``).
    start, end : datetime.datetime, optional
        Bounds of the period requested from ``Daily``.
    month_day : str, optional
        Calendar day to keep, written as ``'MM-DD'`` (default ``'01-10'``).

    Returns
    -------
    pandas.DataFrame
        Single-column frame produced by ``cleansing``, one row per year in
        which the fetched data contains ``month_day``.

    Raises
    ------
    ValueError
        If ``month_day`` is not a valid month-day pair.
    """
    # Parsing against a leap year validates the text (and accepts '02-29');
    # re-formatting zero-pads inputs such as '1-10'.
    target_day = datetime.strptime('2000-' + month_day, '%Y-%m-%d').strftime('%m-%d')

    location = Point(latitude, longitude)
    daily = Daily(location, start, end).fetch()

    # Boolean mask over the index: True on the requested calendar day.
    on_target_day = daily.index.strftime('%m-%d') == target_day

    # Hand over a copy so the fetched frame is independent of the result.
    return cleansing(daily[on_target_day].copy(), variable)
  

In [17]:
# Example: 'tavg' history for the point (20.61, -100.43), using the default date range.
hist_by_same_day(latitude=20.61, longitude=-100.43, variable='tavg')

Unnamed: 0_level_0,tavg
time,Unnamed: 1_level_1
1994-01-10,13.251502
1995-01-10,19.200000
1996-01-10,15.100000
1997-01-10,12.300000
1998-01-10,17.600000
...,...
2020-01-10,14.100000
2021-01-10,13.600000
2022-01-10,14.600000
2023-01-10,14.100000


In [10]:

#  Function that filters hourly measures on the same day by year
# NOTE(review): this module-level Location is not read by the function defined
# below, which builds its own Point from its latitude/longitude arguments.
Location = Point(20.61, -100.43)
def hist_by_same_day_and_hour(latitude, longitude, the_hour, variable,
                              start=datetime(1994, 1, 1, 0, 0), end=datetime(2024, 1, 1, 0, 0)):
    """Return one hourly variable for the same day and hour of every year.

    For each year from ``start.year`` to ``end.year`` a single instant is
    requested from ``Hourly``: the month and day of ``start`` at
    ``the_hour``. Instants later than ``end`` (at ``the_hour``) are not
    requested. The collected rows are passed through ``cleansing``.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates handed to ``Point``.
    the_hour : int
        Hour of the day (0-23) to sample.
    variable : str
        Column of the hourly frame to return (e.g. ``'temp'``).
    start : datetime.datetime, optional
        Supplies the first year and the month/day sampled in every year.
    end : datetime.datetime, optional
        Upper bound of the sampled instants.

    Returns
    -------
    pandas.DataFrame
        Single-column frame produced by ``cleansing``, one row per year for
        which a reading was returned.

    Raises
    ------
    ValueError
        If ``the_hour`` is not a valid hour.
    """
    location = Point(latitude, longitude)
    first = start.replace(hour=the_hour)
    last = end.replace(hour=the_hour)

    yearly_readings = []
    for year in range(first.year, last.year + 1):
        try:
            moment = first.replace(year=year)
        except ValueError:
            # start is 29 February and this year has no such day.
            continue
        if moment > last:
            # The sampled day falls after the requested end.
            continue
        # start == end asks for exactly one hourly record.
        reading = Hourly(location, moment, moment).fetch()
        if not reading.empty:
            yearly_readings.append(reading)

    if yearly_readings:
        # Concatenate once instead of growing a frame inside the loop.
        readings = pd.concat(yearly_readings)
    else:
        # Nothing fetched: fall back to an empty frame with the hourly columns.
        readings = pd.DataFrame(columns=['temp', 'dwpt', 'rhum', 'prcp', 'snow', 'wdir',
                                         'wspd', 'wpgt', 'pres', 'tsun', 'coco'])
    return cleansing(readings, variable)
   

In [None]:
# Example: variable 'temp' at 18:00 for the coordinates stored in MX_latitude / MX_longitude
MX_latitude, MX_longitude = 19.4333, -99.1333
hist_by_same_day_and_hour(
    latitude=MX_latitude,
    longitude=MX_longitude,
    the_hour=18,
    variable='temp',
)

Unnamed: 0,temp
1995-01-01 18:00:00,16.1
1997-01-01 18:00:00,13.8
1998-01-01 18:00:00,11.5
1999-01-01 18:00:00,18.0
2008-01-01 18:00:00,16.8
...,...
2020-01-01 18:00:00,22.0
2021-01-01 18:00:00,17.0
2022-01-01 18:00:00,21.0
2023-01-01 18:00:00,19.0
