# **ETL**

In [2]:
# Main libraries

import requests 
import os 
import pandas as pd  
import numpy as np
from datetime import datetime
from meteostat import Point, Daily, Hourly
import sqlalchemy
from sqlalchemy import create_engine

In [210]:
# Cap DataFrame previews at 5 rows so cell outputs stay compact.
pd.options.display.max_rows = 5

In [211]:
import warnings

# Suppress DeprecationWarning and FutureWarning messages for the rest of the session.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=FutureWarning)
    


## **Weather Readings: Querétaro**

**Method**: Meteostat Python library (`Point`, `Daily`, `Hourly`)

In [226]:
# Latitude / longitude pair handed to Meteostat to locate Querétaro.
Qro_latitude, Qro_longitude = 20.5875, -100.393

# Meteostat location object built from the coordinates above.
Qro = Point(Qro_latitude, Qro_longitude)

**Daily data**

In [227]:
# Time period requested from Meteostat (start and end of the query window).
start, end = datetime(1994, 1, 1), datetime(2024, 11, 3)

# Query the daily series for the Querétaro point and fetch it in one step,
# then display the result as the cell output.
Qro_daily = Daily(Qro, start, end).fetch()
Qro_daily

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1994-01-06,-7.7,,,,,,,,,
1994-01-07,-8.3,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2024-11-02,17.8,12.0,26.6,0.0,,99.0,11.2,,1023.9,
2024-11-03,18.3,10.0,26.0,0.0,,262.0,12.7,,1022.9,


**Hourly data**

In [228]:
# Fetch the hourly series for the same point and time period,
# then display it for a first look at the available columns.
Qro_hourly = Hourly(Qro, start, end).fetch()
Qro_hourly

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1994-01-04 21:00:00,-5.6,-16.2,43.0,,,60.0,36.7,,1000.1,,
1994-01-04 22:00:00,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,23.0,10.1,44.0,0.0,,70.0,11.0,,1022.0,,2.0
2024-11-03 00:00:00,21.0,9.9,49.0,0.0,,60.0,13.0,,1023.0,,3.0


In [229]:
# Add the extra columns called for by the ER diagram:
#   date, time - the date and time-of-day parts of the datetime index
#   city       - constant label identifying this location
Qro_hourly = Qro_hourly.assign(
    date=Qro_hourly.index.date,
    time=Qro_hourly.index.time,
    city='Querétaro',
)
Qro_hourly

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-04 21:00:00,-5.6,-16.2,43.0,,,60.0,36.7,,1000.1,,,1994-01-04,21:00:00,Querétaro
1994-01-04 22:00:00,,,,,,,,,,,,1994-01-04,22:00:00,Querétaro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,23.0,10.1,44.0,0.0,,70.0,11.0,,1022.0,,2.0,2024-11-02,23:00:00,Querétaro
2024-11-03 00:00:00,21.0,9.9,49.0,0.0,,60.0,13.0,,1023.0,,3.0,2024-11-03,00:00:00,Querétaro


In [230]:
# Replace Meteostat's short column codes with the names used in the ER diagram.
# Columns not listed in the mapping keep their current names.
Qro_hourly = Qro_hourly.rename(
    columns={
        'temp': 'temperature',
        'dwpt': 'dew_point',
        'rhum': 'relative_humidity',
        'wspd': 'wind_speed',
        'pres': 'pressure',
        'coco': 'code',
    }
)
Qro_hourly

Unnamed: 0_level_0,temperature,dew_point,relative_humidity,prcp,snow,wdir,wind_speed,wpgt,pressure,tsun,code,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-04 21:00:00,-5.6,-16.2,43.0,,,60.0,36.7,,1000.1,,,1994-01-04,21:00:00,Querétaro
1994-01-04 22:00:00,,,,,,,,,,,,1994-01-04,22:00:00,Querétaro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,23.0,10.1,44.0,0.0,,70.0,11.0,,1022.0,,2.0,2024-11-02,23:00:00,Querétaro
2024-11-03 00:00:00,21.0,9.9,49.0,0.0,,60.0,13.0,,1023.0,,3.0,2024-11-03,00:00:00,Querétaro


In [234]:
# Keep only the columns that are loaded into the weather_readings table.
Qro_weather_readings = Qro_hourly.loc[
    :,
    ['date', 'time', 'temperature', 'dew_point', 'pressure', 'wind_speed', 'relative_humidity'],
]
Qro_weather_readings

Unnamed: 0_level_0,date,time,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1994-01-04 21:00:00,1994-01-04,21:00:00,-5.6,-16.2,1000.1,36.7,43.0
1994-01-04 22:00:00,1994-01-04,22:00:00,,,,,
...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,23.0,10.1,1022.0,11.0,44.0
2024-11-03 00:00:00,2024-11-03,00:00:00,21.0,9.9,1023.0,13.0,49.0


In [None]:
# Database connection: SQL Server through pyodbc, using a trusted connection
# and "weather" as the database name.
# The server host is taken from the WEATHER_DB_SERVER environment variable so the
# notebook can target another server without editing code; when the variable is
# not set, the original placeholder host is used.
db_server = os.environ.get('WEATHER_DB_SERVER', 'SERVER_NAME')
engine = create_engine(
    f'mssql+pyodbc://{db_server}/weather'
    '?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server'
)


In [236]:
# Append the selected readings to the weather_readings table.
# index=False: the DataFrame's datetime index is not written as a column.
Qro_weather_readings.to_sql(
    name='weather_readings',
    con=engine,
    if_exists='append',
    index=False,
)

247

In [238]:
# Keep only the columns that are loaded into the w_readings_city_codes table:
# the readings plus the condition code and the city label.
Qro_city_codes = Qro_hourly.loc[
    :,
    [
        'date', 'time', 'code', 'city',
        'temperature', 'dew_point', 'pressure', 'wind_speed', 'relative_humidity',
    ],
]
Qro_city_codes

Unnamed: 0_level_0,date,time,code,city,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1994-01-04 21:00:00,1994-01-04,21:00:00,,Querétaro,-5.6,-16.2,1000.1,36.7,43.0
1994-01-04 22:00:00,1994-01-04,22:00:00,,Querétaro,,,,,
...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,2.0,Querétaro,23.0,10.1,1022.0,11.0,44.0
2024-11-03 00:00:00,2024-11-03,00:00:00,3.0,Querétaro,21.0,9.9,1023.0,13.0,49.0


In [240]:
# Append the city/code readings to the w_readings_city_codes table.
# index=False: the DataFrame's datetime index is not written as a column.
Qro_city_codes.to_sql(
    name='w_readings_city_codes',
    con=engine,
    if_exists='append',
    index=False,
)


197