# **ETL**

In [1]:
# Main libraries

import requests 
import os 
import pandas as pd  
import numpy as np
from datetime import datetime
from meteostat import Point, Daily, Hourly
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
# Keep rendered DataFrames short: pandas truncates any frame longer than 5 rows.
pd.options.display.max_rows = 5

In [22]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
    


## **Weather Readings: Mexico City**

**Method**: Meteostat Python Library 

In [7]:

# Coordinates (decimal degrees) used to build the Meteostat location for Mexico City.
MxC_latitude, MxC_longitude = 19.4333, -99.1333

# Meteostat location object passed to the Daily/Hourly queries below.
MxC = Point(MxC_latitude, MxC_longitude)

**Daily data**

In [9]:
# Observation window requested from Meteostat (both bounds are hard-coded here
# and reused by the hourly query further down).
start, end = datetime(1994, 1, 1), datetime(2024, 11, 3)

# Build the daily query for the Mexico City point and fetch it in one step,
# so MxC_daily is only ever bound to the fetched table.
MxC_daily = Daily(MxC, start, end).fetch()
MxC_daily

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-06-29,17.6,13.5,23.4,,,,,,,
2007-06-30,18.1,11.7,24.4,0.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2024-11-02,17.6,13.0,23.0,0.0,,12.0,11.0,,1024.5,
2024-11-03,18.1,12.0,25.0,0.0,,274.0,6.1,,1015.8,


**Hourly data**

In [None]:
# Data exploration: hourly observations for the same point and the same
# start/end window as the daily query.
MxC_hourly = Hourly(MxC, start, end).fetch()
MxC_hourly

In [12]:
# Add the columns required by the ER diagram: the calendar date and clock time
# split out of the timestamp index, plus a constant city label.
# NOTE(review): Meteostat hourly timestamps appear to be UTC unless a timezone is
# requested — confirm whether the ER diagram expects UTC or local date/time.
hourly_index = MxC_hourly.index
MxC_hourly = MxC_hourly.assign(
    date=hourly_index.date,
    time=hourly_index.time,
    city='Mexico city',
)
MxC_hourly

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-01 21:00:00,18.5,4.3,39.0,,,30.0,3.6,,1012.2,,,1994-01-01,21:00:00,Mexico city
1994-01-01 22:00:00,,,,,,,,,,,,1994-01-01,22:00:00,Mexico city
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,20.0,9.0,49.0,0.0,,110.0,19.0,,1023.0,,8.0,2024-11-02,23:00:00,Mexico city
2024-11-03 00:00:00,18.0,11.1,64.0,0.0,,70.0,13.0,,1024.0,,8.0,2024-11-03,00:00:00,Mexico city


In [13]:
# Map Meteostat's short column codes to the names used in the ER diagram.
# Columns not listed here (prcp, snow, wdir, wpgt, tsun, ...) keep their names.
er_column_names = {
    'temp': 'temperature',
    'coco': 'code',
    'dwpt': 'dew_point',
    'pres': 'pressure',
    'wspd': 'wind_speed',
    'rhum': 'relative_humidity',
}
MxC_hourly = MxC_hourly.rename(columns=er_column_names)
MxC_hourly

Unnamed: 0_level_0,temperature,dew_point,relative_humidity,prcp,snow,wdir,wind_speed,wpgt,pressure,tsun,code,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-01 21:00:00,18.5,4.3,39.0,,,30.0,3.6,,1012.2,,,1994-01-01,21:00:00,Mexico city
1994-01-01 22:00:00,,,,,,,,,,,,1994-01-01,22:00:00,Mexico city
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,20.0,9.0,49.0,0.0,,110.0,19.0,,1023.0,,8.0,2024-11-02,23:00:00,Mexico city
2024-11-03 00:00:00,18.0,11.1,64.0,0.0,,70.0,13.0,,1024.0,,8.0,2024-11-03,00:00:00,Mexico city


In [15]:
# Keep only the columns that are loaded into the weather_readings table,
# in the order they are listed here.
weather_readings_columns = [
    'date',
    'time',
    'temperature',
    'dew_point',
    'pressure',
    'wind_speed',
    'relative_humidity',
]
MxC_weather_readings = MxC_hourly.loc[:, weather_readings_columns]
MxC_weather_readings

Unnamed: 0_level_0,date,time,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1994-01-01 21:00:00,1994-01-01,21:00:00,18.5,4.3,1012.2,3.6,39.0
1994-01-01 22:00:00,1994-01-01,22:00:00,,,,,
...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,20.0,9.0,1023.0,19.0,49.0
2024-11-03 00:00:00,2024-11-03,00:00:00,18.0,11.1,1024.0,13.0,64.0


In [None]:
# Database connection: SQL Server through pyodbc with a trusted connection
# (trusted_connection=yes, so no credentials are embedded in the URL), using
# "weather" as the database name and ODBC Driver 17 for SQL Server.
# The server host is read from the WEATHER_DB_SERVER environment variable so the
# notebook can run against another machine without editing this cell; when the
# variable is not set it falls back to the original 'SERVER_NAME' value.
DB_SERVER = os.environ.get('WEATHER_DB_SERVER', 'SERVER_NAME')
engine = create_engine(
    f'mssql+pyodbc://{DB_SERVER}/weather'
    '?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server'
)


In [17]:
# Database load: write the filtered readings to the weather_readings table.
# if_exists='append' adds rows to an existing table, so re-running this cell
# inserts the same rows again unless the table itself rejects duplicates.
# index=False leaves the timestamp index out (date/time are already columns).
MxC_weather_readings.to_sql(
    name='weather_readings',
    con=engine,
    if_exists='append',
    index=False,
)

20

In [19]:
# Keep only the columns that are loaded into the w_readings_city_codes table:
# the readings plus the weather condition code and the city label.
city_codes_columns = [
    'date',
    'time',
    'code',
    'city',
    'temperature',
    'dew_point',
    'pressure',
    'wind_speed',
    'relative_humidity',
]
MxC_city_codes = MxC_hourly.loc[:, city_codes_columns]
MxC_city_codes

Unnamed: 0_level_0,date,time,code,city,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1994-01-01 21:00:00,1994-01-01,21:00:00,,Mexico city,18.5,4.3,1012.2,3.6,39.0
1994-01-01 22:00:00,1994-01-01,22:00:00,,Mexico city,,,,,
...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,8.0,Mexico city,20.0,9.0,1023.0,19.0,49.0
2024-11-03 00:00:00,2024-11-03,00:00:00,8.0,Mexico city,18.0,11.1,1024.0,13.0,64.0


In [21]:
# Database load: write the city/code readings to the w_readings_city_codes table.
# if_exists='append' adds rows to an existing table, so re-running this cell
# inserts the same rows again unless the table itself rejects duplicates.
# index=False leaves the timestamp index out (date/time are already columns).
MxC_city_codes.to_sql(
    name='w_readings_city_codes',
    con=engine,
    if_exists='append',
    index=False,
)


36