# **ETL**

In [1]:
# Main libraries

import requests 
import os 
import pandas as pd  
import numpy as np
from datetime import datetime
from meteostat import Point, Daily, Hourly
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
pd.set_option('display.max_rows', 5)

In [3]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
    


## **Weather Readings: Tepic**

**Method**: Meteostat Python Library 

In [4]:
Tepic_latitude = 21.5083   
Tepic_longitude = -104.893    
Tepic = Point(Tepic_latitude, Tepic_longitude)

**Daily data**

In [5]:
# Set time period
start = datetime(1994, 1, 1)
end = datetime(2024, 11, 3)

# Get daily data
Tepic_daily = Daily(Tepic, start, end)
Tepic_daily = Tepic_daily.fetch()
Tepic_daily

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1994-01-01,19.6,,,0.0,,,,,,
1994-01-02,19.5,,,0.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2024-11-02,21.5,10.0,29.0,0.0,,249.0,6.6,,1015.3,
2024-11-03,20.6,11.0,29.0,0.0,,236.0,6.7,,1013.7,


**Hourly data**

In [6]:
# Data exploration
Tepic_hourly = Hourly(Tepic, start, end)
Tepic_hourly = Tepic_hourly.fetch()
Tepic_hourly

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1994-01-01 00:00:00,24.0,12.0,47.0,,,360.0,14.8,,1000.1,,
1994-01-01 01:00:00,21.0,15.1,69.0,,,360.0,9.0,,1000.1,,
...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,24.0,19.9,78.0,0.0,,360.0,11.0,,1015.0,,1.0
2024-11-03 00:00:00,22.0,16.1,69.0,0.0,,325.0,6.5,,1015.0,,1.0


In [7]:
# Adding columns according to the ER diagram 
Tepic_hourly['date']=Tepic_hourly.index.date 
Tepic_hourly['time']=Tepic_hourly.index.time 
Tepic_hourly['city']= 'Querétaro'
Tepic_hourly

Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-01 00:00:00,24.0,12.0,47.0,,,360.0,14.8,,1000.1,,,1994-01-01,00:00:00,Querétaro
1994-01-01 01:00:00,21.0,15.1,69.0,,,360.0,9.0,,1000.1,,,1994-01-01,01:00:00,Querétaro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,24.0,19.9,78.0,0.0,,360.0,11.0,,1015.0,,1.0,2024-11-02,23:00:00,Querétaro
2024-11-03 00:00:00,22.0,16.1,69.0,0.0,,325.0,6.5,,1015.0,,1.0,2024-11-03,00:00:00,Querétaro


In [8]:
# Renaming columns according to the ER diagram 
Tepic_hourly = Tepic_hourly.rename(columns={'temp':'temperature','coco':'code', 'dwpt':'dew_point', 'pres':'pressure', 'wspd':'wind_speed','rhum':'relative_humidity'})
Tepic_hourly

Unnamed: 0_level_0,temperature,dew_point,relative_humidity,prcp,snow,wdir,wind_speed,wpgt,pressure,tsun,code,date,time,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1994-01-01 00:00:00,24.0,12.0,47.0,,,360.0,14.8,,1000.1,,,1994-01-01,00:00:00,Querétaro
1994-01-01 01:00:00,21.0,15.1,69.0,,,360.0,9.0,,1000.1,,,1994-01-01,01:00:00,Querétaro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,24.0,19.9,78.0,0.0,,360.0,11.0,,1015.0,,1.0,2024-11-02,23:00:00,Querétaro
2024-11-03 00:00:00,22.0,16.1,69.0,0.0,,325.0,6.5,,1015.0,,1.0,2024-11-03,00:00:00,Querétaro


In [9]:
# Filtering needed columns
Tepic_weather_readings=Tepic_hourly[['date','time','temperature','dew_point','pressure','wind_speed', 'relative_humidity']]
Tepic_weather_readings

Unnamed: 0_level_0,date,time,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1994-01-01 00:00:00,1994-01-01,00:00:00,24.0,12.0,1000.1,14.8,47.0
1994-01-01 01:00:00,1994-01-01,01:00:00,21.0,15.1,1000.1,9.0,69.0
...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,24.0,19.9,1015.0,11.0,78.0
2024-11-03 00:00:00,2024-11-03,00:00:00,22.0,16.1,1015.0,6.5,69.0


In [None]:
# Database Connection: We use sql server with a trusted connection and weather as database name
engine = create_engine('mssql+pyodbc://SERVER_NAME/weather?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server')


In [11]:
# Database load
Tepic_weather_readings.to_sql('weather_readings', engine, if_exists='append', index=False)

41

In [12]:
# Filtering needed columns
Tepic_city_codes=Tepic_hourly[['date','time','code', 'city','temperature','dew_point','pressure','wind_speed', 'relative_humidity']]
Tepic_city_codes

Unnamed: 0_level_0,date,time,code,city,temperature,dew_point,pressure,wind_speed,relative_humidity
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1994-01-01 00:00:00,1994-01-01,00:00:00,,Querétaro,24.0,12.0,1000.1,14.8,47.0
1994-01-01 01:00:00,1994-01-01,01:00:00,,Querétaro,21.0,15.1,1000.1,9.0,69.0
...,...,...,...,...,...,...,...,...,...
2024-11-02 23:00:00,2024-11-02,23:00:00,1.0,Querétaro,24.0,19.9,1015.0,11.0,78.0
2024-11-03 00:00:00,2024-11-03,00:00:00,1.0,Querétaro,22.0,16.1,1015.0,6.5,69.0


In [13]:
# Database load
Tepic_city_codes.to_sql('w_readings_city_codes', engine, if_exists='append', index=False)


57