# Testing Plotting.

In [1]:
import json
import time
from math import sqrt, radians
import numpy as np

from datetime import datetime

import pandas as pd
import geopandas as gpd

import folium
import folium.plugins

import os
from dotenv import load_dotenv

load_dotenv("../.env")

import sys

sys.path.append("../Scripts/")
sys.path.append("../")

import paths
import config
import plot

In [2]:
Cities = config.Cities

os.listdir(paths.RAW_DATA_DIR)

['Palermo_HistoricalData_01092022_30042024.parquet',
 '.ipynb_checkpoints',
 'Venezia_HistoricalData_01092022_30042024.parquet',
 "L'Aquila_HistoricalData_01092022_30042024.parquet",
 'Catanzaro_HistoricalData_01092022_30042024.parquet',
 'Perugia_HistoricalData_01092022_30042024.parquet',
 'Milano_HistoricalData_01092022_30042024.parquet',
 'Bologna_HistoricalData_01092022_30042024.parquet',
 'Potenza_HistoricalData_01092022_30042024.parquet',
 'Cagliari_HistoricalData_01092022_30042024.parquet',
 'Roma_HistoricalData_01092022_30042024.parquet',
 'Older',
 'Napoli_HistoricalData_01092022_30042024.parquet',
 'Genova_HistoricalData_01092022_30042024.parquet',
 'Campobasso_HistoricalData_01092022_30042024.parquet',
 'Trento_HistoricalData_01092022_30042024.parquet',
 'Firenze_HistoricalData_01092022_30042024.parquet',
 'Torino_HistoricalData_01092022_30042024.parquet',
 'Trieste_HistoricalData_01092022_30042024.parquet',
 'Ancona_HistoricalData_01092022_30042024.parquet',
 'Aosta_Histori

In [3]:
LastRecordDF = pd.DataFrame()

for file in os.listdir(paths.RAW_DATA_DIR):
    if file.endswith(".parquet"):
        #Splitting the FileName for '_' character and then getting the first item in the list as it's going to be the correspective name of the city.
        CityName = file.split("_")[0]
        print(f'Fetching Raw Data from Disk in Raw Data Dir for {CityName}')
        
        #Get the City ID for given City from Cities List
        CityID = next((x["CityID"] for x in Cities if x["CityName"] == CityName), None)
        Latitude = next((x["Latitude"] for x in Cities if x["CityName"] == CityName), None)
        Longitude = next((x["Longitude"] for x in Cities if x["CityName"] == CityName), None)

        print(f'Got City ID {CityID} with Coordinates: {Latitude, Longitude}')
        
        TempCityDF = pd.read_parquet(paths.RAW_DATA_DIR / file)
        
        #Creating a Single (Last) Record DataFrame
        
        #print([CityID, Latitude, Longitude, TempCityDF["EuropeanAQI"].iloc[-1]])
        LastRecord = pd.DataFrame(data = [[CityID, Latitude, Longitude, TempCityDF["EuropeanAQI"].iloc[-1]]], columns = ["CityID", "Latitude", "Longitude", "EuropeanAQI"])
                
        LastRecordDF = pd.concat([LastRecordDF, LastRecord])

Fetching Raw Data from Disk in Raw Data Dir for Palermo
Got City ID PA with Coordinates: (38.115662, 13.36147)
Fetching Raw Data from Disk in Raw Data Dir for Venezia
Got City ID VE with Coordinates: (45.440845, 12.315515)
Fetching Raw Data from Disk in Raw Data Dir for L'Aquila
Got City ID AQ with Coordinates: (42.3507, 13.39993)
Fetching Raw Data from Disk in Raw Data Dir for Catanzaro
Got City ID CZ with Coordinates: (38.910542, 16.587761)
Fetching Raw Data from Disk in Raw Data Dir for Perugia
Got City ID PG with Coordinates: (43.110718, 12.390828)
Fetching Raw Data from Disk in Raw Data Dir for Milano
Got City ID MI with Coordinates: (45.464203, 9.189982)
Fetching Raw Data from Disk in Raw Data Dir for Bologna
Got City ID BO with Coordinates: (44.494888, 11.342616)
Fetching Raw Data from Disk in Raw Data Dir for Potenza
Got City ID PZ with Coordinates: (40.637241, 15.80222)
Fetching Raw Data from Disk in Raw Data Dir for Cagliari
Got City ID CA with Coordinates: (39.215408, 9.1093

In [4]:
LastRecordDF

Unnamed: 0,CityID,Latitude,Longitude,EuropeanAQI
0,PA,38.115662,13.36147,31
0,VE,45.440845,12.315515,34
0,AQ,42.3507,13.39993,25
0,CZ,38.910542,16.587761,37
0,PG,43.110718,12.390828,24
0,MI,45.464203,9.189982,44
0,BO,44.494888,11.342616,27
0,PZ,40.637241,15.80222,26
0,CA,39.215408,9.10932,40
0,RM,41.902782,12.496365,29


In [5]:
df = plot.FillDFwIntermediates(LastRecordDF)

In [6]:
df

Unnamed: 0,CityID,Latitude,Longitude,EuropeanAQI
0,PA,38.115662,13.361470,31
1,VE,45.440845,12.315515,34
2,AQ,42.350700,13.399930,25
3,CZ,38.910542,16.587761,37
4,PG,43.110718,12.390828,24
...,...,...,...,...
750,IntermediatePoint,38.215022,13.764756,31
751,IntermediatePoint,38.195150,13.684099,31
752,IntermediatePoint,38.175278,13.603442,31
753,IntermediatePoint,38.155406,13.522785,31


In [7]:
m = plot.GenerateMap(LastRecordDF)

In [8]:
m

# Testing Sourcing and Feature Engineering.

In [9]:
import sourcing
import featureengineering

In [10]:
DF = sourcing.FetchHistoricalData(False, True)

-----------------------------------------------------
Skipping for AO - Aosta as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TO - Torino as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TN - Trento as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for MI - Milano as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for VE - Venezia as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TS - Trieste as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for GE - Gen

In [11]:
DF

In [12]:
DF = sourcing.FetchFromDisk()

Fetching Raw Data from Disk in Raw Data Dir for Palermo
Fetching Raw Data from Disk in Raw Data Dir for Venezia
Fetching Raw Data from Disk in Raw Data Dir for L'Aquila
Fetching Raw Data from Disk in Raw Data Dir for Catanzaro
Fetching Raw Data from Disk in Raw Data Dir for Perugia
Fetching Raw Data from Disk in Raw Data Dir for Milano
Fetching Raw Data from Disk in Raw Data Dir for Bologna
Fetching Raw Data from Disk in Raw Data Dir for Potenza
Fetching Raw Data from Disk in Raw Data Dir for Cagliari
Fetching Raw Data from Disk in Raw Data Dir for Roma
Fetching Raw Data from Disk in Raw Data Dir for Napoli
Fetching Raw Data from Disk in Raw Data Dir for Genova
Fetching Raw Data from Disk in Raw Data Dir for Campobasso
Fetching Raw Data from Disk in Raw Data Dir for Trento
Fetching Raw Data from Disk in Raw Data Dir for Firenze
Fetching Raw Data from Disk in Raw Data Dir for Torino
Fetching Raw Data from Disk in Raw Data Dir for Trieste
Fetching Raw Data from Disk in Raw Data Dir for A

In [13]:
DF

Unnamed: 0,CityID,Date_GMT+1_Europe/Berlin,Temperature_2m,Relative_Humidity_2m,Dew_Point_2m,Precipitation,Pressure_msl,Surface_Pressure,Cloud_Cover,Wind_Speed_10m,...,Wind_Wirection_10m,Wind_Direction_100m,Soil_Temperature_0-7cm,Soil_Temperature_7-28cm,Soil_Temperature_28-100cm,Soil_Temperature_100-255cm,Soil_Moisture_0-7cm,Soil_Moisture_7-28cm,Soil_Moisture_28-100cm,EuropeanAQI
0,PA,2022-09-01T00:00,26.9,52,16.4,0.2,1014.9,1011.2,39,1.8,...,143,180,26.5,28.3,26.4,23.7,0.111,0.047,0.058,36
1,PA,2022-09-01T01:00,24.2,68,18.0,0.0,1014.4,1010.7,25,4.8,...,153,139,25.6,28.2,26.4,23.7,0.111,0.047,0.058,35
2,PA,2022-09-01T02:00,26.8,58,17.7,0.0,1013.8,1010.1,39,3.1,...,249,264,26.8,28.1,26.4,23.7,0.111,0.047,0.060,34
3,PA,2022-09-01T03:00,25.1,62,17.3,0.0,1013.4,1009.7,4,4.0,...,265,288,26.0,28.0,26.4,23.7,0.111,0.047,0.060,34
4,PA,2022-09-01T04:00,24.4,74,19.5,0.0,1013.6,1009.9,11,3.9,...,236,259,25.2,27.9,26.4,23.7,0.111,0.047,0.060,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14587,BA,2024-04-30T19:00,21.1,60,13.0,0.0,1014.0,1012.9,33,16.3,...,96,107,24.9,20.4,17.2,16.1,0.109,0.182,0.162,43
14588,BA,2024-04-30T20:00,20.9,59,12.5,0.0,1014.0,1012.9,30,11.8,...,130,148,23.5,20.5,17.2,16.1,0.109,0.182,0.162,36
14589,BA,2024-04-30T21:00,20.8,50,10.0,0.0,1014.5,1013.4,31,10.6,...,162,175,22.2,20.6,17.2,16.1,0.109,0.182,0.162,34
14590,BA,2024-04-30T22:00,20.2,48,8.9,0.0,1014.4,1013.3,28,10.8,...,180,186,21.3,20.6,17.2,16.1,0.108,0.182,0.162,32


In [14]:
FEDF = featureengineering.EngineerWholeDF(DF)

In [15]:
FEDF

Unnamed: 0,Temperature_2m,Relative_Humidity_2m,Dew_Point_2m,Precipitation,Pressure_msl,Surface_Pressure,Cloud_Cover,Wind_Speed_10m,Wind_Speed_100m,Wind_Wirection_10m,...,IsHour_14,IsHour_15,IsHour_16,IsHour_17,IsHour_18,IsHour_19,IsHour_20,IsHour_21,IsHour_22,IsHour_23
0,26.9,52,16.4,0.2,1014.9,1011.2,39,1.8,1.8,143,...,0,0,0,0,0,0,0,0,0,0
1,24.2,68,18.0,0.0,1014.4,1010.7,25,4.8,6.6,153,...,0,0,0,0,0,0,0,0,0,0
2,26.8,58,17.7,0.0,1013.8,1010.1,39,3.1,3.6,249,...,0,0,0,0,0,0,0,0,0,0
3,25.1,62,17.3,0.0,1013.4,1009.7,4,4.0,7.2,265,...,0,0,0,0,0,0,0,0,0,0
4,24.4,74,19.5,0.0,1013.6,1009.9,11,3.9,7.7,236,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14587,21.1,60,13.0,0.0,1014.0,1012.9,33,16.3,26.3,96,...,0,0,0,0,0,1,0,0,0,0
14588,20.9,59,12.5,0.0,1014.0,1012.9,30,11.8,21.7,130,...,0,0,0,0,0,0,1,0,0,0
14589,20.8,50,10.0,0.0,1014.5,1013.4,31,10.6,22.0,162,...,0,0,0,0,0,0,0,1,0,0
14590,20.2,48,8.9,0.0,1014.4,1013.3,28,10.8,23.2,180,...,0,0,0,0,0,0,0,0,1,0


In [16]:
FEDF.columns

Index(['Temperature_2m', 'Relative_Humidity_2m', 'Dew_Point_2m',
       'Precipitation', 'Pressure_msl', 'Surface_Pressure', 'Cloud_Cover',
       'Wind_Speed_10m', 'Wind_Speed_100m', 'Wind_Wirection_10m',
       'Wind_Direction_100m', 'Soil_Temperature_0-7cm',
       'Soil_Temperature_7-28cm', 'Soil_Temperature_28-100cm',
       'Soil_Temperature_100-255cm', 'Soil_Moisture_0-7cm',
       'Soil_Moisture_7-28cm', 'Soil_Moisture_28-100cm', 'EuropeanAQI',
       'IsSubRegion_Center', 'IsSubRegion_North', 'IsSubRegion_South',
       'IsSeason_Autumn', 'IsSeason_Spring', 'IsSeason_Summer',
       'IsSeason_Winter', 'IsHour_0', 'IsHour_1', 'IsHour_2', 'IsHour_3',
       'IsHour_4', 'IsHour_5', 'IsHour_6', 'IsHour_7', 'IsHour_8', 'IsHour_9',
       'IsHour_10', 'IsHour_11', 'IsHour_12', 'IsHour_13', 'IsHour_14',
       'IsHour_15', 'IsHour_16', 'IsHour_17', 'IsHour_18', 'IsHour_19',
       'IsHour_20', 'IsHour_21', 'IsHour_22', 'IsHour_23'],
      dtype='object')

In [None]:
#Test Feature Engineering on single data point.
#Double Squared Brackets to preserve DF Format
SingleRecord = DF.reset_index(drop=True).loc[[1000]]

In [None]:
SingleRecord = SingleRecord.reset_index(drop=True)
SingleRecord

In [None]:
#Date Feature Engineering - Then Dropping the Column
SingleRecord["Date"] = [datetime.strptime(SingleRecord["Date_GMT+1_Europe/Berlin"].iloc[0].replace("T", " "), "%Y-%m-%d %H:%M")]
#As it's going to be a pd.Series, we don't need to specify the axis
SingleRecord.drop("Date_GMT+1_Europe/Berlin", axis = 1, inplace = True)
SingleRecord

In [None]:
#We Need [1] or [0] as Dict values cause otherwise it's going to mess with shape, we can't treat them as scalars
SubRegion = {"IsSubRegion_" + x: [1] if x == next((z[1] for z in config.CitiesSubRegion if SingleRecord["CityID"].iloc[0] == z[0]), None) else [0] for x in ["Center", "North", "South"]}

#Quick Check
if sum([x[0] for x in SubRegion.values()]) == 0 or sum([x[0] for x in SubRegion.values()]) > 1:
    raise DataIntegrityError("There's been an Error in Categorizing Data in SubRegions!!")

else:
    print("Check Passed, Datetimes correctly Transformed in SubReagions!")

SingleRecord = pd.concat([SingleRecord, pd.DataFrame(SubRegion)], axis = 1)
SingleRecord.drop("CityID", axis = 1, inplace = True)
SingleRecord

In [None]:
#Season Feature Engineering 
Date = SingleRecord["Date"][0]
Season = {"IsSeason_Winter": [1] if ((Date.month >= 12 and Date.day >= 21) or (Date.month >= 1 and Date.month < 3) or (Date.month == 3 and x.day <= 20)) else [0],
          "IsSeason_Spring": [1] if ((Date.month >= 3 and Date.day >= 21) or (Date.month >= 4 and Date.month < 6) or (Date.month == 6 and Date.day <= 20)) else [0],
          "IsSeason_Summer": [1] if ((Date.month >= 6 and Date.day >= 21) or (Date.month >= 7 and Date.month < 9) or (Date.month == 9 and Date.day <= 22))  else [0],
          "IsSeason_Autumn": [1] if ((Date.month >= 9 and Date.day >= 23) or (Date.month >= 10 and Date.month < 12) or (Date.month == 12 and Date.day <= 20)) else [0]         
         }

#Quick Check
if sum([x[0] for x in Season.values()]) == 0 or sum([x[0] for x in Season.values()]) > 1:
    raise DataIntegrityError("There's been an Error in Categorizing Data in Seasons!!")

else:
    print("Check Passed, Datetimes correctly Transformed in Seasons!")

SingleRecord = pd.concat([SingleRecord, pd.DataFrame(Season)], axis = 1)
SingleRecord

In [None]:

SingleRecord

In [None]:
SingleRecord = DF.reset_index(drop=True).loc[[1000]]
SingleRecord = SingleRecord.reset_index(drop=True)
SingleRecord

In [None]:
SingleEngineered = featureengineering.EngineerSingleFeature(SingleRecord)
SingleEngineered

# Testing MariaDB.

In [None]:
import backfillmariadb

In [None]:
DataFromDisk = sourcing.FetchFromDisk()

In [None]:
DataFromDisk

In [None]:
import sourcing
Data = sourcing.FetchHistoricalData(True, False)
Data

In [None]:
Data[Data["CityID"] == "PA"][-50:].isnull().sum()

In [19]:
import requests
import json
url = "https://archive-api.open-meteo.com/v1/archive"

params = {
    "latitude": 38.115662,
    "longitude": 13.361470,
    "hourly": "temperature_2m",
    "timezone": "Europe/Berlin",
    "start_hour": "2024-04-30",
    "end_hour": "2024-05-01T23:00"
}

response = requests.get(url, params=params)

json.loads(response.text)

{'latitude': 38.13708,
 'longitude': 13.342318,
 'generationtime_ms': 0.14901161193847656,
 'utc_offset_seconds': 7200,
 'timezone': 'Europe/Berlin',
 'timezone_abbreviation': 'CEST',
 'elevation': 32.0,
 'hourly_units': {'time': 'iso8601', 'temperature_2m': '°C'},
 'hourly': {'time': ['2024-04-30T00:00',
   '2024-04-30T01:00',
   '2024-04-30T02:00',
   '2024-04-30T03:00',
   '2024-04-30T04:00',
   '2024-04-30T05:00',
   '2024-04-30T06:00',
   '2024-04-30T07:00',
   '2024-04-30T08:00',
   '2024-04-30T09:00',
   '2024-04-30T10:00',
   '2024-04-30T11:00',
   '2024-04-30T12:00',
   '2024-04-30T13:00',
   '2024-04-30T14:00',
   '2024-04-30T15:00',
   '2024-04-30T16:00',
   '2024-04-30T17:00',
   '2024-04-30T18:00',
   '2024-04-30T19:00',
   '2024-04-30T20:00',
   '2024-04-30T21:00',
   '2024-04-30T22:00',
   '2024-04-30T23:00',
   '2024-05-01T00:00',
   '2024-05-01T01:00',
   '2024-05-01T02:00',
   '2024-05-01T03:00',
   '2024-05-01T04:00',
   '2024-05-01T05:00',
   '2024-05-01T06:00',
   

In [None]:
from datetime import datetime, timedelta
datetime.strftime(datetime.now() - timedelta(days=2), "%Y-%m-%d") + "T23:00"

In [5]:
import sourcing
Data = sourcing.FetchHistoricalData(False, True)

-----------------------------------------------------
Skipping for AO - Aosta as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Fetching Data for TO - Torino
-----------------------------------------------------
Checking Weather Data Integrity of Torino for: ('time', 'Date_GMT+1_Europe/Berlin')
-----------------------------------------------------
Weather Data Integrity Check for Torino Passed!!
-----------------------------------------------------
Checking Air Quality Data Integrity of Torino for: ('time', 'Date_GMT+1_Europe/Berlin')
-----------------------------------------------------
Air Quality Data Integrity Check for Torino Passed!!
-----------------------------------------------------
Merging all Data Together...
-----------------------------------------------------
Data Correctly Merged!!
-----------------------------------------------------
Adding Column CityID TO to Torino
------------------------

In [None]:
from datetime import datetime, timedelta

StartDate = "01092022"
#End Date 2 Days ago, cause Yesterday not available, one day lag in historical Data loading.
EndDate = datetime.strftime(datetime.now() - timedelta(days=2), "%d%m%Y")

import os
if os.path.exists(paths.RAW_DATA_DIR / f'Ancona_HistoricalData_{StartDate}_{EndDate}.parquet'):
    print("miao")

In [None]:
Data

In [None]:
EndDate = datetime.strftime(datetime.now() - timedelta(days=2), "%d%m%Y")


In [None]:
StartDate

In [21]:
import requests
import json
url = "https://archive-api.open-meteo.com/v1/archive"

params = {
    "latitude": 38.115662,
    "longitude": 13.361470,
    "hourly": "temperature_2m",
    "timezone": "Europe/Berlin",
    "start_hour": "2024-04-30",
    "end_hour": "2024-05-01T23:00"
}

response = requests.get(url, params=params)

json.loads(response.text)

{'latitude': 38.13708,
 'longitude': 13.342318,
 'generationtime_ms': 0.11301040649414062,
 'utc_offset_seconds': 7200,
 'timezone': 'Europe/Berlin',
 'timezone_abbreviation': 'CEST',
 'elevation': 32.0,
 'hourly_units': {'time': 'iso8601', 'temperature_2m': '°C'},
 'hourly': {'time': ['2024-04-30T00:00',
   '2024-04-30T01:00',
   '2024-04-30T02:00',
   '2024-04-30T03:00',
   '2024-04-30T04:00',
   '2024-04-30T05:00',
   '2024-04-30T06:00',
   '2024-04-30T07:00',
   '2024-04-30T08:00',
   '2024-04-30T09:00',
   '2024-04-30T10:00',
   '2024-04-30T11:00',
   '2024-04-30T12:00',
   '2024-04-30T13:00',
   '2024-04-30T14:00',
   '2024-04-30T15:00',
   '2024-04-30T16:00',
   '2024-04-30T17:00',
   '2024-04-30T18:00',
   '2024-04-30T19:00',
   '2024-04-30T20:00',
   '2024-04-30T21:00',
   '2024-04-30T22:00',
   '2024-04-30T23:00',
   '2024-05-01T00:00',
   '2024-05-01T01:00',
   '2024-05-01T02:00',
   '2024-05-01T03:00',
   '2024-05-01T04:00',
   '2024-05-01T05:00',
   '2024-05-01T06:00',
   

In [22]:
#Fetching on a Fixed Date and Time

url = "https://api.open-meteo.com/v1/forecast"

params = {
    "latitude": 52.52,
    "longitude": 13.41,
    "hourly": "temperature_2m",
    "timezone": "Europe/Berlin",
    "start_hour": "2024-04-30",
    "end_hour": "2024-05-02"
}

response = requests.get(url, params=params)

json.loads(response.text)

{'latitude': 52.52,
 'longitude': 13.419998,
 'generationtime_ms': 0.030994415283203125,
 'utc_offset_seconds': 7200,
 'timezone': 'Europe/Berlin',
 'timezone_abbreviation': 'CEST',
 'elevation': 38.0,
 'hourly_units': {'time': 'iso8601', 'temperature_2m': '°C'},
 'hourly': {'time': ['2024-04-30T00:00',
   '2024-04-30T01:00',
   '2024-04-30T02:00',
   '2024-04-30T03:00',
   '2024-04-30T04:00',
   '2024-04-30T05:00',
   '2024-04-30T06:00',
   '2024-04-30T07:00',
   '2024-04-30T08:00',
   '2024-04-30T09:00',
   '2024-04-30T10:00',
   '2024-04-30T11:00',
   '2024-04-30T12:00',
   '2024-04-30T13:00',
   '2024-04-30T14:00',
   '2024-04-30T15:00',
   '2024-04-30T16:00',
   '2024-04-30T17:00',
   '2024-04-30T18:00',
   '2024-04-30T19:00',
   '2024-04-30T20:00',
   '2024-04-30T21:00',
   '2024-04-30T22:00',
   '2024-04-30T23:00',
   '2024-05-01T00:00',
   '2024-05-01T01:00',
   '2024-05-01T02:00',
   '2024-05-01T03:00',
   '2024-05-01T04:00',
   '2024-05-01T05:00',
   '2024-05-01T06:00',
   '2