# Testing Plotting.

In [1]:
import json
import time
from math import sqrt, radians
import numpy as np

from datetime import datetime

import pandas as pd
import geopandas as gpd

import folium
import folium.plugins

import os
from dotenv import load_dotenv

load_dotenv("../.env")

import sys

sys.path.append("../Scripts/")
sys.path.append("../")

import paths
import config
import plot

In [2]:
Cities = config.Cities

os.listdir(paths.RAW_DATA_DIR)

['Perugia_HistoricalData_01092022_29022024.parquet',
 '.ipynb_checkpoints',
 'Roma_HistoricalData_01092022_29022024.parquet',
 'Bologna_HistoricalData_01092022_29022024.parquet',
 'Trento_HistoricalData_01092022_29022024.parquet',
 'Potenza_HistoricalData_01092022_29022024.parquet',
 'Firenze_HistoricalData_01092022_29022024.parquet',
 'Ancona_HistoricalData_01092022_29022024.parquet',
 'Catanzaro_HistoricalData_01092022_29022024.parquet',
 'Cagliari_HistoricalData_01092022_29022024.parquet',
 'Venezia_HistoricalData_01092022_29022024.parquet',
 'Genova_HistoricalData_01092022_29022024.parquet',
 'Milano_HistoricalData_01092022_29022024.parquet',
 'Bari_HistoricalData_01092022_29022024.parquet',
 'Campobasso_HistoricalData_01092022_29022024.parquet',
 'Trieste_HistoricalData_01092022_29022024.parquet',
 "L'Aquila_HistoricalData_01092022_29022024.parquet",
 'Palermo_HistoricalData_01092022_29022024.parquet',
 'Torino_HistoricalData_01092022_29022024.parquet',
 'Aosta_HistoricalData_0109

In [3]:
LastRecordDF = pd.DataFrame()

for file in os.listdir(paths.RAW_DATA_DIR):
    if file.endswith(".parquet"):
        #Splitting the FileName for '_' character and then getting the first item in the list as it's going to be the correspective name of the city.
        CityName = file.split("_")[0]
        print(f'Fetching Raw Data from Disk in Raw Data Dir for {CityName}')
        
        #Get the City ID for given City from Cities List
        CityID = next((x["CityID"] for x in Cities if x["CityName"] == CityName), None)
        Latitude = next((x["Latitude"] for x in Cities if x["CityName"] == CityName), None)
        Longitude = next((x["Longitude"] for x in Cities if x["CityName"] == CityName), None)

        print(f'Got City ID {CityID} with Coordinates: {Latitude, Longitude}')
        
        TempCityDF = pd.read_parquet(paths.RAW_DATA_DIR / file)
        
        #Creating a Single (Last) Record DataFrame
        
        #print([CityID, Latitude, Longitude, TempCityDF["EuropeanAQI"].iloc[-1]])
        LastRecord = pd.DataFrame(data = [[CityID, Latitude, Longitude, TempCityDF["EuropeanAQI"].iloc[-1]]], columns = ["CityID", "Latitude", "Longitude", "EuropeanAQI"])
                
        LastRecordDF = pd.concat([LastRecordDF, LastRecord])

Fetching Raw Data from Disk in Raw Data Dir for Perugia
Got City ID PG with Coordinates: (43.110718, 12.390828)
Fetching Raw Data from Disk in Raw Data Dir for Roma
Got City ID RM with Coordinates: (41.902782, 12.496365)
Fetching Raw Data from Disk in Raw Data Dir for Bologna
Got City ID BO with Coordinates: (44.494888, 11.342616)
Fetching Raw Data from Disk in Raw Data Dir for Trento
Got City ID TN with Coordinates: (46.066669, 11.12907)
Fetching Raw Data from Disk in Raw Data Dir for Potenza
Got City ID PZ with Coordinates: (40.637241, 15.80222)
Fetching Raw Data from Disk in Raw Data Dir for Firenze
Got City ID FI with Coordinates: (43.769562, 11.255814)
Fetching Raw Data from Disk in Raw Data Dir for Ancona
Got City ID AN with Coordinates: (43.615849, 13.51874)
Fetching Raw Data from Disk in Raw Data Dir for Catanzaro
Got City ID CZ with Coordinates: (38.910542, 16.587761)
Fetching Raw Data from Disk in Raw Data Dir for Cagliari
Got City ID CA with Coordinates: (39.215408, 9.10932)

In [4]:
LastRecordDF

Unnamed: 0,CityID,Latitude,Longitude,EuropeanAQI
0,PG,43.110718,12.390828,22
0,RM,41.902782,12.496365,27
0,BO,44.494888,11.342616,38
0,TN,46.066669,11.12907,63
0,PZ,40.637241,15.80222,17
0,FI,43.769562,11.255814,24
0,AN,43.615849,13.51874,34
0,CZ,38.910542,16.587761,26
0,CA,39.215408,9.10932,17
0,VE,45.440845,12.315515,33


In [5]:
df = plot.FillDFwIntermediates(LastRecordDF)

In [6]:
df

Unnamed: 0,CityID,Latitude,Longitude,EuropeanAQI
0,PG,43.110718,12.390828,22
1,RM,41.902782,12.496365,27
2,BO,44.494888,11.342616,38
3,TN,46.066669,11.129070,63
4,PZ,40.637241,15.802220,17
...,...,...,...,...
750,IntermediatePoint,38.215022,13.764756,22
751,IntermediatePoint,38.195150,13.684099,22
752,IntermediatePoint,38.175278,13.603442,22
753,IntermediatePoint,38.155406,13.522785,22


In [7]:
m = plot.GenerateMap(LastRecordDF)

In [8]:
m

# Testing Sourcing and Feature Engineering.

In [9]:
import sourcing
import featureengineering

In [10]:
DF = sourcing.FetchHistoricalData(False)

-----------------------------------------------------
Skipping for AO - Aosta as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TO - Torino as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TN - Trento as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for MI - Milano as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for VE - Venezia as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for TS - Trieste as already into Disk
-----------------------------------------------------
-----------------------------------------------------
Skipping for GE - Gen

In [11]:
DF

In [12]:
DF = sourcing.FetchFromDisk()

Fetching Raw Data from Disk in Raw Data Dir for Perugia
Got City ID PG
Fetching Raw Data from Disk in Raw Data Dir for Roma
Got City ID RM
Fetching Raw Data from Disk in Raw Data Dir for Bologna
Got City ID BO
Fetching Raw Data from Disk in Raw Data Dir for Trento
Got City ID TN
Fetching Raw Data from Disk in Raw Data Dir for Potenza
Got City ID PZ
Fetching Raw Data from Disk in Raw Data Dir for Firenze
Got City ID FI
Fetching Raw Data from Disk in Raw Data Dir for Ancona
Got City ID AN
Fetching Raw Data from Disk in Raw Data Dir for Catanzaro
Got City ID CZ
Fetching Raw Data from Disk in Raw Data Dir for Cagliari
Got City ID CA
Fetching Raw Data from Disk in Raw Data Dir for Venezia
Got City ID VE
Fetching Raw Data from Disk in Raw Data Dir for Genova
Got City ID GE
Fetching Raw Data from Disk in Raw Data Dir for Milano
Got City ID MI
Fetching Raw Data from Disk in Raw Data Dir for Bari
Got City ID BA
Fetching Raw Data from Disk in Raw Data Dir for Campobasso
Got City ID CB
Fetching R

In [None]:
DF

In [15]:
FEDF = featureengineering.EngineerWholeDF(DF)

Check Passed, Datetimes correctly Transformed in Seasons!


In [17]:
FEDF

Unnamed: 0,Temperature_2m,Relative_Humidity_2m,Dew_Point_2m,Precipitation,Pressure_msl,Surface_Pressure,Cloud_Cover,Wind_Speed_10m,Wind_Speed_100m,Wind_Wirection_10m,...,IsHour_14,IsHour_15,IsHour_16,IsHour_17,IsHour_18,IsHour_19,IsHour_20,IsHour_21,IsHour_22,IsHour_23
0,18.8,84,15.9,0.0,1012.6,958.8,77,5.4,12.7,127,...,0,0,0,0,0,0,0,0,0,0
1,19.1,83,16.0,0.0,1012.2,958.5,9,3.4,3.4,32,...,0,0,0,0,0,0,0,0,0,0
2,18.8,82,15.6,0.0,1011.8,958.0,80,4.2,6.6,31,...,0,0,0,0,0,0,0,0,0,0
3,18.2,85,15.6,0.0,1011.8,957.9,53,9.8,15.8,163,...,0,0,0,0,0,0,0,0,0,0
4,16.8,97,16.3,7.9,1012.2,958.1,100,1.6,1.5,27,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13123,10.5,90,8.9,0.0,1006.2,1003.9,88,5.1,8.8,315,...,0,0,0,0,0,1,0,0,0,0
13124,10.4,90,8.8,0.0,1006.0,1003.7,99,4.2,9.4,340,...,0,0,0,0,0,0,1,0,0,0
13125,10.2,90,8.7,0.1,1005.8,1003.5,100,4.3,9.3,336,...,0,0,0,0,0,0,0,1,0,0
13126,10.5,92,9.3,1.4,1006.3,1004.0,100,6.0,4.7,115,...,0,0,0,0,0,0,0,0,1,0


In [None]:
#Test Feature Engineering on single data point.
#Double Squared Brackets to preserve DF Format
SingleRecord = DF.reset_index(drop=True).loc[[1000]]

In [None]:
SingleRecord = SingleRecord.reset_index(drop=True)
SingleRecord

In [None]:
#Date Feature Engineering - Then Dropping the Column
SingleRecord["Date"] = [datetime.strptime(SingleRecord["Date_GMT+1_Europe/Berlin"].iloc[0].replace("T", " "), "%Y-%m-%d %H:%M")]
#As it's going to be a pd.Series, we don't need to specify the axis
SingleRecord.drop("Date_GMT+1_Europe/Berlin", axis = 1, inplace = True)
SingleRecord

In [None]:
#We Need [1] or [0] as Dict values cause otherwise it's going to mess with shape, we can't treat them as scalars
SubRegion = {"IsSubRegion_" + x: [1] if x == next((z[1] for z in config.CitiesSubRegion if SingleRecord["CityID"].iloc[0] == z[0]), None) else [0] for x in ["Center", "North", "South"]}

#Quick Check
if sum([x[0] for x in SubRegion.values()]) == 0 or sum([x[0] for x in SubRegion.values()]) > 1:
    raise DataIntegrityError("There's been an Error in Categorizing Data in SubRegions!!")

else:
    print("Check Passed, Datetimes correctly Transformed in SubReagions!")

SingleRecord = pd.concat([SingleRecord, pd.DataFrame(SubRegion)], axis = 1)
SingleRecord.drop("CityID", axis = 1, inplace = True)
SingleRecord

In [None]:
#Season Feature Engineering 
Date = SingleRecord["Date"][0]
Season = {"IsSeason_Winter": [1] if ((Date.month >= 12 and Date.day >= 21) or (Date.month >= 1 and Date.month < 3) or (Date.month == 3 and x.day <= 20)) else [0],
          "IsSeason_Spring": [1] if ((Date.month >= 3 and Date.day >= 21) or (Date.month >= 4 and Date.month < 6) or (Date.month == 6 and Date.day <= 20)) else [0],
          "IsSeason_Summer": [1] if ((Date.month >= 6 and Date.day >= 21) or (Date.month >= 7 and Date.month < 9) or (Date.month == 9 and Date.day <= 22))  else [0],
          "IsSeason_Autumn": [1] if ((Date.month >= 9 and Date.day >= 23) or (Date.month >= 10 and Date.month < 12) or (Date.month == 12 and Date.day <= 20)) else [0]         
         }

#Quick Check
if sum([x[0] for x in Season.values()]) == 0 or sum([x[0] for x in Season.values()]) > 1:
    raise DataIntegrityError("There's been an Error in Categorizing Data in Seasons!!")

else:
    print("Check Passed, Datetimes correctly Transformed in Seasons!")

SingleRecord = pd.concat([SingleRecord, pd.DataFrame(Season)], axis = 1)
SingleRecord

In [None]:

SingleRecord

In [13]:
SingleRecord = DF.reset_index(drop=True).loc[[1000]]
SingleRecord = SingleRecord.reset_index(drop=True)
SingleRecord

Unnamed: 0,CityID,Date_GMT+1_Europe/Berlin,Temperature_2m,Relative_Humidity_2m,Dew_Point_2m,Precipitation,Pressure_msl,Surface_Pressure,Cloud_Cover,Wind_Speed_10m,...,Wind_Wirection_10m,Wind_Direction_100m,Soil_Temperature_0-7cm,Soil_Temperature_7-28cm,Soil_Temperature_28-100cm,Soil_Temperature_100-255cm,Soil_Moisture_0-7cm,Soil_Moisture_7-28cm,Soil_Moisture_28-100cm,EuropeanAQI
0,PG,2022-10-12T16:00,17.6,74,13.0,0.5,1020.4,966.0,92,10.7,...,130,123,19.6,17.9,17.8,19.9,0.297,0.299,0.253,38


In [14]:
SingleEngineered = featureengineering.EngineerSingleFeature(SingleRecord)
SingleEngineered

Check Passed, Datetimes correctly Transformed in SubReagions!
Check Passed, Datetimes correctly Transformed in Seasons!
Check Passed, Datetimes correctly Transformed in Hours!


Unnamed: 0,Temperature_2m,Relative_Humidity_2m,Dew_Point_2m,Precipitation,Pressure_msl,Surface_Pressure,Cloud_Cover,Wind_Speed_10m,Wind_Speed_100m,Wind_Wirection_10m,...,IsHour_14,IsHour_15,IsHour_16,IsHour_17,IsHour_18,IsHour_19,IsHour_20,IsHour_21,IsHour_22,IsHour_23
0,17.6,74,13.0,0.5,1020.4,966.0,92,10.7,15.9,130,...,0,0,1,0,0,0,0,0,0,0


# Testing MariaDB.