In [1]:
import os
import sys
import pandas as pd
import geopandas as gpd
import pygeos as pg
import numpy as np
import tensorflow as tf
import sqlalchemy as sq
import ipyparallel as ipp
from IPython.display import clear_output
from matplotlib import pyplot as plt
from shapely import wkt

In [11]:
# Load the station-to-region lookup and the daily per-station weather table.
# ClimateID is parsed as text at read time so type inference can never coerce
# the identifiers to int/float (losing leading zeros, or rendering as "123.0"
# when a value is missing); both frames then carry comparable merge keys.
dfStationRegion = pd.read_csv("Regions/StationRegion.csv", dtype={"ClimateID": str})
dfWeather = pd.read_csv(
    "Data/WeatherDataAggDaily_headers.csv", dtype={"ClimateID": str}
)

In [12]:
# One row per distinct region number found in the station lookup,
# in order of first appearance.
distinctRegionIds = dfStationRegion["CRnum"].unique()
dfRegions = pd.DataFrame({"CRnum": distinctRegionIds})

In [13]:
# Cast the region numbers to integers, order them ascending and renumber the
# rows from zero; the trailing bare expression renders the result.
dfRegions = (
    dfRegions.astype({"CRnum": "int"})
    .sort_values(by="CRnum")
    .reset_index(drop=True)
)
dfRegions

Unnamed: 0,CRnum
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9


In [16]:
# ClimateID is the join key between the two frames; store it as str in both
# so the keys compare equal regardless of how each CSV was parsed.
for stationFrame in (dfWeather, dfStationRegion):
    stationFrame["ClimateID"] = stationFrame["ClimateID"].astype(str)

In [20]:
# Drop the leading column of the station lookup (the one that precedes
# ClimateID in the CSV; presumably a saved row index -- confirm).
# The drop is positional, so it is guarded: once ClimateID is the first column
# a re-run of this cell must not remove it, because the merge relies on it.
leadingColumn = dfStationRegion.columns[0]
if leadingColumn != "ClimateID":
    dfStationRegion = dfStationRegion.drop(columns=[leadingColumn])

In [21]:
# Show the column dtypes of both inputs, weather first, to confirm that the
# ClimateID key has the same dtype on each side before merging.
print(dfWeather.dtypes, dfStationRegion.dtypes, sep="\n")

ClimateID             object
ProvinceCode          object
Year                   int64
Month                  int64
Day                    int64
MeanTemp             float64
MinTemp              float64
MaxTemp              float64
MeanDewPoint         float64
MinDewPoint          float64
MaxDewPoint          float64
MeanHumidity         float64
MinHumidity          float64
MaxHumidity          float64
MeanPressure         float64
MinPressure          float64
MaxPressure          float64
MeanWindSpeed        float64
MinWindSpeed         float64
MaxWindSpeed         float64
MeanWindChill        float64
MinWindChill         float64
MaxWindChill         float64
TotalPrecip          float64
MeanWindDirection    float64
dtype: object
ClimateID     object
geometry      object
CARname       object
color         object
CRnum        float64
dtype: object


In [23]:
# Attach the region attributes to every weather row. A left join keeps all
# weather rows; stations missing from the lookup get NaN region columns.
dfJoined = pd.merge(dfWeather, dfStationRegion, how="left", on="ClimateID")

In [24]:
# Render the merged weather/region frame as this cell's output.
dfJoined

Unnamed: 0,ClimateID,ProvinceCode,Year,Month,Day,MeanTemp,MinTemp,MaxTemp,MeanDewPoint,MinDewPoint,...,MaxWindSpeed,MeanWindChill,MinWindChill,MaxWindChill,TotalPrecip,MeanWindDirection,geometry,CARname,color,CRnum
0,4012403,SK,2020,1,2,-2.558333,-3.8,-1.8,-3.458333,-4.3,...,26.0,-7.208333,-11.0,-4.0,0.0,28.083333,POINT (5395723.67567 1522710.63568),Census Agricultural Region 1A,#4B0082,4.0
1,4012403,SK,2020,1,3,-8.225000,-13.0,-3.6,-10.700000,-14.2,...,28.0,-14.791667,-20.0,-10.0,0.0,26.739130,POINT (5395723.67567 1522710.63568),Census Agricultural Region 1A,#4B0082,4.0
2,4012403,SK,2020,1,4,-2.404167,-9.5,3.5,-5.379167,-11.0,...,67.0,-11.538462,-15.0,-7.0,0.0,17.708333,POINT (5395723.67567 1522710.63568),Census Agricultural Region 1A,#4B0082,4.0
3,4012403,SK,2020,1,5,-4.716667,-10.9,-0.6,-7.945833,-12.5,...,59.0,-12.041667,-17.0,-8.0,0.0,26.916667,POINT (5395723.67567 1522710.63568),Census Agricultural Region 1A,#4B0082,4.0
4,4012403,SK,2020,1,6,-6.100000,-10.6,-1.6,-9.391667,-12.4,...,46.0,-13.541667,-17.0,-9.0,0.0,26.333333,POINT (5395723.67567 1522710.63568),Census Agricultural Region 1A,#4B0082,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256454,3016505,AB,2022,9,27,17.204167,8.5,26.6,7.087500,5.7,...,11.0,,,,0.0,18.958333,POINT (4735853.631349 2184984.739067),Census Agricultural Region 5,#0000FF,9.0
256455,3016505,AB,2022,9,28,15.583333,4.3,26.7,5.445833,3.5,...,11.0,,,,0.0,12.250000,POINT (4735853.631349 2184984.739067),Census Agricultural Region 5,#0000FF,9.0
256456,3016505,AB,2022,9,29,12.245833,9.7,13.9,8.304167,6.0,...,27.0,,,,0.4,25.708333,POINT (4735853.631349 2184984.739067),Census Agricultural Region 5,#0000FF,9.0
256457,3016505,AB,2022,9,30,12.630435,8.8,19.1,8.852174,7.2,...,15.0,,,,0.0,21.826087,POINT (4735853.631349 2184984.739067),Census Agricultural Region 5,#0000FF,9.0


In [26]:
# List the merged frame's column labels (weather columns followed by the
# columns brought in from the station lookup).
dfJoined.columns

Index(['ClimateID', 'ProvinceCode', 'Year', 'Month', 'Day', 'MeanTemp',
       'MinTemp', 'MaxTemp', 'MeanDewPoint', 'MinDewPoint', 'MaxDewPoint',
       'MeanHumidity', 'MinHumidity', 'MaxHumidity', 'MeanPressure',
       'MinPressure', 'MaxPressure', 'MeanWindSpeed', 'MinWindSpeed',
       'MaxWindSpeed', 'MeanWindChill', 'MinWindChill', 'MaxWindChill',
       'TotalPrecip', 'MeanWindDirection', 'geometry', 'CARname', 'color',
       'CRnum'],
      dtype='object')

In [37]:
# Collapse the station-level rows into one row per (Year, Month, Day, CRnum).
# For each measure the Mean* column is averaged, Min* takes the minimum and
# Max* takes the maximum across the region's stations; precipitation is summed.
# NOTE(review): MeanWindDirection is averaged arithmetically -- if it is an
# angular quantity a circular mean may be more appropriate; confirm.
aggregationSpec = {}
for measure in (
    "Temp",
    "DewPoint",
    "Humidity",
    "Pressure",
    "WindSpeed",
    "WindChill",
):
    aggregationSpec[f"Mean{measure}"] = "mean"
    aggregationSpec[f"Min{measure}"] = "min"
    aggregationSpec[f"Max{measure}"] = "max"
aggregationSpec["TotalPrecip"] = "sum"
aggregationSpec["MeanWindDirection"] = "mean"

groupingKeys = ["Year", "Month", "Day", "CRnum"]
dfAgg = dfJoined.groupby(groupingKeys, as_index=False).agg(aggregationSpec)

In [38]:
# Render the per-day, per-region aggregate as this cell's output.
dfAgg

Unnamed: 0,Year,Month,Day,CRnum,MeanTemp,MinTemp,MaxTemp,MeanDewPoint,MinDewPoint,MaxDewPoint,...,MinPressure,MaxPressure,MeanWindSpeed,MinWindSpeed,MaxWindSpeed,MeanWindChill,MinWindChill,MaxWindChill,TotalPrecip,MeanWindDirection
0,2020,1,1,1.0,-6.570833,-13.3,2.0,-8.927083,-15.0,-1.4,...,94.17,96.75,10.305556,0.0,27.0,-11.260762,-21.0,-2.0,0.0,19.365741
1,2020,1,1,2.0,-5.825833,-12.7,0.1,-7.290000,-13.8,-3.8,...,92.28,95.69,8.750000,0.0,24.0,-9.386322,-18.0,-3.0,0.0,22.458333
2,2020,1,1,3.0,-4.390417,-12.7,2.4,-5.975417,-14.2,-0.3,...,91.25,96.53,10.291667,0.0,28.0,-8.745631,-19.0,-2.0,0.0,23.513889
3,2020,1,1,4.0,-2.431250,-10.4,1.7,-4.113542,-11.3,-0.7,...,88.36,92.83,14.118056,2.0,32.0,-7.186941,-14.0,-2.0,0.2,28.559028
4,2020,1,1,5.0,-1.043519,-5.5,1.8,-3.209259,-6.7,-0.3,...,84.64,91.84,12.541667,1.0,26.0,-5.368376,-11.0,0.0,2.3,27.518519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9040,2022,10,1,5.0,12.400000,10.9,13.8,6.025000,3.7,7.7,...,87.84,94.55,13.500000,2.0,21.0,,,,0.0,8.625000
9041,2022,10,1,6.0,8.250000,6.6,9.7,5.850000,3.2,7.9,...,95.93,98.51,13.400000,7.0,22.0,,,,0.0,7.600000
9042,2022,10,1,7.0,10.794118,7.0,14.8,7.647059,5.7,9.1,...,94.10,97.64,15.352941,2.0,31.0,,,,0.0,10.882353
9043,2022,10,1,8.0,9.837963,2.5,14.9,7.923529,1.4,11.7,...,86.57,94.35,9.937500,0.0,22.0,,,,0.0,26.294737


In [39]:
# Total number of missing cells across the whole aggregated frame.
missingCellCount = dfAgg.isna().to_numpy().sum()
print(missingCellCount)

10740


In [40]:
# Region numbers come out of the merge as floats; store them as integers so
# the equality filters on CRnum compare against plain ints.
dfAgg["CRnum"] = dfAgg["CRnum"].astype(int)

In [41]:
# Slice the aggregate into one frame per region number, 1 through 10.
# NOTE(review): the dfRegions listing shown earlier in this notebook has
# CRnum 0-9, so dfR10 is presumably empty and region 0 gets no frame --
# confirm the intended id range.
(
    dfR1,
    dfR2,
    dfR3,
    dfR4,
    dfR5,
    dfR6,
    dfR7,
    dfR8,
    dfR9,
    dfR10,
) = (dfAgg[dfAgg["CRnum"] == regionId] for regionId in range(1, 11))

In [42]:
# Render the rows of the aggregate whose CRnum equals 1.
dfR1

Unnamed: 0,Year,Month,Day,CRnum,MeanTemp,MinTemp,MaxTemp,MeanDewPoint,MinDewPoint,MaxDewPoint,...,MinPressure,MaxPressure,MeanWindSpeed,MinWindSpeed,MaxWindSpeed,MeanWindChill,MinWindChill,MaxWindChill,TotalPrecip,MeanWindDirection
0,2020,1,1,1,-6.570833,-13.3,2.0,-8.927083,-15.0,-1.4,...,94.17,96.75,10.305556,0.0,27.0,-11.260762,-21.0,-2.0,0.0,19.365741
9,2020,1,2,1,-6.881944,-10.7,-2.5,-9.021875,-14.6,-2.8,...,94.84,97.78,12.472222,0.0,28.0,-11.897544,-18.0,-4.0,0.3,28.689815
18,2020,1,3,1,-4.228704,-10.8,-0.3,-6.389062,-12.4,-2.5,...,96.09,98.81,7.768519,0.0,25.0,-7.321128,-15.0,-1.0,0.1,23.078704
27,2020,1,4,1,-4.148611,-6.7,-1.7,-6.450521,-8.5,-4.0,...,95.45,98.80,11.972222,0.0,36.0,-8.357287,-13.0,-3.0,2.1,18.152778
36,2020,1,5,1,-4.368519,-14.5,2.7,-8.273438,-16.3,-1.6,...,95.43,98.52,21.166667,3.0,55.0,-10.470928,-21.0,-5.0,1.5,26.615741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9000,2022,9,27,1,6.306208,-2.7,15.3,-0.154436,-9.2,5.5,...,97.80,100.18,5.202083,0.0,14.0,-2.411111,-6.0,-1.0,0.0,13.861667
9009,2022,9,28,1,10.825172,-2.6,22.3,1.149788,-3.6,9.0,...,97.33,100.13,16.521362,1.0,43.0,-1.687500,-4.0,-1.0,0.0,17.240454
9018,2022,9,29,1,14.417083,4.5,25.3,4.870000,-0.2,11.1,...,97.00,99.26,11.883333,0.0,32.0,,,,0.0,15.858333
9027,2022,9,30,1,12.434300,3.8,22.5,7.618841,3.0,10.9,...,97.21,99.57,9.946860,0.0,26.0,,,,0.2,7.502415


In [43]:
# Render the rows of the aggregate whose CRnum equals 2.
dfR2

Unnamed: 0,Year,Month,Day,CRnum,MeanTemp,MinTemp,MaxTemp,MeanDewPoint,MinDewPoint,MaxDewPoint,...,MinPressure,MaxPressure,MeanWindSpeed,MinWindSpeed,MaxWindSpeed,MeanWindChill,MinWindChill,MaxWindChill,TotalPrecip,MeanWindDirection
1,2020,1,1,2,-5.825833,-12.7,0.1,-7.290000,-13.8,-3.8,...,92.28,95.69,8.750000,0.0,24.0,-9.386322,-18.0,-3.0,0.0,22.458333
10,2020,1,2,2,-6.037500,-9.9,-1.2,-7.633333,-11.7,-2.8,...,92.88,96.78,10.687500,1.0,26.0,-10.375000,-14.0,-6.0,0.1,23.812500
19,2020,1,3,2,-2.943333,-8.7,-0.3,-5.090833,-10.1,-2.3,...,93.98,97.89,14.322917,1.0,30.0,-6.958333,-12.0,-2.0,0.2,26.156250
28,2020,1,4,2,-5.110833,-9.8,-2.5,-6.622500,-10.7,-4.4,...,93.24,97.84,11.541667,3.0,30.0,-9.312500,-15.0,-5.0,0.4,13.770833
37,2020,1,5,2,-5.234167,-14.3,-0.5,-9.792500,-16.0,-3.8,...,93.47,97.64,23.656250,2.0,42.0,-11.385417,-19.0,-6.0,0.9,26.896739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9001,2022,9,27,2,8.476667,-0.1,19.2,2.930000,-1.9,6.4,...,95.90,99.20,8.875000,0.0,19.0,0.000000,0.0,0.0,0.0,15.827536
9010,2022,9,28,2,13.956667,4.3,24.2,2.456667,-3.3,6.9,...,95.11,98.93,13.558333,0.0,31.0,,,,0.4,17.449510
9019,2022,9,29,2,15.401667,6.0,25.5,7.435833,3.8,11.8,...,95.06,98.22,9.916667,1.0,34.0,,,,0.0,15.578623
9028,2022,9,30,2,10.712174,5.7,17.3,9.201063,3.5,12.5,...,95.42,99.00,14.426087,3.0,30.0,,,,0.0,13.826087


In [44]:
# Write each per-region frame to Regions/region<N>.csv without the row index.
# NOTE(review): a frame with no rows still produces a header-only file --
# confirm that downstream readers expect that for every id from 1 to 10.
regionFrames = [dfR1, dfR2, dfR3, dfR4, dfR5, dfR6, dfR7, dfR8, dfR9, dfR10]
for regionId, regionFrame in enumerate(regionFrames, start=1):
    regionFrame.to_csv(f"Regions/region{regionId}.csv", index=False)