In [237]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from models.architectures.temperature_prediction.simple_model import ModelSimple
from models.architectures.temperature_prediction.model_v2 import Modelv2
from models.architectures.temperature_prediction.model_v3 import Modelv3
from models.architectures.temperature_prediction.model_v4 import Modelv4
from models.architectures.temperature_prediction.model_v5 import Modelv5
from models.architectures.temperature_prediction.model_v6 import Modelv6
from models.architectures.temperature_prediction.model_v7 import Modelv7
from sklearn.preprocessing import MinMaxScaler

from torch.utils.data import Dataset, DataLoader

In [238]:
# Read the concatenated Albuquerque weather table and parse its timestamp
# column into real datetimes in a single expression.
df = pd.read_csv("Data/data_concatenated/Albuquerque_concatenated.csv").assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45253 entries, 0 to 45252
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   datetime             45253 non-null  datetime64[ns]
 1   humidity             44543 non-null  float64       
 2   pressure             44797 non-null  float64       
 3   temperature          45252 non-null  float64       
 4   weather_description  45252 non-null  object        
 5   wind_direction       45252 non-null  float64       
 6   wind_speed           45249 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(1)
memory usage: 2.4+ MB


In [239]:
# Show the loaded frame to eyeball the raw columns before any filtering.
df

Unnamed: 0,datetime,humidity,pressure,temperature,weather_description,wind_direction,wind_speed
0,2012-10-01 12:00:00,,,,,,
1,2012-10-01 13:00:00,50.0,1024.0,285.120000,sky is clear,360.0,4.0
2,2012-10-01 14:00:00,49.0,1024.0,285.154558,sky is clear,360.0,4.0
3,2012-10-01 15:00:00,49.0,1024.0,285.233952,sky is clear,360.0,4.0
4,2012-10-01 16:00:00,49.0,1024.0,285.313345,sky is clear,360.0,4.0
...,...,...,...,...,...,...,...
45248,2017-11-29 20:00:00,37.0,1025.0,285.720000,broken clouds,217.0,1.0
45249,2017-11-29 21:00:00,34.0,1024.0,286.450000,broken clouds,217.0,1.0
45250,2017-11-29 22:00:00,32.0,1024.0,286.440000,broken clouds,190.0,1.0
45251,2017-11-29 23:00:00,30.0,1024.0,286.140000,overcast clouds,200.0,1.0


In [240]:
# Derive a midnight-normalised calendar date per reading, then keep only the
# timestamp, temperature and date columns.
df = df.assign(date=pd.to_datetime(df["datetime"].dt.date)).drop(
    columns=['weather_description', 'wind_direction', 'humidity', 'pressure', 'wind_speed']
)
# Discard the first and last rows by index label.
df = df.drop(index=[df.index[0], df.index[-1]])
df

Unnamed: 0,datetime,temperature,date
1,2012-10-01 13:00:00,285.120000,2012-10-01
2,2012-10-01 14:00:00,285.154558,2012-10-01
3,2012-10-01 15:00:00,285.233952,2012-10-01
4,2012-10-01 16:00:00,285.313345,2012-10-01
5,2012-10-01 17:00:00,285.392738,2012-10-01
...,...,...,...
45247,2017-11-29 19:00:00,283.210000,2017-11-29
45248,2017-11-29 20:00:00,285.720000,2017-11-29
45249,2017-11-29 21:00:00,286.450000,2017-11-29
45250,2017-11-29 22:00:00,286.440000,2017-11-29


In [241]:
# Attach per-date temperature statistics to every row of that date
# (transform broadcasts each group's statistic back onto its rows).
df = df.assign(
    min_temperature=lambda frame: frame.groupby('date')['temperature'].transform('min'),
    max_temperature=lambda frame: frame.groupby('date')['temperature'].transform('max'),
    std_temperature=lambda frame: frame.groupby('date')['temperature'].transform('std'),
)
df

Unnamed: 0,datetime,temperature,date,min_temperature,max_temperature,std_temperature
1,2012-10-01 13:00:00,285.120000,2012-10-01,285.12,285.869099,0.256826
2,2012-10-01 14:00:00,285.154558,2012-10-01,285.12,285.869099,0.256826
3,2012-10-01 15:00:00,285.233952,2012-10-01,285.12,285.869099,0.256826
4,2012-10-01 16:00:00,285.313345,2012-10-01,285.12,285.869099,0.256826
5,2012-10-01 17:00:00,285.392738,2012-10-01,285.12,285.869099,0.256826
...,...,...,...,...,...,...
45247,2017-11-29 19:00:00,283.210000,2017-11-29,269.17,286.450000,5.551446
45248,2017-11-29 20:00:00,285.720000,2017-11-29,269.17,286.450000,5.551446
45249,2017-11-29 21:00:00,286.450000,2017-11-29,269.17,286.450000,5.551446
45250,2017-11-29 22:00:00,286.440000,2017-11-29,269.17,286.450000,5.551446


In [242]:
# Collapse the frame to one row per date (column means), then replace the
# date itself with its ordinal day of the year.
df = (
    df.groupby(by="date")
    .mean()
    .drop(columns=["datetime"])
    .reset_index()
    .assign(day_of_year=lambda daily: daily["date"].dt.day_of_year)
    .drop(columns=['date'])
)
df

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year
0,285.476208,285.120000,285.869099,0.256826,275
1,289.771821,285.948492,298.060000,4.684044,276
2,291.205417,284.320000,298.900000,4.563974,277
3,293.096250,286.090000,301.410000,5.250096,278
4,292.829167,285.290000,300.210000,5.008357,279
...,...,...,...,...,...
1881,282.564583,274.680000,293.150000,6.100497,329
1882,282.211250,273.310000,293.470000,6.962903,330
1883,281.325833,272.010000,294.260000,7.731655,331
1884,283.251667,277.440000,291.930000,3.571754,332


In [243]:
# Encode the day of the year as a point on the unit circle (sin/cos of the
# angle day / 365.25 of a full turn); the helper columns are dropped afterwards.
df = (
    df.assign(day_of_year_radians=lambda frame: 2 * np.pi * frame["day_of_year"] / 365.25)
    .assign(
        day_of_year_sin=lambda frame: np.sin(frame["day_of_year_radians"]),
        day_of_year_cos=lambda frame: np.cos(frame["day_of_year_radians"]),
    )
    .drop(columns=['day_of_year', 'day_of_year_radians'])
)
df

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year_sin,day_of_year_cos
0,285.476208,285.120000,285.869099,0.256826,-0.999833,0.018277
1,289.771821,285.948492,298.060000,4.684044,-0.999371,0.035473
2,291.205417,284.320000,298.900000,4.563974,-0.998613,0.052658
3,293.096250,286.090000,301.410000,5.250096,-0.997559,0.069828
4,292.829167,285.290000,300.210000,5.008357,-0.996210,0.086977
...,...,...,...,...,...,...
1881,282.564583,274.680000,293.150000,6.100497,-0.583951,0.811789
1882,282.211250,273.310000,293.470000,6.962903,-0.569901,0.821713
1883,281.325833,272.010000,294.260000,7.731655,-0.555682,0.831395
1884,283.251667,277.440000,291.930000,3.571754,-0.541298,0.840831


In [244]:
# Work on an independent copy: a plain `df_proposition = df` only aliases the
# frame, so every column added to `df_proposition` would also appear in `df`
# and re-running earlier cells would no longer reproduce their outputs.
df_proposition = df.copy()
# The target is the `temperature` value found 4 rows further down; the last
# 4 rows therefore have no target (NaN).
df_proposition['target_temperature'] = df_proposition['temperature'].shift(-4)
df_proposition

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year_sin,day_of_year_cos,target_temperature
0,285.476208,285.120000,285.869099,0.256826,-0.999833,0.018277,292.829167
1,289.771821,285.948492,298.060000,4.684044,-0.999371,0.035473,292.885417
2,291.205417,284.320000,298.900000,4.563974,-0.998613,0.052658,288.764792
3,293.096250,286.090000,301.410000,5.250096,-0.997559,0.069828,281.987500
4,292.829167,285.290000,300.210000,5.008357,-0.996210,0.086977,284.581667
...,...,...,...,...,...,...,...
1881,282.564583,274.680000,293.150000,6.100497,-0.583951,0.811789,277.646250
1882,282.211250,273.310000,293.470000,6.962903,-0.569901,0.821713,
1883,281.325833,272.010000,294.260000,7.731655,-0.555682,0.831395,
1884,283.251667,277.440000,291.930000,3.571754,-0.541298,0.840831,


In [245]:
features_to_expand = ['temperature' , 'min_temperature','max_temperature','std_temperature']

# For each base feature build a 3-row window (this row and the next two):
#   <feature>_3 : the raw value two rows ahead (the window's anchor)
#   <feature>_2 : value one row ahead, minus the anchor
#   <feature>_1 : value in this row, minus the anchor
for feature in features_to_expand:
    anchor = df_proposition[feature].shift(-2)
    df_proposition[f'{feature}_1'] = df_proposition[feature] - anchor
    df_proposition[f'{feature}_2'] = df_proposition[feature].shift(-1) - anchor
    df_proposition[f'{feature}_3'] = anchor

# The raw columns are now fully represented by the window columns.
df_proposition = df_proposition.drop(columns=features_to_expand)

# Trim the last 4 rows (positional slice).
df_proposition = df_proposition.iloc[:-4]

df_proposition

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_temperature,temperature_1,temperature_2,temperature_3,min_temperature_1,min_temperature_2,min_temperature_3,max_temperature_1,max_temperature_2,max_temperature_3,std_temperature_1,std_temperature_2,std_temperature_3
0,-0.999833,0.018277,292.829167,-5.729209,-1.433596,291.205417,0.800000,1.628492,284.32,-13.030901,-0.84,298.90,-4.307148,0.120070,4.563974
1,-0.999371,0.035473,292.885417,-3.324429,-1.890833,293.096250,-0.141508,-1.770000,286.09,-3.350000,-2.51,301.41,-0.566052,-0.686123,5.250096
2,-0.998613,0.052658,288.764792,-1.623750,0.267083,292.829167,-0.970000,0.800000,285.29,-1.310000,1.20,300.21,-0.444383,0.241740,5.008357
3,-0.997559,0.069828,281.987500,0.210833,-0.056250,292.885417,-0.730000,-1.530000,286.82,2.600000,1.40,298.81,1.466218,1.224478,3.783879
4,-0.996210,0.086977,284.581667,4.064375,4.120625,288.764792,5.310000,6.840000,279.98,3.700000,2.30,296.51,-0.423566,-1.648044,5.431922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,-0.638384,0.769718,282.564583,1.245000,-0.858333,281.507083,2.990000,-2.790000,273.82,-1.520000,-3.03,293.20,-2.149315,-0.638640,6.855511
1878,-0.625050,0.780585,282.211250,-1.066667,-0.208333,281.715417,-1.250000,1.540000,272.28,-4.040000,-1.01,294.21,-1.328788,-0.690149,7.545660
1879,-0.611530,0.791221,281.325833,-1.057500,-0.849167,282.564583,-0.860000,-2.400000,274.68,0.050000,1.06,293.15,0.755014,1.445163,6.100497
1880,-0.597829,0.801624,283.251667,-0.495833,0.353333,282.211250,-1.030000,1.370000,273.31,0.740000,-0.32,293.47,0.582757,-0.862406,6.962903


In [246]:
# Rescale every column (features and target alike) with a single scaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so test-set minima/maxima influence the training inputs.
scaler = MinMaxScaler()

normalized_df = pd.DataFrame(
    scaler.fit_transform(df_proposition),
    columns=df_proposition.columns,
)

normalized_df

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_temperature,temperature_1,temperature_2,temperature_3,min_temperature_1,min_temperature_2,min_temperature_3,max_temperature_1,max_temperature_2,max_temperature_3,std_temperature_1,std_temperature_2,std_temperature_3
0,0.000083,0.509136,0.668474,0.232609,0.265340,0.633875,0.498705,0.490598,0.548829,0.104957,0.392899,0.700823,0.109121,0.530104,0.473568
1,0.000314,0.517734,0.669672,0.321247,0.245260,0.674165,0.471900,0.378703,0.582009,0.377943,0.329887,0.755199,0.411122,0.466793,0.544762
2,0.000693,0.526327,0.581870,0.383933,0.340029,0.668474,0.448313,0.463320,0.567013,0.435468,0.469871,0.729203,0.420944,0.539659,0.519679
3,0.001220,0.534912,0.437460,0.451553,0.325830,0.669672,0.455146,0.386605,0.595693,0.545724,0.477418,0.698873,0.575178,0.616834,0.392624
4,0.001895,0.543486,0.492737,0.593591,0.509267,0.581870,0.627103,0.662188,0.467473,0.576742,0.511376,0.649047,0.422624,0.391252,0.563629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,0.180809,0.884862,0.449757,0.489672,0.290604,0.427224,0.561053,0.345119,0.352000,0.429546,0.310267,0.577340,0.283312,0.470522,0.711344
1878,0.187476,0.890295,0.442228,0.404466,0.319150,0.431663,0.440342,0.487685,0.323132,0.358486,0.386485,0.599220,0.349550,0.466477,0.782955
1879,0.194236,0.895614,0.423362,0.404804,0.291007,0.449757,0.451445,0.357960,0.368121,0.473818,0.464589,0.576256,0.517766,0.634165,0.633002
1880,0.201087,0.900815,0.464397,0.425506,0.343817,0.442228,0.446605,0.482088,0.342440,0.493275,0.412519,0.583189,0.503860,0.452949,0.722487


In [247]:
class WeatherDataset(Dataset):
    """In-memory dataset pairing feature rows with their targets.

    Both inputs are converted to ``float32`` tensors once, at construction.

    Args:
        data: Array-like of feature rows; the first dimension indexes samples.
        targets: Array-like holding one target per sample, aligned with ``data``.

    Raises:
        ValueError: If ``data`` and ``targets`` contain a different number of
            samples.
    """

    def __init__(self, data, targets):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError mid-epoch, or silently ignore the surplus targets.
        if len(self.data) != len(self.targets):
            raise ValueError(
                f"data and targets must have the same number of samples, "
                f"got {len(self.data)} and {len(self.targets)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.data[idx], self.targets[idx]

In [248]:
# Separate the scaled target column from the scaled feature matrix.
targets = normalized_df['target_temperature'].to_numpy()
data = normalized_df.drop(columns=['target_temperature']).to_numpy()

# Positional split: the first 1400 rows train the model, the remainder is
# held out for testing.
train_data, train_targets = data[:1400], targets[:1400]
test_data, test_targets = data[1400:], targets[1400:]

train_dataset = WeatherDataset(train_data, train_targets)
test_dataset = WeatherDataset(test_data, test_targets)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

ModelSimple

Model v2

In [249]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv2(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.6342
Epoch 10/70, Loss: 0.0226
Epoch 20/70, Loss: 0.0050
Epoch 30/70, Loss: 0.0046
Epoch 40/70, Loss: 0.0044
Epoch 50/70, Loss: 0.0043
Epoch 60/70, Loss: 0.0043
Epoch 70/70, Loss: 0.0043
Test Loss: 0.0044
Mean Absolute Difference: 2.491658622698739


In [250]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv3(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.2736
Epoch 10/70, Loss: 0.0044
Epoch 20/70, Loss: 0.0042
Epoch 30/70, Loss: 0.0040
Epoch 40/70, Loss: 0.0039
Epoch 50/70, Loss: 0.0039
Epoch 60/70, Loss: 0.0039
Epoch 70/70, Loss: 0.0038
Test Loss: 0.0040
Mean Absolute Difference: 2.3465519087222932


In [251]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv4(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.4473
Epoch 10/70, Loss: 0.0049
Epoch 20/70, Loss: 0.0043
Epoch 30/70, Loss: 0.0042
Epoch 40/70, Loss: 0.0042
Epoch 50/70, Loss: 0.0041
Epoch 60/70, Loss: 0.0040
Epoch 70/70, Loss: 0.0040
Test Loss: 0.0043
Mean Absolute Difference: 2.479905145936989


In [256]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv5(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.0583
Epoch 10/70, Loss: 0.0053
Epoch 20/70, Loss: 0.0045
Epoch 30/70, Loss: 0.0043
Epoch 40/70, Loss: 0.0042
Epoch 50/70, Loss: 0.0041
Epoch 60/70, Loss: 0.0041
Epoch 70/70, Loss: 0.0041
Test Loss: 0.0041
Mean Absolute Difference: 2.4052242909008674


In [258]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv6(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.0325
Epoch 10/70, Loss: 0.0044
Epoch 20/70, Loss: 0.0022
Epoch 30/70, Loss: 0.0021
Epoch 40/70, Loss: 0.0021
Epoch 50/70, Loss: 0.0021
Epoch 60/70, Loss: 0.0021
Epoch 70/70, Loss: 0.0020
Test Loss: 0.0022
Mean Absolute Difference: 2.5549774234230322


In [264]:
input_size = data.shape[1]

# Build, fit and evaluate the model with the project-provided helpers.
model = Modelv7(input_size=input_size, learning_rate=0.0001)
model.train_model(train_loader, epochs=70)
model.test_model(test_loader)

# Bounds of the unscaled target column, used to undo the min-max scaling so
# the error is reported in the target's original units.
min_value = df_proposition['target_temperature'].min()
max_value = df_proposition['target_temperature'].max()
target_range = max_value - min_value

absolute_differences = []
for features, normalized_target in zip(test_data, test_targets):
    # Predict one test row at a time (a leading batch dimension of 1 is added).
    prediction = model.predict(torch.tensor(features, dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * target_range + min_value
    unnormalized_target = normalized_target * target_range + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

absolute_differences = torch.tensor(absolute_differences)

print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Epoch 1/70, Loss: 0.0580
Epoch 10/70, Loss: 0.0097
Epoch 20/70, Loss: 0.0060
Epoch 30/70, Loss: 0.0049
Epoch 40/70, Loss: 0.0043
Epoch 50/70, Loss: 0.0036
Epoch 60/70, Loss: 0.0033
Epoch 70/70, Loss: 0.0031
Test Loss: 0.0022
Mean Absolute Difference: 2.5099151454401842
