In [541]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from models.architectures.temperature_prediction.simple_model import ModelSimple
from models.architectures.temperature_prediction.model_v2 import Modelv2
from models.architectures.temperature_prediction.model_v3 import Modelv3
from models.architectures.temperature_prediction.model_v4 import Modelv4
from models.architectures.temperature_prediction.model_v5 import Modelv5
from models.architectures.temperature_prediction.model_v6 import Modelv6
from sklearn.preprocessing import MinMaxScaler

from torch.utils.data import Dataset, DataLoader

In [542]:
# Read the concatenated Seattle export and parse the timestamp column so the
# `.dt` accessor can be used on it in later cells.
raw_csv_path = "Data/data_concatenated/Seattle_concatenated.csv"
df = pd.read_csv(raw_csv_path)
df = df.assign(datetime=pd.to_datetime(df["datetime"]))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45253 entries, 0 to 45252
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   datetime             45253 non-null  datetime64[ns]
 1   humidity             44964 non-null  float64       
 2   pressure             45240 non-null  float64       
 3   temperature          45250 non-null  float64       
 4   weather_description  45252 non-null  object        
 5   wind_direction       45252 non-null  float64       
 6   wind_speed           45252 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(1)
memory usage: 2.4+ MB


In [543]:
# Show the freshly loaded frame for a visual sanity check of the raw rows.
df

Unnamed: 0,datetime,humidity,pressure,temperature,weather_description,wind_direction,wind_speed
0,2012-10-01 12:00:00,,,,,,
1,2012-10-01 13:00:00,81.0,1027.0,281.800000,sky is clear,0.0,0.0
2,2012-10-01 14:00:00,80.0,1027.0,281.797217,sky is clear,2.0,0.0
3,2012-10-01 15:00:00,80.0,1028.0,281.789833,sky is clear,10.0,0.0
4,2012-10-01 16:00:00,79.0,1028.0,281.782449,sky is clear,17.0,0.0
...,...,...,...,...,...,...,...
45248,2017-11-29 20:00:00,93.0,1030.0,280.820000,light rain,190.0,4.0
45249,2017-11-29 21:00:00,87.0,1030.0,281.650000,overcast clouds,130.0,5.0
45250,2017-11-29 22:00:00,93.0,1029.0,282.750000,broken clouds,210.0,5.0
45251,2017-11-29 23:00:00,87.0,1028.0,282.960000,broken clouds,210.0,3.0


In [544]:
# Keep only what is needed to model temperature: the timestamp, the reading
# itself and a midnight-normalised calendar date used as the grouping key.
unused_columns = ['weather_description', 'pressure', 'humidity', 'wind_direction', 'wind_speed']
df = df.assign(date=pd.to_datetime(df["datetime"].dt.date)).drop(columns=unused_columns)

# Discard the first and the last row of the frame.
edge_labels = [df.index[0], df.index[-1]]
df = df.drop(index=edge_labels)
df

Unnamed: 0,datetime,temperature,date
1,2012-10-01 13:00:00,281.800000,2012-10-01
2,2012-10-01 14:00:00,281.797217,2012-10-01
3,2012-10-01 15:00:00,281.789833,2012-10-01
4,2012-10-01 16:00:00,281.782449,2012-10-01
5,2012-10-01 17:00:00,281.775065,2012-10-01
...,...,...,...
45247,2017-11-29 19:00:00,280.030000,2017-11-29
45248,2017-11-29 20:00:00,280.820000,2017-11-29
45249,2017-11-29 21:00:00,281.650000,2017-11-29
45250,2017-11-29 22:00:00,282.750000,2017-11-29


In [545]:
# Broadcast each calendar day's min / max / std of the temperature onto every
# row belonging to that day (transform keeps the original row count).
temperature_by_date = df.groupby('date')['temperature']
for statistic in ('min', 'max', 'std'):
    df[f'{statistic}_temperature'] = temperature_by_date.transform(statistic)
df

Unnamed: 0,datetime,temperature,date,min_temperature,max_temperature,std_temperature
1,2012-10-01 13:00:00,281.800000,2012-10-01,281.73076,281.80,0.023827
2,2012-10-01 14:00:00,281.797217,2012-10-01,281.73076,281.80,0.023827
3,2012-10-01 15:00:00,281.789833,2012-10-01,281.73076,281.80,0.023827
4,2012-10-01 16:00:00,281.782449,2012-10-01,281.73076,281.80,0.023827
5,2012-10-01 17:00:00,281.775065,2012-10-01,281.73076,281.80,0.023827
...,...,...,...,...,...,...
45247,2017-11-29 19:00:00,280.030000,2017-11-29,277.96000,282.96,1.385492
45248,2017-11-29 20:00:00,280.820000,2017-11-29,277.96000,282.96,1.385492
45249,2017-11-29 21:00:00,281.650000,2017-11-29,277.96000,282.96,1.385492
45250,2017-11-29 22:00:00,282.750000,2017-11-29,277.96000,282.96,1.385492


In [546]:
# Collapse the rows to one row per calendar date by averaging every column,
# then discard the averaged timestamp, which carries no extra information.
daily_means = df.groupby(by="date").mean()
daily_means = daily_means.drop(columns=["datetime"]).reset_index()

# Replace the calendar date by its ordinal position within the year.
df = daily_means.assign(day_of_year=daily_means["date"].dt.day_of_year).drop(columns=['date'])
df

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year
0,281.767262,281.73076,281.80,0.023827,275
1,285.156888,281.62000,296.11,5.396786,276
2,287.673958,283.23000,295.24,3.320187,277
3,284.391667,279.15000,290.05,3.450668,278
4,284.756250,280.08000,290.21,3.453840,279
...,...,...,...,...,...
1881,280.994167,277.65000,282.95,1.603032,329
1882,282.576667,280.92000,285.29,1.506365,330
1883,279.854167,278.34000,282.55,1.240375,331
1884,280.094167,279.15000,282.65,0.651940,332


In [547]:
# Encode the day of the year as a point on the unit circle so that the end of
# December and the start of January end up close to each other.
day_angle = 2 * np.pi * df["day_of_year"] / 365.25
df = df.assign(
    day_of_year_sin=np.sin(day_angle),
    day_of_year_cos=np.cos(day_angle),
).drop(columns=['day_of_year'])
df

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year_sin,day_of_year_cos
0,281.767262,281.73076,281.80,0.023827,-0.999833,0.018277
1,285.156888,281.62000,296.11,5.396786,-0.999371,0.035473
2,287.673958,283.23000,295.24,3.320187,-0.998613,0.052658
3,284.391667,279.15000,290.05,3.450668,-0.997559,0.069828
4,284.756250,280.08000,290.21,3.453840,-0.996210,0.086977
...,...,...,...,...,...,...
1881,280.994167,277.65000,282.95,1.603032,-0.583951,0.811789
1882,282.576667,280.92000,285.29,1.506365,-0.569901,0.821713
1883,279.854167,278.34000,282.55,1.240375,-0.555682,0.831395
1884,280.094167,279.15000,282.65,0.651940,-0.541298,0.840831


In [548]:
# Work on an independent copy. A plain `df_proposition = df` only creates a
# second name for the same object, so the columns added below would silently
# appear in `df` too and the frame displayed by the previous cell would no
# longer match the variable's contents.
df_proposition = df.copy()

# shift(-k) aligns row t with the temperature found k rows later:
#   middleDay_temperature -> value 3 rows ahead (intermediate target)
#   target_temperature    -> value 4 rows ahead (final target)
# The last rows have no such future value and therefore become NaN.
df_proposition['target_temperature'] = df_proposition['temperature'].shift(-4)
df_proposition['middleDay_temperature'] = df_proposition['temperature'].shift(-3)
df_proposition

Unnamed: 0,temperature,min_temperature,max_temperature,std_temperature,day_of_year_sin,day_of_year_cos,target_temperature,middleDay_temperature
0,281.767262,281.73076,281.80,0.023827,-0.999833,0.018277,284.756250,284.391667
1,285.156888,281.62000,296.11,5.396786,-0.999371,0.035473,285.674583,284.756250
2,287.673958,283.23000,295.24,3.320187,-0.998613,0.052658,286.183333,285.674583
3,284.391667,279.15000,290.05,3.450668,-0.997559,0.069828,287.144167,286.183333
4,284.756250,280.08000,290.21,3.453840,-0.996210,0.086977,286.707708,287.144167
...,...,...,...,...,...,...,...,...
1881,280.994167,277.65000,282.95,1.603032,-0.583951,0.811789,279.272917,280.094167
1882,282.576667,280.92000,285.29,1.506365,-0.569901,0.821713,,279.272917
1883,279.854167,278.34000,282.55,1.240375,-0.555682,0.831395,,
1884,280.094167,279.15000,282.65,0.651940,-0.541298,0.840831,,


In [549]:
features_to_expand = ['temperature','min_temperature', 'max_temperature', 'std_temperature']

# For every base feature build a three-row window (rows t, t+1, t+2):
#   <feature>_3 : raw value at row t+2 (the most recent row of the window)
#   <feature>_2 : value at row t+1 minus the value at row t+2
#   <feature>_1 : value at row t   minus the value at row t+2
for feature in features_to_expand:
    current = df_proposition[feature]
    anchor = current.shift(-2)
    df_proposition[f'{feature}_1'] = current - anchor
    df_proposition[f'{feature}_2'] = current.shift(-1) - anchor
    df_proposition[f'{feature}_3'] = anchor

# The un-shifted base columns are now redundant.
df_proposition = df_proposition.drop(columns=features_to_expand)

# The final four rows contain NaN produced by the negative shifts.
df_proposition = df_proposition.iloc[:-4]

df_proposition

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_temperature,middleDay_temperature,temperature_1,temperature_2,temperature_3,min_temperature_1,min_temperature_2,min_temperature_3,max_temperature_1,max_temperature_2,max_temperature_3,std_temperature_1,std_temperature_2,std_temperature_3
0,-0.999833,0.018277,284.756250,284.391667,-5.906696,-2.517070,287.673958,-1.49924,-1.61,283.23,-13.44,0.87,295.24,-3.296360,2.076599,3.320187
1,-0.999371,0.035473,285.674583,284.756250,0.765222,3.282292,284.391667,2.47000,4.08,279.15,6.06,5.19,290.05,1.946118,-0.130481,3.450668
2,-0.998613,0.052658,286.183333,285.674583,2.917708,-0.364583,284.756250,3.15000,-0.93,280.08,5.03,-0.16,290.21,-0.133653,-0.003172,3.453840
3,-0.997559,0.069828,287.144167,286.183333,-1.282917,-0.918333,285.674583,-1.40000,-0.47,280.55,-2.80,-2.64,292.85,-0.773084,-0.769912,4.223752
4,-0.996210,0.086977,286.707708,287.144167,-1.427083,-0.508750,286.183333,0.54000,1.01,279.54,-4.57,-1.93,294.78,-1.744817,-0.974905,5.198657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,-0.638384,0.769718,280.994167,282.076667,-8.940000,0.293333,287.532500,-9.09000,-0.73,284.44,-8.99,-0.08,291.74,-0.275658,-0.733905,2.401444
1878,-0.625050,0.780585,282.576667,280.994167,5.749167,5.455833,282.076667,2.87000,3.60,280.84,7.12,7.20,284.54,0.708338,1.442243,0.959201
1879,-0.611530,0.791221,279.854167,282.576667,6.538333,1.082500,280.994167,6.79000,3.19,277.65,8.79,1.59,282.95,0.798412,-0.643831,1.603032
1880,-0.597829,0.801624,280.094167,279.854167,-0.500000,-1.582500,282.576667,-0.08000,-3.27,280.92,-0.75,-2.34,285.29,-0.547164,0.096667,1.506365


In [550]:
# Rescale every column (features and both targets) to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, including those that later
# form the test split (rows from index 1400 onwards), so test-set minima and
# maxima influence the training features. Consider fitting on the training
# rows only; the manual inverse scaling further down must then be adapted.
scaler = MinMaxScaler()

scaled_values = scaler.fit_transform(df_proposition)
normalized_df = pd.DataFrame(scaled_values, columns=df_proposition.columns)

normalized_df

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_temperature,middleDay_temperature,temperature_1,temperature_2,temperature_3,min_temperature_1,min_temperature_2,min_temperature_3,max_temperature_1,max_temperature_2,max_temperature_3,std_temperature_1,std_temperature_2,std_temperature_3
0,0.000083,0.509136,0.548142,0.536459,0.138939,0.392220,0.641642,0.374572,0.392899,0.639634,0.000000,0.479665,0.668863,0.187553,0.695171,0.484595
1,0.000314,0.517734,0.577571,0.548142,0.427868,0.730895,0.536459,0.556133,0.724098,0.505459,0.710383,0.660797,0.526359,0.682258,0.496807,0.503639
2,0.000693,0.526327,0.593874,0.577571,0.521081,0.517922,0.548142,0.587238,0.432480,0.536043,0.672860,0.436478,0.530752,0.486001,0.508249,0.504102
3,0.001220,0.534912,0.624665,0.593874,0.339173,0.485584,0.577571,0.379111,0.459255,0.551500,0.387614,0.332495,0.603240,0.425661,0.439337,0.616474
4,0.001895,0.543486,0.610678,0.624665,0.332930,0.509503,0.593874,0.467851,0.545402,0.518285,0.323133,0.362264,0.656233,0.333964,0.420913,0.758765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,0.180809,0.884862,0.427583,0.462273,0.007582,0.556344,0.637109,0.027354,0.444121,0.679426,0.162113,0.439832,0.572762,0.472601,0.442573,0.350500
1878,0.187476,0.890295,0.478296,0.427583,0.643698,0.857826,0.462273,0.574430,0.696158,0.561037,0.748998,0.745073,0.375069,0.565455,0.638158,0.139999
1879,0.194236,0.895614,0.391051,0.478296,0.677873,0.602430,0.427583,0.753739,0.672293,0.456130,0.809836,0.509853,0.331411,0.573955,0.450669,0.233969
1880,0.201087,0.900815,0.398742,0.391051,0.373077,0.446798,0.478296,0.439491,0.296275,0.563667,0.462295,0.345073,0.395662,0.446980,0.517222,0.219860


In [551]:
class WeatherDataset(Dataset):
    """Pair a feature array with its targets as float32 tensors.

    Args:
        data: array-like of samples; indexed along its first dimension.
        targets: array-like of target values, one per sample.
    """

    def __init__(self, data, targets):
        as_float32 = dict(dtype=torch.float32)
        self.data = torch.tensor(data, **as_float32)
        self.targets = torch.tensor(targets, **as_float32)

    def __len__(self):
        # The number of samples is the length of the first dimension.
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        sample = self.data[idx]
        label = self.targets[idx]
        return sample, label

In [552]:
# Stage 1 learns to predict `middleDay_temperature` from every engineered
# feature; both target columns are excluded from the inputs.
feature_frame = normalized_df.drop(columns=['target_temperature', 'middleDay_temperature'])
data = feature_frame.to_numpy()
targets = normalized_df['middleDay_temperature'].to_numpy()

# Chronological split: the first 1400 rows train, the remainder tests.
split_at = 1400
train_data, train_targets = data[:split_at], targets[:split_at]
test_data, test_targets = data[split_at:], targets[split_at:]

# Wrap the arrays as datasets.
train_dataset = WeatherDataset(train_data, train_targets)
test_dataset = WeatherDataset(test_data, test_targets)

# Only the training batches are shuffled; the test order is preserved.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [553]:
# Number of feature columns fed to the model.
input_size = data.shape[1]

# Stage-1 model: predicts `middleDay_temperature` from the engineered features.
model = Modelv6(input_size=input_size, learning_rate=0.0001)

# Fit on the training split, then report the loss on the held-out split.
model.train_model(train_loader, epochs=70)

model.test_model(test_loader)

# Keep the stage-1 prediction for every test row, in test order; a later cell
# feeds these values to the stage-2 model in place of the true values.
# (The min/max of the target column used to be computed here as well but were
# never read in this cell; they are computed where the inverse scaling is done.)
prediction_middleDay = [
    model.predict(torch.tensor(row, dtype=torch.float32).unsqueeze(0))
    for row in test_data
]

Epoch 1/70, Loss: 0.0484
Epoch 10/70, Loss: 0.0330
Epoch 20/70, Loss: 0.0163
Epoch 30/70, Loss: 0.0017
Epoch 40/70, Loss: 0.0016
Epoch 50/70, Loss: 0.0015
Epoch 60/70, Loss: 0.0015
Epoch 70/70, Loss: 0.0015
Test Loss: 0.0016


In [554]:
# Stage 2 learns to predict `target_temperature` from the intermediate
# temperature plus the cyclic day-of-year encoding.
stage2_columns = ['middleDay_temperature','day_of_year_sin','day_of_year_cos']
data = normalized_df[stage2_columns].to_numpy()
targets = normalized_df['target_temperature'].to_numpy()

# Same chronological split as before: first 1400 rows train, the rest tests.
split_at = 1400
train_data, train_targets = data[:split_at], targets[:split_at]
test_data, test_targets = data[split_at:], targets[split_at:]

# Wrap the arrays as datasets.
train_dataset = WeatherDataset(train_data, train_targets)
test_dataset = WeatherDataset(test_data, test_targets)

# Only the training batches are shuffled; the test order is preserved.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [555]:
# The feature matrix is (rows, columns); the model needs the column count.
_, input_size = data.shape

# Stage-2 model: maps the intermediate temperature and the cyclic day
# encoding to `target_temperature`.
model_middleDay = Modelv6(input_size=input_size, learning_rate=0.0001)

# Fit on the training split.
model_middleDay.train_model(train_loader, epochs=70)

# Evaluate on the held-out split; as the last expression of the cell, the
# returned value is displayed as well.
model_middleDay.test_model(test_loader)

Epoch 1/70, Loss: 0.0803
Epoch 10/70, Loss: 0.0136
Epoch 20/70, Loss: 0.0022
Epoch 30/70, Loss: 0.0018
Epoch 40/70, Loss: 0.0017
Epoch 50/70, Loss: 0.0016
Epoch 60/70, Loss: 0.0016
Epoch 70/70, Loss: 0.0015
Test Loss: 0.0018


0.0017694888018888973

In [556]:
# Replace the true `middleDay_temperature` (column 0 of the stage-2 inputs)
# with the stage-1 predictions, so the stage-2 model is evaluated on inputs
# that would actually be available at forecast time.
# NOTE(review): `test_data` was obtained by slicing `data`, which for a NumPy
# array is a view, so this assignment also overwrites the matching rows of
# `data` -- take a `.copy()` first if the original values are still needed.
test_data[:, 0] = prediction_middleDay

In [557]:
# Bounds of the un-scaled target column, needed to undo the min-max scaling.
max_value = df_proposition['target_temperature'].max()
min_value = df_proposition['target_temperature'].min()

absolute_differences = []
for features_row, scaled_target in zip(test_data, test_targets):
    model_input = torch.tensor(features_row, dtype=torch.float32).unsqueeze(0)
    prediction = model_middleDay.predict(model_input)

    # Map both the prediction and the ground truth back to the original scale.
    unnormalized_prediction = prediction * (max_value - min_value) + min_value
    unnormalized_target = scaled_target * (max_value - min_value) + min_value

    absolute_differences.append(abs(unnormalized_prediction - unnormalized_target))

# Gather the per-row errors in a tensor so the mean can be taken with torch.
absolute_differences = torch.tensor(absolute_differences)

# Report the mean absolute error in the original temperature units.
print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

Mean Absolute Difference: 1.9405996704773667
