In [213]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from models.architectures.temperature_prediction.simple_model import ModelSimple
from models.architectures.temperature_prediction.model_v2 import Modelv2
from models.architectures.temperature_prediction.model_v3 import Modelv3
from models.architectures.weather_prediction.model_w1 import Modelw1
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from scipy import stats

from torch.utils.data import Dataset, DataLoader

In [214]:
df = pd.read_csv("Data/data_concatenated/Albuquerque_concatenated.csv")
df["datetime"] = pd.to_datetime(df["datetime"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45253 entries, 0 to 45252
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   datetime             45253 non-null  datetime64[ns]
 1   humidity             44543 non-null  float64       
 2   pressure             44797 non-null  float64       
 3   temperature          45252 non-null  float64       
 4   weather_description  45252 non-null  object        
 5   wind_direction       45252 non-null  float64       
 6   wind_speed           45249 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(1)
memory usage: 2.4+ MB


In [215]:
# TODO change object to num
df = df.drop(columns=["weather_description", "humidity"])

In [216]:
df.head(10)

Unnamed: 0,datetime,pressure,temperature,wind_direction,wind_speed
0,2012-10-01 12:00:00,,,,
1,2012-10-01 13:00:00,1024.0,285.12,360.0,4.0
2,2012-10-01 14:00:00,1024.0,285.154558,360.0,4.0
3,2012-10-01 15:00:00,1024.0,285.233952,360.0,4.0
4,2012-10-01 16:00:00,1024.0,285.313345,360.0,4.0
5,2012-10-01 17:00:00,1024.0,285.392738,360.0,4.0
6,2012-10-01 18:00:00,1024.0,285.472132,360.0,4.0
7,2012-10-01 19:00:00,1024.0,285.551525,360.0,4.0
8,2012-10-01 20:00:00,1024.0,285.630919,360.0,4.0
9,2012-10-01 21:00:00,1024.0,285.710312,360.0,4.0


In [217]:

df['datetime'] = pd.to_datetime(df['datetime'])

df['date'] = df['datetime'].dt.date

daily_max_wind = df.groupby('date')['wind_speed'].transform('max')

df['wind_max'] = daily_max_wind

df['wind_std'] = df.groupby('date')['wind_speed'].transform('std')

df = df.drop(columns=['date', 'pressure', 'temperature'])
df = df.drop([df.index[0], df.index[-1]])
df

Unnamed: 0,datetime,wind_direction,wind_speed,wind_max,wind_std
1,2012-10-01 13:00:00,360.0,4.0,4.0,0.000000
2,2012-10-01 14:00:00,360.0,4.0,4.0,0.000000
3,2012-10-01 15:00:00,360.0,4.0,4.0,0.000000
4,2012-10-01 16:00:00,360.0,4.0,4.0,0.000000
5,2012-10-01 17:00:00,360.0,4.0,4.0,0.000000
...,...,...,...,...,...
45247,2017-11-29 19:00:00,291.0,2.0,9.0,2.871335
45248,2017-11-29 20:00:00,217.0,1.0,9.0,2.871335
45249,2017-11-29 21:00:00,217.0,1.0,9.0,2.871335
45250,2017-11-29 22:00:00,190.0,1.0,9.0,2.871335


In [218]:
df["date"] = pd.to_datetime(df["datetime"].dt.date)
df = df.groupby(by="date").mean().drop(columns=["datetime"]).reset_index()

df["day_of_year"] = df["date"].dt.day_of_year

In [219]:
df

Unnamed: 0,date,wind_direction,wind_speed,wind_max,wind_std,day_of_year
0,2012-10-01,360.000000,4.000000,4.0,0.000000,275
1,2012-10-02,303.458333,3.166667,4.0,1.203859,276
2,2012-10-03,132.708333,2.958333,9.0,2.349453,277
3,2012-10-04,144.541667,1.916667,7.0,2.124734,278
4,2012-10-05,225.875000,2.583333,6.0,1.411649,279
...,...,...,...,...,...,...
1881,2017-11-25,211.375000,2.166667,4.0,0.816497,329
1882,2017-11-26,194.333333,1.541667,5.0,1.062367,330
1883,2017-11-27,168.458333,1.708333,4.0,0.954585,331
1884,2017-11-28,227.500000,3.708333,9.0,2.235663,332


In [220]:
df["day_of_year_radians"] = 2 * np.pi * df["day_of_year"] / 365.25
df['day_of_year_sin'] = np.sin(df["day_of_year_radians"])
df['day_of_year_cos'] = np.cos(df["day_of_year_radians"])
df

Unnamed: 0,date,wind_direction,wind_speed,wind_max,wind_std,day_of_year,day_of_year_radians,day_of_year_sin,day_of_year_cos
0,2012-10-01,360.000000,4.000000,4.0,0.000000,275,4.730667,-0.999833,0.018277
1,2012-10-02,303.458333,3.166667,4.0,1.203859,276,4.747869,-0.999371,0.035473
2,2012-10-03,132.708333,2.958333,9.0,2.349453,277,4.765071,-0.998613,0.052658
3,2012-10-04,144.541667,1.916667,7.0,2.124734,278,4.782274,-0.997559,0.069828
4,2012-10-05,225.875000,2.583333,6.0,1.411649,279,4.799476,-0.996210,0.086977
...,...,...,...,...,...,...,...,...,...
1881,2017-11-25,211.375000,2.166667,4.0,0.816497,329,5.659597,-0.583951,0.811789
1882,2017-11-26,194.333333,1.541667,5.0,1.062367,330,5.676800,-0.569901,0.821713
1883,2017-11-27,168.458333,1.708333,4.0,0.954585,331,5.694002,-0.555682,0.831395
1884,2017-11-28,227.500000,3.708333,9.0,2.235663,332,5.711205,-0.541298,0.840831


In [221]:
df["wind_direction_radians"] = 2 * np.pi * df["wind_direction"] / 360.00
df['wind_direction_sin'] = np.sin(df["wind_direction_radians"])
df['wind_direction_cos'] = np.cos(df["wind_direction_radians"])

In [222]:
df_proposition = df
df_proposition = df_proposition.drop(columns = ['day_of_year', 'day_of_year_radians', 'wind_direction_radians' , 'wind_direction','date'])
df_proposition

Unnamed: 0,wind_speed,wind_max,wind_std,day_of_year_sin,day_of_year_cos,wind_direction_sin,wind_direction_cos
0,4.000000,4.0,0.000000,-0.999833,0.018277,-2.449294e-16,1.000000
1,3.166667,4.0,1.203859,-0.999371,0.035473,-8.342870e-01,0.551330
2,2.958333,9.0,2.349453,-0.998613,0.052658,7.348160e-01,-0.678267
3,1.916667,7.0,2.124734,-0.997559,0.069828,5.801108e-01,-0.814538
4,2.583333,6.0,1.411649,-0.996210,0.086977,-7.178226e-01,-0.696226
...,...,...,...,...,...,...,...
1881,2.166667,4.0,0.816497,-0.583951,0.811789,-5.206372e-01,-0.853778
1882,1.541667,5.0,1.062367,-0.569901,0.821713,-2.475627e-01,-0.968872
1883,1.708333,4.0,0.954585,-0.555682,0.831395,2.000805e-01,-0.979779
1884,3.708333,9.0,2.235663,-0.541298,0.840831,-7.372773e-01,-0.675590


In [223]:
df_proposition['target_wind'] = df_proposition['wind_max'].shift(-4)
df_proposition

Unnamed: 0,wind_speed,wind_max,wind_std,day_of_year_sin,day_of_year_cos,wind_direction_sin,wind_direction_cos,target_wind
0,4.000000,4.0,0.000000,-0.999833,0.018277,-2.449294e-16,1.000000,6.0
1,3.166667,4.0,1.203859,-0.999371,0.035473,-8.342870e-01,0.551330,8.0
2,2.958333,9.0,2.349453,-0.998613,0.052658,7.348160e-01,-0.678267,7.0
3,1.916667,7.0,2.124734,-0.997559,0.069828,5.801108e-01,-0.814538,7.0
4,2.583333,6.0,1.411649,-0.996210,0.086977,-7.178226e-01,-0.696226,3.0
...,...,...,...,...,...,...,...,...
1881,2.166667,4.0,0.816497,-0.583951,0.811789,-5.206372e-01,-0.853778,9.0
1882,1.541667,5.0,1.062367,-0.569901,0.821713,-2.475627e-01,-0.968872,
1883,1.708333,4.0,0.954585,-0.555682,0.831395,2.000805e-01,-0.979779,
1884,3.708333,9.0,2.235663,-0.541298,0.840831,-7.372773e-01,-0.675590,


In [224]:
features_to_expand = ['wind_speed', 'wind_max', 'wind_std', 'wind_direction_sin', 'wind_direction_cos']

# Create shifted columns for each feature
for feature in features_to_expand:
    df_proposition[f'{feature}_1'] = df_proposition[feature]
    df_proposition[f'{feature}_2'] = df_proposition[feature].shift(-1)
    df_proposition[f'{feature}_3'] = df_proposition[feature].shift(-2)

# Drop the original columns that were expanded
df_proposition = df_proposition.drop(columns=features_to_expand)

df_proposition = df_proposition[:-4]

df_proposition

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_wind,wind_speed_1,wind_speed_2,wind_speed_3,wind_max_1,wind_max_2,wind_max_3,wind_std_1,wind_std_2,wind_std_3,wind_direction_sin_1,wind_direction_sin_2,wind_direction_sin_3,wind_direction_cos_1,wind_direction_cos_2,wind_direction_cos_3
0,-0.999833,0.018277,6.0,4.000000,3.166667,2.958333,4.0,4.0,9.0,0.000000,1.203859,2.349453,-2.449294e-16,-0.834287,0.734816,1.000000,0.551330,-0.678267
1,-0.999371,0.035473,8.0,3.166667,2.958333,1.916667,4.0,9.0,7.0,1.203859,2.349453,2.124734,-8.342870e-01,0.734816,0.580111,0.551330,-0.678267,-0.814538
2,-0.998613,0.052658,7.0,2.958333,1.916667,2.583333,9.0,7.0,6.0,2.349453,2.124734,1.411649,7.348160e-01,0.580111,-0.717823,-0.678267,-0.814538,-0.696226
3,-0.997559,0.069828,7.0,1.916667,2.583333,3.333333,7.0,6.0,8.0,2.124734,1.411649,2.443566,5.801108e-01,-0.717823,-0.748956,-0.814538,-0.696226,-0.662620
4,-0.996210,0.086977,3.0,2.583333,3.333333,2.166667,6.0,8.0,7.0,1.411649,2.443566,1.857222,-7.178226e-01,-0.748956,0.479713,-0.696226,-0.662620,-0.877425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,-0.638384,0.769718,4.0,3.208333,3.000000,1.416667,9.0,7.0,3.0,2.126012,2.431675,0.583592,-5.603981e-01,0.512543,-0.941422,-0.828223,-0.858662,-0.337232
1878,-0.625050,0.780585,5.0,3.000000,1.416667,1.333333,7.0,3.0,3.0,2.431675,0.583592,0.816497,5.125425e-01,-0.941422,-0.477798,-0.858662,-0.337232,-0.878470
1879,-0.611530,0.791221,4.0,1.416667,1.333333,2.166667,3.0,3.0,4.0,0.583592,0.816497,0.816497,-9.414215e-01,-0.477798,-0.520637,-0.337232,-0.878470,-0.853778
1880,-0.597829,0.801624,9.0,1.333333,2.166667,1.541667,3.0,4.0,5.0,0.816497,0.816497,1.062367,-4.777977e-01,-0.520637,-0.247563,-0.878470,-0.853778,-0.968872


## Regresja wiatru

In [225]:
scaler = MinMaxScaler()

normalized_df = pd.DataFrame(scaler.fit_transform(df_proposition), columns=df_proposition.columns)
normalized_df

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_wind,wind_speed_1,wind_speed_2,wind_speed_3,wind_max_1,wind_max_2,wind_max_3,wind_std_1,wind_std_2,wind_std_3,wind_direction_sin_1,wind_direction_sin_2,wind_direction_sin_3,wind_direction_cos_1,wind_direction_cos_2,wind_direction_cos_3
0,0.000083,0.509136,0.238095,0.325843,0.250936,0.232210,0.142857,0.142857,0.380952,0.000000,0.235319,0.459248,0.500007,0.082857,0.867419,1.000000,0.795957,0.165075
1,0.000314,0.517734,0.333333,0.250936,0.232210,0.138577,0.142857,0.380952,0.285714,0.235319,0.459248,0.415322,0.082857,0.867419,0.790066,0.775665,0.165075,0.095157
2,0.000693,0.526327,0.285714,0.232210,0.138577,0.198502,0.380952,0.285714,0.238095,0.459248,0.415322,0.275935,0.867419,0.790066,0.141090,0.160867,0.095157,0.155860
3,0.001220,0.534912,0.285714,0.138577,0.198502,0.265918,0.285714,0.238095,0.333333,0.415322,0.275935,0.477644,0.790066,0.141090,0.125524,0.092731,0.155860,0.173103
4,0.001895,0.543486,0.095238,0.198502,0.265918,0.161049,0.238095,0.333333,0.285714,0.275935,0.477644,0.363032,0.141090,0.125524,0.739866,0.151887,0.173103,0.062890
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,0.180809,0.884862,0.142857,0.254682,0.235955,0.093633,0.380952,0.285714,0.095238,0.415572,0.475320,0.114075,0.219804,0.756281,0.029290,0.085888,0.072518,0.340053
1878,0.187476,0.890295,0.190476,0.235955,0.093633,0.086142,0.285714,0.095238,0.095238,0.475320,0.114075,0.159601,0.756281,0.029290,0.261104,0.070669,0.340053,0.062355
1879,0.194236,0.895614,0.142857,0.093633,0.086142,0.161049,0.095238,0.095238,0.142857,0.114075,0.159601,0.159601,0.029290,0.261104,0.239684,0.331384,0.062355,0.075023
1880,0.201087,0.900815,0.380952,0.086142,0.161049,0.104869,0.095238,0.142857,0.190476,0.159601,0.159601,0.207661,0.261104,0.239684,0.376224,0.060765,0.075023,0.015971


In [226]:
class WeatherDataset(Dataset):
    def __init__(self, data, targets):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [227]:
data = normalized_df.drop(columns=['target_wind']).to_numpy()
targets = normalized_df['target_wind'].to_numpy()

# Split data into training and testing
train_data, test_data = data[:1400], data[1400:]
train_targets, test_targets = targets[:1400], targets[1400:]

# Create dataset and data loaders
train_dataset = WeatherDataset(train_data, train_targets)
test_dataset = WeatherDataset(test_data, test_targets)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [228]:
# Number of features
input_size = data.shape[1] 

# Initialize the model
model = Modelv2(input_size=input_size)

# Train the model
model.train_model(train_loader, epochs=40)

# Evaluate the model on the test set
model.test_model(test_loader)

absolute_differences = []
max_value = df_proposition['target_wind'].max()
min_value = df_proposition['target_wind'].min()

for row_idx in range(len(test_data)):
    prediction = model.predict(torch.tensor(test_data[row_idx], dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * (max_value - min_value) + min_value
    unnormalized_target = test_targets[row_idx] * (max_value - min_value) + min_value
    
    absolute_difference = abs(unnormalized_prediction - unnormalized_target)
    absolute_differences.append(absolute_difference)

# Optionally, convert the differences into a NumPy array or Tensor
absolute_differences = torch.tensor(absolute_differences)

# Print summary
print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

absolute_differences_mean = []
mean_value = df_proposition['target_wind'].mean()

for row_idx in range(len(test_data)):
    unnormalized_target = test_targets[row_idx] * (max_value - min_value) + min_value
    
    absolute_difference_mean = abs(mean_value - unnormalized_target)
    absolute_differences_mean.append(absolute_difference_mean)

absolute_differences_mean = torch.tensor(absolute_differences_mean)

# Print summary
print(f"Predicting only mean: {torch.mean(absolute_differences_mean)}")

Epoch 1/40, Loss: 0.0263
Epoch 10/40, Loss: 0.0182
Epoch 20/40, Loss: 0.0175
Epoch 30/40, Loss: 0.0162
Epoch 40/40, Loss: 0.0152
Test Loss: 0.0262
Mean Absolute Difference: 2.626362116226764
Predicting only mean: 2.7867678509222555


In [229]:
# Number of features
input_size = data.shape[1] 

# Initialize the model
model = Modelv3(input_size=input_size)

# Train the model
model.train_model(train_loader, epochs=50)

# Evaluate the model on the test set
model.test_model(test_loader)

absolute_differences = []
max_value = df_proposition['target_wind'].max()
min_value = df_proposition['target_wind'].min()

for row_idx in range(len(test_data)):
    prediction = model.predict(torch.tensor(test_data[row_idx], dtype=torch.float32).unsqueeze(0))

    unnormalized_prediction = prediction * (max_value - min_value) + min_value
    unnormalized_target = test_targets[row_idx] * (max_value - min_value) + min_value
    
    absolute_difference = abs(unnormalized_prediction - unnormalized_target)
    absolute_differences.append(absolute_difference)

# Optionally, convert the differences into a NumPy array or Tensor
absolute_differences = torch.tensor(absolute_differences)

# Print summary
print(f"Mean Absolute Difference: {torch.mean(absolute_differences)}")

absolute_differences_mean = []
mean_value = df_proposition['target_wind'].mean()

for row_idx in range(len(test_data)):
    unnormalized_target = test_targets[row_idx] * (max_value - min_value) + min_value
    
    absolute_difference_mean = abs(mean_value - unnormalized_target)
    absolute_differences_mean.append(absolute_difference_mean)

absolute_differences_mean = torch.tensor(absolute_differences_mean)

# Print summary
print(f"Predicting only mean: {torch.mean(absolute_differences_mean)}")

Epoch 1/50, Loss: 0.0331
Epoch 10/50, Loss: 0.0183
Epoch 20/50, Loss: 0.0166
Epoch 30/50, Loss: 0.0151
Epoch 40/50, Loss: 0.0131
Epoch 50/50, Loss: 0.0110
Test Loss: 0.0296
Mean Absolute Difference: 2.757880218172469
Predicting only mean: 2.7867678509222555


## Klasyfikacja binarna wiatru


In [230]:
df_classified = df_proposition

df_classified['target_wind'] = df_classified['target_wind'].apply(lambda x: 'high' if x >= 6.0 else 'low')
df_classified

Unnamed: 0,day_of_year_sin,day_of_year_cos,target_wind,wind_speed_1,wind_speed_2,wind_speed_3,wind_max_1,wind_max_2,wind_max_3,wind_std_1,wind_std_2,wind_std_3,wind_direction_sin_1,wind_direction_sin_2,wind_direction_sin_3,wind_direction_cos_1,wind_direction_cos_2,wind_direction_cos_3
0,-0.999833,0.018277,high,4.000000,3.166667,2.958333,4.0,4.0,9.0,0.000000,1.203859,2.349453,-2.449294e-16,-0.834287,0.734816,1.000000,0.551330,-0.678267
1,-0.999371,0.035473,high,3.166667,2.958333,1.916667,4.0,9.0,7.0,1.203859,2.349453,2.124734,-8.342870e-01,0.734816,0.580111,0.551330,-0.678267,-0.814538
2,-0.998613,0.052658,high,2.958333,1.916667,2.583333,9.0,7.0,6.0,2.349453,2.124734,1.411649,7.348160e-01,0.580111,-0.717823,-0.678267,-0.814538,-0.696226
3,-0.997559,0.069828,high,1.916667,2.583333,3.333333,7.0,6.0,8.0,2.124734,1.411649,2.443566,5.801108e-01,-0.717823,-0.748956,-0.814538,-0.696226,-0.662620
4,-0.996210,0.086977,low,2.583333,3.333333,2.166667,6.0,8.0,7.0,1.411649,2.443566,1.857222,-7.178226e-01,-0.748956,0.479713,-0.696226,-0.662620,-0.877425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,-0.638384,0.769718,low,3.208333,3.000000,1.416667,9.0,7.0,3.0,2.126012,2.431675,0.583592,-5.603981e-01,0.512543,-0.941422,-0.828223,-0.858662,-0.337232
1878,-0.625050,0.780585,low,3.000000,1.416667,1.333333,7.0,3.0,3.0,2.431675,0.583592,0.816497,5.125425e-01,-0.941422,-0.477798,-0.858662,-0.337232,-0.878470
1879,-0.611530,0.791221,low,1.416667,1.333333,2.166667,3.0,3.0,4.0,0.583592,0.816497,0.816497,-9.414215e-01,-0.477798,-0.520637,-0.337232,-0.878470,-0.853778
1880,-0.597829,0.801624,high,1.333333,2.166667,1.541667,3.0,4.0,5.0,0.816497,0.816497,1.062367,-4.777977e-01,-0.520637,-0.247563,-0.878470,-0.853778,-0.968872


In [231]:
scaler_classified = MinMaxScaler()

columns_to_scale = df_classified.columns[df_classified.columns != 'target_wind']
df_classified[columns_to_scale] = scaler_classified.fit_transform(df_classified[columns_to_scale])

scaled_df = pd.DataFrame(scaler_classified.fit_transform(df_classified[columns_to_scale]), 
                         columns=columns_to_scale, index=df_classified.index)
normalized_classified = pd.concat([scaled_df, df_classified[['target_wind']]], axis=1)

normalized_classified

Unnamed: 0,day_of_year_sin,day_of_year_cos,wind_speed_1,wind_speed_2,wind_speed_3,wind_max_1,wind_max_2,wind_max_3,wind_std_1,wind_std_2,wind_std_3,wind_direction_sin_1,wind_direction_sin_2,wind_direction_sin_3,wind_direction_cos_1,wind_direction_cos_2,wind_direction_cos_3,target_wind
0,0.000083,0.509136,0.325843,0.250936,0.232210,0.142857,0.142857,0.380952,0.000000,0.235319,0.459248,0.500007,0.082857,0.867419,1.000000,0.795957,0.165075,high
1,0.000314,0.517734,0.250936,0.232210,0.138577,0.142857,0.380952,0.285714,0.235319,0.459248,0.415322,0.082857,0.867419,0.790066,0.775665,0.165075,0.095157,high
2,0.000693,0.526327,0.232210,0.138577,0.198502,0.380952,0.285714,0.238095,0.459248,0.415322,0.275935,0.867419,0.790066,0.141090,0.160867,0.095157,0.155860,high
3,0.001220,0.534912,0.138577,0.198502,0.265918,0.285714,0.238095,0.333333,0.415322,0.275935,0.477644,0.790066,0.141090,0.125524,0.092731,0.155860,0.173103,high
4,0.001895,0.543486,0.198502,0.265918,0.161049,0.238095,0.333333,0.285714,0.275935,0.477644,0.363032,0.141090,0.125524,0.739866,0.151887,0.173103,0.062890,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1877,0.180809,0.884862,0.254682,0.235955,0.093633,0.380952,0.285714,0.095238,0.415572,0.475320,0.114075,0.219804,0.756281,0.029290,0.085888,0.072518,0.340053,low
1878,0.187476,0.890295,0.235955,0.093633,0.086142,0.285714,0.095238,0.095238,0.475320,0.114075,0.159601,0.756281,0.029290,0.261104,0.070669,0.340053,0.062355,low
1879,0.194236,0.895614,0.093633,0.086142,0.161049,0.095238,0.095238,0.142857,0.114075,0.159601,0.159601,0.029290,0.261104,0.239684,0.331384,0.062355,0.075023,low
1880,0.201087,0.900815,0.086142,0.161049,0.104869,0.095238,0.142857,0.190476,0.159601,0.159601,0.207661,0.261104,0.239684,0.376224,0.060765,0.075023,0.015971,high


In [232]:
class WeatherClassifiedDataset(Dataset):
    def __init__(self, data, targets):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.targets = targets 

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [233]:
data = normalized_classified.drop(columns=['target_wind']).to_numpy()
targets = normalized_classified['target_wind'].to_list()

target_map = {'low': 0., 'high': 1.}
targets = [target_map[target] for target in targets]
targets = np.array(targets)

# Split data into training and testing
train_data, test_data = data[:1400], data[1400:]
train_targets, test_targets = targets[:1400], targets[1400:]


# Create dataset and data loaders
train_dataset = WeatherClassifiedDataset(train_data, train_targets)
test_dataset = WeatherClassifiedDataset(test_data, test_targets)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [234]:
# Number of features
input_size = data.shape[1] 

# Initialize the model
model = Modelw1(input_size=input_size, output_size=1, learning_rate=0.001)  # Binary classification

# Train the model
model.train_model(train_loader, epochs=30)

# Evaluate the model on the test set
model.test_model(test_loader)

# Variables for evaluation
probability_scores = []
true_labels = []

for row_idx in range(len(test_data)):
    # Get model prediction (logits -> probabilities -> class)
    logits = model.predict(torch.tensor(test_data[row_idx], dtype=torch.float32).unsqueeze(0))
    probabilities = torch.sigmoid(logits).item() # Convert logits to probabilities

     # Append predictions and true labels
    probability_scores.append(probabilities)
    true_labels.append(test_targets[row_idx])

# Calculate performance metrics
auc_score = roc_auc_score(true_labels, probability_scores)
# Print summary
print(f"AUC Score: {auc_score:.4f}")

Epoch 1/30, Loss: 0.6625
Epoch 10/30, Loss: 0.5610
Epoch 20/30, Loss: 0.5333
Epoch 30/30, Loss: 0.5053
Test Loss: 0.5656
AUC Score: 0.7196


In [238]:
# Number of features
input_size = data.shape[1] 

# Initialize the model
model = Modelw1(input_size=input_size, hidden_sizes=[60, 40, 10], output_size=1, learning_rate=0.0001)  # Binary classification

# Train the model
model.train_model(train_loader, epochs=50)

# Evaluate the model on the test set
model.test_model(test_loader)

# Variables for evaluation
probability_scores = []
true_labels = []

for row_idx in range(len(test_data)):
    # Get model prediction (logits -> probabilities -> class)
    logits = model.predict(torch.tensor(test_data[row_idx], dtype=torch.float32).unsqueeze(0))
    probabilities = torch.sigmoid(logits).item() # Convert logits to probabilities

     # Append predictions and true labels
    probability_scores.append(probabilities)
    true_labels.append(test_targets[row_idx])

# Calculate performance metrics
auc_score = roc_auc_score(true_labels, probability_scores)
# Print summary
print(f"AUC Score: {auc_score:.4f}")

Epoch 1/50, Loss: 0.6928
Epoch 10/50, Loss: 0.6317
Epoch 20/50, Loss: 0.5813
Epoch 30/50, Loss: 0.5765
Epoch 40/50, Loss: 0.5739
Epoch 50/50, Loss: 0.5719
Test Loss: 0.5774
AUC Score: 0.7033
