Your tasks are the following.


---


1. Create and train a FeedForward Neural Network with PyTorch Lightning. Use ModelCheckPoint as callback, and save the best performing model (in terms of MAE) into a ckpt file named *best_model.ckpt*. 50 points


---


2. Save the MAE and the R2 score of your best performing model into a csv file called *mlp_results.csv*. The MAE must be lower and the R2 score must be higher than in linear regression. 20 points.


---


3. Ensemble your neural network model with RandomForestRegressor following the formula
`y_pred_ensemble = 0.5 * y_pred_FeedForwardNet + 0.5 * y_pred_RandomForestRegressor`.
Save the MAE and the R2 score of the ensemble into a csv file called *ensemble_results.csv*.  30 points


---


Besides the Python codes / IPYNB, the CSV files must be saved directly into the git repository you submit.


In [33]:
!pip install pytorch-lightning --quiet

In [34]:
# Dataset
!wget https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv

--2023-11-09 08:28:30--  https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 54288 (53K) [text/plain]
Saving to: ‘insurance.csv.2’


2023-11-09 08:28:30 (4.00 MB/s) - ‘insurance.csv.2’ saved [54288/54288]



In [35]:
import pandas as pd # pandas for data manipulation
import numpy as np # numpy for linear algebra
import matplotlib.pyplot as plt # matplotlib for plotting
import seaborn as sns # seaborn for plotting
from sklearn.metrics import mean_absolute_error, r2_score

In [36]:
df = pd.read_csv("insurance.csv")
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [37]:
df = pd.get_dummies(df, drop_first=True) # we have to talk about drop_first -> avoid the dummy variable trap
df

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.900,0,16884.92400,0,1,0,0,1
1,18,33.770,1,1725.55230,1,0,0,1,0
2,28,33.000,3,4449.46200,1,0,0,1,0
3,33,22.705,0,21984.47061,1,0,1,0,0
4,32,28.880,0,3866.85520,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0,1,0,0
1334,18,31.920,0,2205.98080,0,0,0,0,0
1335,18,36.850,0,1629.83350,0,0,0,1,0
1336,21,25.800,0,2007.94500,0,0,0,0,1


In [38]:
df.dtypes

age                   int64
bmi                 float64
children              int64
charges             float64
sex_male              uint8
smoker_yes            uint8
region_northwest      uint8
region_southeast      uint8
region_southwest      uint8
dtype: object

In [39]:
df.iloc[:, :4] = df.iloc[:, :4].astype('float32')

  df.iloc[:, :4] = df.iloc[:, :4].astype('float32')


In [40]:
df.dtypes

age                 float32
bmi                 float32
children            float32
charges             float32
sex_male              uint8
smoker_yes            uint8
region_northwest      uint8
region_southeast      uint8
region_southwest      uint8
dtype: object

In [41]:
X = df.drop(columns=['charges']).values # features
y = df['charges'].values # label

In [42]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import pytorch_lightning as pl
import torchmetrics

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)


In [43]:
from sklearn.preprocessing import StandardScaler

def create_dataloader(X, y, batch_size, shuffle):
  scaler = StandardScaler()
  scaler.fit(X_train)
  X = scaler.transform(X).astype('float32')
  X = torch.from_numpy(X)
  y = y.astype('float32')
  y = torch.from_numpy(y)
  dataset = TensorDataset(X, y)
  dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
  return dataloader

batch_size = 64

train_loader = create_dataloader(X_train, y_train, batch_size, True)
val_loader = create_dataloader(X_val, y_val, batch_size, False)
test_loader = create_dataloader(X_test, y_test, batch_size, False)

In [44]:
from sklearn.metrics import r2_score

class MyFeedForwardNet(pl.LightningModule):
    def __init__(self, input_dim, output_dim, lr):
      super(MyFeedForwardNet, self).__init__()
      self.save_hyperparameters()
      self.lr = lr
      self.layers = nn.Sequential(
           nn.Linear(input_dim, 512),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(512),  # Batch Normalization
           nn.Linear(512, 256),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(256),  # Batch Normalization
           nn.Linear(256, 64),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(64),  # Batch Normalization
           nn.Linear(64, output_dim)
      )
      self.loss_fn = nn.L1Loss()

    # forward propagation
    def forward(self, x):
      return self.layers(x)

    def on_epoch_start(self):
      print(f"Epoch {self.current_epoch} started. Training...")

    # one step of training
    def training_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self(inputs).squeeze()
      mae = self.loss_fn(outputs, targets)
      self.log('train_mae', mae)
      return mae

    # one step of validation
    def validation_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self(inputs).squeeze()
      mae = self.loss_fn(outputs, targets)
      self.log('val_mae', mae)
      print("val-mae at epoch {} : {}".format(self.current_epoch, mae.item()))
      return mae

    def configure_optimizers(self):
      return torch.optim.Adam(self.parameters(), lr=self.lr)

In [45]:
logger = pl.loggers.TensorBoardLogger("logs/", name="heart_disease_logs")

In [46]:
epochs = 120
lr = 0.01
output_dim = 1

# we instantiate our model
model = MyFeedForwardNet(X_train.shape[1], output_dim, lr)

# we use the ModelCheckpoint callback to save the best model
callback = pl.callbacks.ModelCheckpoint(
    monitor='val_mae',
    dirpath = '',
    filename = 'best_model',
    save_top_k=1,
    mode='min'
)

# we use the Trainer class to train our model
trainer = pl.Trainer(
    logger=logger,
    max_epochs=epochs,
    log_every_n_steps=1,
    callbacks=[callback]
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [47]:
trainer.fit(model, train_loader, val_loader) # train the model

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:634: Checkpoint directory  exists and is not empty.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | layers  | Sequential | 154 K 
1 | loss_fn | L1Loss     | 0     
---------------------------------------
154 K     Trainable params
0         Non-trainable params
154 K     Total params
0.616     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.


val-mae at epoch 0 : 14276.25390625
val-mae at epoch 0 : 13872.365234375


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 0 : 14273.4853515625
val-mae at epoch 0 : 13869.142578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 1 : 14268.119140625
val-mae at epoch 1 : 13864.3720703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 2 : 14254.21875
val-mae at epoch 2 : 13848.4853515625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 3 : 14233.6943359375
val-mae at epoch 3 : 13829.109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 4 : 14205.5771484375
val-mae at epoch 4 : 13802.220703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 5 : 14164.900390625
val-mae at epoch 5 : 13760.177734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 6 : 14117.30078125
val-mae at epoch 6 : 13712.5634765625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 7 : 14060.388671875
val-mae at epoch 7 : 13653.4306640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 8 : 13996.990234375
val-mae at epoch 8 : 13587.7939453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 9 : 13916.001953125
val-mae at epoch 9 : 13507.7177734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 10 : 13829.19921875
val-mae at epoch 10 : 13423.1376953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 11 : 13727.0908203125
val-mae at epoch 11 : 13323.3388671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 12 : 13629.755859375
val-mae at epoch 12 : 13226.6044921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 13 : 13526.6044921875
val-mae at epoch 13 : 13120.3427734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 14 : 13400.884765625
val-mae at epoch 14 : 12994.2431640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 15 : 13274.716796875
val-mae at epoch 15 : 12863.9140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 16 : 13130.775390625
val-mae at epoch 16 : 12718.2138671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 17 : 12952.103515625
val-mae at epoch 17 : 12524.505859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 18 : 12745.6474609375
val-mae at epoch 18 : 12326.8388671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 19 : 12586.2041015625
val-mae at epoch 19 : 12184.0576171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 20 : 12462.35546875
val-mae at epoch 20 : 12057.5126953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 21 : 12193.3291015625
val-mae at epoch 21 : 11823.0771484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 22 : 11991.880859375
val-mae at epoch 22 : 11613.583984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 23 : 11874.3125
val-mae at epoch 23 : 11545.6220703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 24 : 11519.6962890625
val-mae at epoch 24 : 11173.5302734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 25 : 11388.0263671875
val-mae at epoch 25 : 11096.7666015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 26 : 11139.4228515625
val-mae at epoch 26 : 10843.65234375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 27 : 10974.59375
val-mae at epoch 27 : 10767.453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 28 : 10746.7666015625
val-mae at epoch 28 : 10482.685546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 29 : 10533.06640625
val-mae at epoch 29 : 10325.4130859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 30 : 10357.4619140625
val-mae at epoch 30 : 10137.6376953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 31 : 9901.41015625
val-mae at epoch 31 : 9710.7119140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 32 : 9663.9462890625
val-mae at epoch 32 : 9528.0263671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 33 : 9321.087890625
val-mae at epoch 33 : 9240.0810546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 34 : 9199.3271484375
val-mae at epoch 34 : 9152.6494140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 35 : 8862.193359375
val-mae at epoch 35 : 8847.66015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 36 : 8790.865234375
val-mae at epoch 36 : 8750.5419921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 37 : 8220.3779296875
val-mae at epoch 37 : 8295.9521484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 38 : 8200.0517578125
val-mae at epoch 38 : 8251.380859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 39 : 7874.05126953125
val-mae at epoch 39 : 7986.4345703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 40 : 7311.1318359375
val-mae at epoch 40 : 7498.68310546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 41 : 6953.111328125
val-mae at epoch 41 : 7101.02490234375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 42 : 6830.94384765625
val-mae at epoch 42 : 6963.76025390625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 43 : 6679.037109375
val-mae at epoch 43 : 6873.19384765625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 44 : 6571.3759765625
val-mae at epoch 44 : 6737.9033203125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 45 : 6215.822265625
val-mae at epoch 45 : 6397.70263671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 46 : 5996.513671875
val-mae at epoch 46 : 6184.9111328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 47 : 5933.2646484375
val-mae at epoch 47 : 6160.19482421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 48 : 5789.35546875
val-mae at epoch 48 : 5934.08251953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 49 : 5605.8056640625
val-mae at epoch 49 : 5754.87451171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 50 : 5353.62109375
val-mae at epoch 50 : 5626.236328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 51 : 5305.86376953125
val-mae at epoch 51 : 5556.39111328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 52 : 5297.5576171875
val-mae at epoch 52 : 5567.87158203125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 53 : 5270.80517578125
val-mae at epoch 53 : 5520.75439453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 54 : 5344.0341796875
val-mae at epoch 54 : 5595.44580078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 55 : 4953.365234375
val-mae at epoch 55 : 5280.0517578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 56 : 4583.9482421875
val-mae at epoch 56 : 5019.80517578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 57 : 4460.216796875
val-mae at epoch 57 : 4923.38720703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 58 : 4386.5
val-mae at epoch 58 : 4852.1806640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 59 : 4598.51904296875
val-mae at epoch 59 : 5049.181640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 60 : 4535.2734375
val-mae at epoch 60 : 5014.806640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 61 : 4015.660400390625
val-mae at epoch 61 : 4770.62646484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 62 : 4139.08251953125
val-mae at epoch 62 : 4652.29638671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 63 : 4698.67724609375
val-mae at epoch 63 : 5177.71142578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 64 : 4354.486328125
val-mae at epoch 64 : 4972.4951171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 65 : 3840.75146484375
val-mae at epoch 65 : 4472.01708984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 66 : 4054.291748046875
val-mae at epoch 66 : 4589.2587890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 67 : 3649.50244140625
val-mae at epoch 67 : 4398.8125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 68 : 3998.870361328125
val-mae at epoch 68 : 4635.12841796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 69 : 3602.2236328125
val-mae at epoch 69 : 4445.9833984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 70 : 3636.1298828125
val-mae at epoch 70 : 4414.24072265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 71 : 3810.2646484375
val-mae at epoch 71 : 4593.001953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 72 : 3542.59228515625
val-mae at epoch 72 : 4319.6357421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 73 : 3339.27001953125
val-mae at epoch 73 : 4384.48779296875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 74 : 2974.25048828125
val-mae at epoch 74 : 4010.931640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 75 : 3688.892578125
val-mae at epoch 75 : 4494.55029296875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 76 : 3479.142578125
val-mae at epoch 76 : 4219.86767578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 77 : 3007.66845703125
val-mae at epoch 77 : 4019.412109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 78 : 2540.113037109375
val-mae at epoch 78 : 3833.702880859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 79 : 2749.3798828125
val-mae at epoch 79 : 3975.6162109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 80 : 2915.181884765625
val-mae at epoch 80 : 3902.16162109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 81 : 3409.992431640625
val-mae at epoch 81 : 4105.96875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 82 : 2925.148193359375
val-mae at epoch 82 : 3980.784423828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 83 : 2746.2646484375
val-mae at epoch 83 : 3725.48291015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 84 : 2623.415283203125
val-mae at epoch 84 : 3626.100341796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 85 : 3090.05615234375
val-mae at epoch 85 : 3860.68505859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 86 : 2970.666259765625
val-mae at epoch 86 : 4016.33935546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 87 : 2044.2303466796875
val-mae at epoch 87 : 3442.779052734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 88 : 2581.095703125
val-mae at epoch 88 : 3745.677734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 89 : 2353.043212890625
val-mae at epoch 89 : 3626.048583984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 90 : 2055.036865234375
val-mae at epoch 90 : 3407.81689453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 91 : 2068.25439453125
val-mae at epoch 91 : 3535.30126953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 92 : 2013.350341796875
val-mae at epoch 92 : 3333.803466796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 93 : 2039.2081298828125
val-mae at epoch 93 : 3450.456298828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 94 : 1841.7745361328125
val-mae at epoch 94 : 3174.56396484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 95 : 1940.1522216796875
val-mae at epoch 95 : 3085.83544921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 96 : 2387.277587890625
val-mae at epoch 96 : 3288.271728515625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 97 : 1976.7178955078125
val-mae at epoch 97 : 3206.431640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 98 : 2305.23291015625
val-mae at epoch 98 : 3553.759521484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 99 : 1759.3082275390625
val-mae at epoch 99 : 3042.37841796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 100 : 1760.0797119140625
val-mae at epoch 100 : 3203.16455078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 101 : 2305.1337890625
val-mae at epoch 101 : 3316.388916015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 102 : 1983.536376953125
val-mae at epoch 102 : 3325.894287109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 103 : 1918.31689453125
val-mae at epoch 103 : 3085.494140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 104 : 1878.889892578125
val-mae at epoch 104 : 3078.017333984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 105 : 1894.925537109375
val-mae at epoch 105 : 3133.43896484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 106 : 1784.1480712890625
val-mae at epoch 106 : 3088.9501953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 107 : 1707.0447998046875
val-mae at epoch 107 : 3047.226318359375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 108 : 1923.118408203125
val-mae at epoch 108 : 2996.473876953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 109 : 1877.098876953125
val-mae at epoch 109 : 3040.56689453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 110 : 1862.845947265625
val-mae at epoch 110 : 3046.3681640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 111 : 2087.71728515625
val-mae at epoch 111 : 3156.380859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 112 : 1571.4051513671875
val-mae at epoch 112 : 2914.508544921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 113 : 1649.1151123046875
val-mae at epoch 113 : 2937.804443359375


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 114 : 1695.66748046875
val-mae at epoch 114 : 3142.47998046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 115 : 1732.1785888671875
val-mae at epoch 115 : 2953.088623046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 116 : 1904.9075927734375
val-mae at epoch 116 : 3006.0546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 117 : 2769.28369140625
val-mae at epoch 117 : 3430.77587890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-mae at epoch 118 : 1808.795654296875
val-mae at epoch 118 : 2975.4052734375


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=120` reached.


val-mae at epoch 119 : 1911.379150390625
val-mae at epoch 119 : 2997.154541015625


In [48]:
# load best model
best_model = MyFeedForwardNet.load_from_checkpoint(callback.best_model_path)

predicts_list = []
targets_list = []
for batch in test_loader:
    inputs, targets = batch
    predicts_batch = best_model(inputs).squeeze().tolist()
    targets_batch = targets.tolist()
    predicts_list.extend(predicts_batch)
    targets_list.extend(targets_batch)

# calculate MAE and R2
mae = mean_absolute_error(targets_list, predicts_list)
r2 = r2_score(targets_list, predicts_list)

# save MAE and R2 to .csv
results = pd.DataFrame({
    'MAE': [mae],
    'R2 Score': [r2]
})
results.to_csv('mlp_results.csv', index=False)

print(f'MAE: {mae}')
print(f'R2 Score: {r2}')

MAE: 2772.9896094478777
R2 Score: 0.8330174121852828


In [49]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

y_pred_ensemble = []
scaler = StandardScaler()
scaler.fit(X_train)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(scaler.transform(X_train), y_train)
y_pred_RandomForestRegressor = rf_model.predict(scaler.transform(X_test))
y_pred_ensemble = [0.5 * nn_pred + 0.5 * rf_pred for nn_pred, rf_pred in zip(predicts_list, y_pred_RandomForestRegressor)]

# calculate MAE and R2
mae = mean_absolute_error(targets_list, y_pred_ensemble)
r2 = r2_score(targets_list, y_pred_ensemble)

# save MAE and R2 to .csv
results = pd.DataFrame({
    'MAE': [mae],
    'R2 Score': [r2]
})
results.to_csv('ensemble_results.csv', index=False)

print(f'MAE: {mae}')
print(f'R2 Score: {r2}')

MAE: 2485.7480718720844
R2 Score: 0.8691171037565686
