Your tasks are the following.


---


1. Create and train a FeedForward Neural Network with PyTorch Lightning. Use ModelCheckPoint as callback, and save the best performing model (in terms of MAE) into a ckpt file named *best_model.ckpt*. 50 points


---


2. Save the MAE and the R2 score of your best performing model into a csv file called *mlp_results.csv*. The MAE must be lower and the R2 score must be higher than in linear regression. 20 points.


---


3. Ensemble your neural network model with RandomForestRegressor following the formula
`y_pred_ensemble = 0.5 * y_pred_FeedForwardNet + 0.5 * y_pred_RandomForestRegressor`.
Save the MAE and the R2 score of the ensemble into a csv file called *ensemble_results.csv*.  30 points


---


Besides the Python codes / IPYNB, the CSV files must be saved directly into the git repository you submit.


In [1]:
!pip install pytorch-lightning --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m774.6/774.6 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Dataset
!wget https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv

--2023-10-31 00:16:15--  https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 54288 (53K) [text/plain]
Saving to: ‘insurance.csv’


2023-10-31 00:16:15 (26.9 MB/s) - ‘insurance.csv’ saved [54288/54288]



In [3]:
import pandas as pd # pandas for data manipulation
import numpy as np # numpy for linear algebra
import matplotlib.pyplot as plt # matplotlib for plotting
import seaborn as sns # seaborn for plotting

In [4]:
df = pd.read_csv("insurance.csv")
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [5]:
df = pd.get_dummies(df, drop_first=True) # we have to talk about drop_first -> avoid the dummy variable trap
df

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.900,0,16884.92400,0,1,0,0,1
1,18,33.770,1,1725.55230,1,0,0,1,0
2,28,33.000,3,4449.46200,1,0,0,1,0
3,33,22.705,0,21984.47061,1,0,1,0,0
4,32,28.880,0,3866.85520,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0,1,0,0
1334,18,31.920,0,2205.98080,0,0,0,0,0
1335,18,36.850,0,1629.83350,0,0,0,1,0
1336,21,25.800,0,2007.94500,0,0,0,0,1


In [6]:
df.dtypes

age                   int64
bmi                 float64
children              int64
charges             float64
sex_male              uint8
smoker_yes            uint8
region_northwest      uint8
region_southeast      uint8
region_southwest      uint8
dtype: object

In [7]:
df.iloc[:, :4] = df.iloc[:, :4].astype('float32')

  df.iloc[:, :4] = df.iloc[:, :4].astype('float32')


In [8]:
df.dtypes

age                 float32
bmi                 float32
children            float32
charges             float32
sex_male              uint8
smoker_yes            uint8
region_northwest      uint8
region_southeast      uint8
region_southwest      uint8
dtype: object

In [9]:
X = df.drop(columns=['charges']).values # features
y = df['charges'].values # label

In [10]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import pytorch_lightning as pl
import torchmetrics

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)


In [11]:
from sklearn.preprocessing import StandardScaler

def create_dataloader(X, y, batch_size, shuffle):
  scaler = StandardScaler()
  scaler.fit(X_train)
  X = scaler.transform(X).astype('float32')
  X = torch.from_numpy(X)
  y = y.astype('float32')
  y = torch.from_numpy(y)
  dataset = TensorDataset(X, y)
  dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
  return dataloader

batch_size = 64

train_loader = create_dataloader(X_train, y_train, batch_size, True)
val_loader = create_dataloader(X_val, y_val, batch_size, False)
test_loader = create_dataloader(X_test, y_test, batch_size, False)

In [12]:
from sklearn.metrics import r2_score

class MyFeedForwardNet(pl.LightningModule):
    def __init__(self, input_dim, output_dim, lr):
      super(MyFeedForwardNet, self).__init__()
      self.save_hyperparameters()
      self.lr = lr
      self.layers = nn.Sequential(
           nn.Linear(input_dim, 512),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(512),  # Batch Normalization
           nn.Linear(512, 256),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(256),  # Batch Normalization
           nn.Linear(256, 64),
           nn.ReLU(),
           nn.Dropout(0.2),
           nn.BatchNorm1d(64),  # Batch Normalization
           nn.Linear(64, output_dim)
      )
      self.loss_fn = nn.L1Loss()

    # forward propagation
    def forward(self, x):
      return self.layers(x)

    def on_epoch_start(self):
      print(f"Epoch {self.current_epoch} started. Training...")

    # one step of training
    def training_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self(inputs).squeeze()
      loss = self.loss_fn(outputs, targets)
      self.log('train_loss', loss)
      return loss

    # one step of validation
    def validation_step(self, batch, batch_idx):
      inputs, targets = batch
      outputs = self(inputs).squeeze()
      loss = self.loss_fn(outputs, targets)
      self.log('val_loss', loss)
      print("val-loss at epoch {} : {}".format(self.current_epoch, loss.item()))
      return loss

    def configure_optimizers(self):
      return torch.optim.Adam(self.parameters(), lr=self.lr)

In [13]:
logger = pl.loggers.TensorBoardLogger("logs/", name="heart_disease_logs")

In [14]:
epochs = 120
lr = 0.01
output_dim = 1

# we instantiate our model
model = MyFeedForwardNet(X_train.shape[1], output_dim, lr)

# we use the ModelCheckpoint callback to save the best model
callback = pl.callbacks.ModelCheckpoint(
    monitor='val_loss',
    dirpath = '',
    filename = 'best_model',
    save_top_k=1,
    mode='min'
)

# we use the Trainer class to train our model
trainer = pl.Trainer(
    logger=logger,
    max_epochs=epochs,
    log_every_n_steps=1,
    callbacks=[callback]
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [15]:
trainer.fit(model, train_loader, val_loader) # train the model

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:630: Checkpoint directory  exists and is not empty.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | layers  | Sequential | 154 K 
1 | loss_fn | L1Loss     | 0     
---------------------------------------
154 K     Trainable params
0         Non-trainable params
154 K     Total params
0.616     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.


val-loss at epoch 0 : 14276.3251953125
val-loss at epoch 0 : 13872.4306640625


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 0 : 14273.912109375
val-loss at epoch 0 : 13870.0830078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 1 : 14266.87109375
val-loss at epoch 1 : 13863.19140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 2 : 14252.119140625
val-loss at epoch 2 : 13847.6630859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 3 : 14231.2724609375
val-loss at epoch 3 : 13829.2265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 4 : 14203.3095703125
val-loss at epoch 4 : 13800.3134765625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 5 : 14159.6572265625
val-loss at epoch 5 : 13756.6943359375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 6 : 14116.5302734375
val-loss at epoch 6 : 13712.455078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 7 : 14058.3984375
val-loss at epoch 7 : 13653.4443359375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 8 : 13990.8544921875
val-loss at epoch 8 : 13584.9853515625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 9 : 13915.462890625
val-loss at epoch 9 : 13508.62890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 10 : 13828.294921875
val-loss at epoch 10 : 13424.25390625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 11 : 13732.818359375
val-loss at epoch 11 : 13327.80859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 12 : 13633.3984375
val-loss at epoch 12 : 13226.994140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 13 : 13517.5205078125
val-loss at epoch 13 : 13111.9365234375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 14 : 13403.9794921875
val-loss at epoch 14 : 12998.01171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 15 : 13265.052734375
val-loss at epoch 15 : 12856.5390625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 16 : 13132.00390625
val-loss at epoch 16 : 12726.9736328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 17 : 12935.841796875
val-loss at epoch 17 : 12531.3369140625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 18 : 12721.376953125
val-loss at epoch 18 : 12325.9501953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 19 : 12605.0419921875
val-loss at epoch 19 : 12163.921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 20 : 12456.5302734375
val-loss at epoch 20 : 12050.6484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 21 : 12261.505859375
val-loss at epoch 21 : 11896.7392578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 22 : 12054.9541015625
val-loss at epoch 22 : 11675.5185546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 23 : 11872.140625
val-loss at epoch 23 : 11518.8232421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 24 : 11614.9482421875
val-loss at epoch 24 : 11260.98828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 25 : 11477.85546875
val-loss at epoch 25 : 11125.91015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 26 : 11208.357421875
val-loss at epoch 26 : 10883.408203125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 27 : 10956.2197265625
val-loss at epoch 27 : 10673.1953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 28 : 10625.19921875
val-loss at epoch 28 : 10370.7919921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 29 : 10569.3388671875
val-loss at epoch 29 : 10328.169921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 30 : 10344.337890625
val-loss at epoch 30 : 10156.2763671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 31 : 10096.9140625
val-loss at epoch 31 : 9893.8916015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 32 : 9697.6884765625
val-loss at epoch 32 : 9546.4755859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 33 : 9629.2919921875
val-loss at epoch 33 : 9523.646484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 34 : 9259.255859375
val-loss at epoch 34 : 9279.380859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 35 : 8708.5615234375
val-loss at epoch 35 : 8618.2822265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 36 : 8380.1533203125
val-loss at epoch 36 : 8344.896484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 37 : 8368.337890625
val-loss at epoch 37 : 8418.85546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 38 : 8096.611328125
val-loss at epoch 38 : 8136.306640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 39 : 7928.0703125
val-loss at epoch 39 : 8014.48828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 40 : 7755.876953125
val-loss at epoch 40 : 7802.37939453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 41 : 7175.7158203125
val-loss at epoch 41 : 7357.00341796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 42 : 7271.6025390625
val-loss at epoch 42 : 7403.30810546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 43 : 7099.1982421875
val-loss at epoch 43 : 7216.79345703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 44 : 6621.35595703125
val-loss at epoch 44 : 6747.52685546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 45 : 6238.640625
val-loss at epoch 45 : 6457.4013671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 46 : 6325.19482421875
val-loss at epoch 46 : 6438.830078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 47 : 6179.5224609375
val-loss at epoch 47 : 6330.482421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 48 : 5761.369140625
val-loss at epoch 48 : 5940.93017578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 49 : 6033.4130859375
val-loss at epoch 49 : 6197.16357421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 50 : 6310.197265625
val-loss at epoch 50 : 6386.37939453125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 51 : 5327.20654296875
val-loss at epoch 51 : 5610.81591796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 52 : 5169.927734375
val-loss at epoch 52 : 5415.3037109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 53 : 4972.880859375
val-loss at epoch 53 : 5302.02978515625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 54 : 4910.9765625
val-loss at epoch 54 : 5260.62841796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 55 : 4954.86083984375
val-loss at epoch 55 : 5290.43115234375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 56 : 4718.56201171875
val-loss at epoch 56 : 5081.46142578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 57 : 4996.32080078125
val-loss at epoch 57 : 5193.9091796875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 58 : 4384.8681640625
val-loss at epoch 58 : 4787.45947265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 59 : 4373.7763671875
val-loss at epoch 59 : 4897.14990234375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 60 : 4226.6181640625
val-loss at epoch 60 : 4812.21435546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 61 : 4249.705078125
val-loss at epoch 61 : 4848.376953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 62 : 4546.90234375
val-loss at epoch 62 : 5070.64013671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 63 : 4194.3125
val-loss at epoch 63 : 4874.05810546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 64 : 4347.7236328125
val-loss at epoch 64 : 4877.82470703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 65 : 4035.9658203125
val-loss at epoch 65 : 4618.28173828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 66 : 4274.517578125
val-loss at epoch 66 : 4754.095703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 67 : 3955.3916015625
val-loss at epoch 67 : 4674.4462890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 68 : 3582.84912109375
val-loss at epoch 68 : 4323.56982421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 69 : 3654.34619140625
val-loss at epoch 69 : 4415.42236328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 70 : 3114.393310546875
val-loss at epoch 70 : 4229.35498046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 71 : 3379.303466796875
val-loss at epoch 71 : 4446.49658203125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 72 : 3682.61328125
val-loss at epoch 72 : 4336.919921875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 73 : 3072.13037109375
val-loss at epoch 73 : 4206.3701171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 74 : 3255.37451171875
val-loss at epoch 74 : 4312.5146484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 75 : 2898.074951171875
val-loss at epoch 75 : 4061.79443359375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 76 : 3215.144287109375
val-loss at epoch 76 : 4315.68408203125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 77 : 2323.857421875
val-loss at epoch 77 : 3525.531982421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 78 : 3051.46240234375
val-loss at epoch 78 : 4096.107421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 79 : 2600.422607421875
val-loss at epoch 79 : 3857.41357421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 80 : 2885.57666015625
val-loss at epoch 80 : 3945.903076171875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 81 : 2823.887451171875
val-loss at epoch 81 : 3920.311767578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 82 : 3255.409423828125
val-loss at epoch 82 : 4399.71435546875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 83 : 2464.833740234375
val-loss at epoch 83 : 3816.280517578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 84 : 2968.30322265625
val-loss at epoch 84 : 3998.7373046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 85 : 2880.228515625
val-loss at epoch 85 : 4075.838623046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 86 : 2217.296875
val-loss at epoch 86 : 3687.827392578125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 87 : 1962.070068359375
val-loss at epoch 87 : 3424.795166015625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 88 : 2392.108642578125
val-loss at epoch 88 : 3550.6787109375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 89 : 1758.876953125
val-loss at epoch 89 : 3132.6123046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 90 : 2046.55126953125
val-loss at epoch 90 : 3445.611083984375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 91 : 2412.28857421875
val-loss at epoch 91 : 3660.60205078125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 92 : 2228.984375
val-loss at epoch 92 : 3686.46484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 93 : 1611.220458984375
val-loss at epoch 93 : 3008.541748046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 94 : 2057.90478515625
val-loss at epoch 94 : 3332.3095703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 95 : 2636.138916015625
val-loss at epoch 95 : 3804.29931640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 96 : 2048.2294921875
val-loss at epoch 96 : 3348.6962890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 97 : 1791.0264892578125
val-loss at epoch 97 : 3096.37646484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 98 : 2258.006103515625
val-loss at epoch 98 : 3417.78125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 99 : 2168.5712890625
val-loss at epoch 99 : 3577.85498046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 100 : 1777.263916015625
val-loss at epoch 100 : 3042.07373046875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 101 : 1865.5406494140625
val-loss at epoch 101 : 3218.260986328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 102 : 2018.943115234375
val-loss at epoch 102 : 3285.22021484375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 103 : 2713.1201171875
val-loss at epoch 103 : 3658.737548828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 104 : 2073.59423828125
val-loss at epoch 104 : 3268.45703125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 105 : 1707.52392578125
val-loss at epoch 105 : 2949.566650390625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 106 : 1855.24609375
val-loss at epoch 106 : 2928.724853515625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 107 : 2197.693359375
val-loss at epoch 107 : 3241.017822265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 108 : 1977.787109375
val-loss at epoch 108 : 3210.926513671875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 109 : 2100.93310546875
val-loss at epoch 109 : 3265.458251953125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 110 : 2033.62548828125
val-loss at epoch 110 : 3211.578857421875


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 111 : 1781.4859619140625
val-loss at epoch 111 : 3001.0087890625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 112 : 1917.39990234375
val-loss at epoch 112 : 3201.510986328125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 113 : 1675.41845703125
val-loss at epoch 113 : 2919.877197265625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 114 : 2029.01806640625
val-loss at epoch 114 : 3361.820556640625


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 115 : 2408.806640625
val-loss at epoch 115 : 3322.487548828125


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 116 : 1777.5897216796875
val-loss at epoch 116 : 3099.859375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 117 : 1759.298828125
val-loss at epoch 117 : 2898.857177734375


Validation: |          | 0/? [00:00<?, ?it/s]

val-loss at epoch 118 : 1764.021484375
val-loss at epoch 118 : 2745.452880859375


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=120` reached.


val-loss at epoch 119 : 1929.66455078125
val-loss at epoch 119 : 2997.396728515625


In [16]:
from sklearn.metrics import mean_absolute_error, r2_score

# load best model
best_model = MyFeedForwardNet.load_from_checkpoint(callback.best_model_path)

predicts_list = []
targets_list = []
for batch in test_loader:
    inputs, targets = batch
    predicts_batch = best_model(inputs).squeeze().tolist()
    targets_batch = targets.tolist()
    predicts_list.extend(predicts_batch)
    targets_list.extend(targets_batch)

# calculate MAE and R2
mae = mean_absolute_error(targets_list, predicts_list)
r2 = r2_score(targets_list, predicts_list)

# save MAE and R2 to .csv
results = pd.DataFrame({
    'MAE': [mae],
    'R2 Score': [r2]
})
results.to_csv('mlp_results.csv', index=False)

print(f'MAE: {mae}')
print(f'R2 Score: {r2}')

MAE: 2643.1366767313943
R2 Score: 0.8413943335525023


In [19]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

y_pred_ensemble = []
scaler = StandardScaler()
scaler.fit(X_train)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(scaler.transform(X_train), y_train)
y_pred_RandomForestRegressor = rf_model.predict(scaler.transform(X_test))
y_pred_ensemble = [0.5 * nn_pred + 0.5 * rf_pred for nn_pred, rf_pred in zip(predicts_list, y_pred_RandomForestRegressor)]

# calculate MAE and R2
mae = mean_absolute_error(targets_list, y_pred_ensemble)
r2 = r2_score(targets_list, y_pred_ensemble)

# save MAE and R2 to .csv
results = pd.DataFrame({
    'MAE': [mae],
    'R2 Score': [r2]
})
results.to_csv('ensemble_results.csv', index=False)

print(f'MAE: {mae}')
print(f'R2 Score: {r2}')

MAE: 2444.2760507088274
R2 Score: 0.8711537194871536
