In [None]:

!pip install pytorch-lightning #download Pytorch Lightning ( for as an user friendly experience with PyTorch.)

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.1.0-py3-none-any.whl (774 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m774.6/774.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.2.0-py3-none-any.whl (805 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.9.0 pytorch-lightning-2.1.0 torchmetrics-1.2.0


In [None]:
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from torch.utils.data import DataLoader, TensorDataset
from sklearn.linear_model import LinearRegression
# Pick the first CUDA device when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the insurance dataset and one-hot encode its categorical columns
# (the first level of each category is dropped).
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
data = pd.read_csv(url)
data = pd.get_dummies(
    data,
    columns=['sex', 'smoker', 'region'],
    drop_first=True,
)

# Target is the `charges` column; every remaining column is a feature.
y = data['charges']
X = data.drop(columns='charges')

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training features only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Convert features and targets to float32 tensors.
X_train, X_test = [
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
]
y_train, y_test = [
    torch.tensor(target.values, dtype=torch.float32) for target in (y_train, y_test)
]

# Define the FeedForward Neural Network using PyTorch Lightning
class FFNN(pl.LightningModule):
    """Feed-forward regressor with two hidden layers (64 and 32 units, ReLU).

    The input width is read from the module-level ``X_train`` tensor at
    construction time, so ``X_train`` must exist before the model is
    instantiated or restored with ``load_from_checkpoint``.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(X_train.shape[1], 64)
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, 1)

    def forward(self, x):
        """Run the network; the last layer has one unit, so the output's last dim is 1."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

    def _mse(self, batch):
        """Return the mean-squared error for one ``(features, targets)`` batch.

        The targets built in this notebook are 1-D ``(batch,)`` while the
        network emits ``(batch, 1)``. Passing them to ``mse_loss`` unchanged
        broadcasts both to ``(batch, batch)`` and compares every prediction
        with every target, so the targets are reshaped to the prediction
        shape first.
        """
        x, y = batch
        y_pred = self(x)
        return torch.nn.functional.mse_loss(y_pred, y.reshape(y_pred.shape))

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the training loss."""
        loss = self._mse(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log (as ``val_loss``) and return the validation loss."""
        val_loss = self._mse(batch)
        self.log("val_loss", val_loss)
        return val_loss

# Seed Python, NumPy and PyTorch so weight init, shuffling and the split below
# are reproducible across Restart & Run All.
pl.seed_everything(42)

# Create PyTorch Lightning DataLoaders.
# The validation split is carved out of the *training* data: checkpoint
# selection monitors `val_loss`, so validating on the test set would leak test
# information into model selection and make the reported test metrics optimistic.
# NOTE(review): the scaler was fitted on the full training portion, which
# includes these validation rows.
full_train_dataset = TensorDataset(X_train, y_train)
n_val = max(1, int(0.2 * len(full_train_dataset)))
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [len(full_train_dataset) - n_val, n_val],
    generator=torch.Generator().manual_seed(42),
)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# Define ModelCheckpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="./",  # Save the checkpoint in the current directory
    filename="best_model",
    save_top_k=1,  # Save only the best model
    mode="min",  # We want to minimize validation loss
)

# Train the model
model = FFNN()
trainer = pl.Trainer(
    max_epochs=10,  # You can adjust the number of epochs
    callbacks=[checkpoint_callback],
    log_every_n_steps=10,  # fewer than 50 batches per epoch, so the default would never log train_loss
)
trainer.fit(model, train_loader, val_loader)

# Load the best model
best_model = FFNN.load_from_checkpoint(checkpoint_callback.best_model_path).to('cpu')

# Evaluate the best model on the (untouched) test set.
# eval() + no_grad(): pure inference, so `y_pred` carries no autograd graph
# and can be converted to NumPy directly.
best_model.eval()
with torch.no_grad():
    y_pred = best_model(X_test)

# Flatten both arrays to 1-D so the metrics compare matching shapes.
mae = mean_absolute_error(y_test.numpy(), y_pred.numpy().ravel())
r2 = r2_score(y_test.numpy(), y_pred.numpy().ravel())

# Save MAE and R2 score to a CSV file
results = pd.DataFrame({"MAE": [mae], "R2": [r2]})
results.to_csv("mlp_results.csv", index=False)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:630: Checkpoint directory /content exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params
--------------------------------
0 | fc1  | Linear | 576   
1 | fc2  | Linear | 2.1 K 
2 | fc3  | Linear | 33    
--------------------------------
2.7 K     Trainable params
0         Non-trainable params
2.7 K     Total params
0.011     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.
  val_loss = torch.nn.functional.mse_loss(y_pred, y)
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (17) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  loss = torch.nn.functional.mse_loss(y_pred, y)
  loss = torch.nn.functional.mse_loss(y_pred, y)


Validation: |          | 0/? [00:00<?, ?it/s]

  val_loss = torch.nn.functional.mse_loss(y_pred, y)


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


In [None]:
# Required libraries
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Device setup: first CUDA device when one is visible, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the insurance dataset and one-hot encode its categorical columns
# (the first level of each category is dropped).
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
data = pd.read_csv(url)
data = pd.get_dummies(
    data,
    columns=['sex', 'smoker', 'region'],
    drop_first=True,
)

# Target is the `charges` column; every remaining column is a feature.
y = data['charges']
X = data.drop(columns='charges')

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training features only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Convert features and targets to float32 tensors.
X_train, X_test = [
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
]
y_train, y_test = [
    torch.tensor(target.values, dtype=torch.float32) for target in (y_train, y_test)
]

# Define the FeedForward Neural Network using PyTorch Lightning
class FFNN(pl.LightningModule):
    """Feed-forward regressor with two hidden layers (64 and 32 units, ReLU).

    The input width is read from the module-level ``X_train`` tensor at
    construction time, so ``X_train`` must exist before the model is
    instantiated or restored with ``load_from_checkpoint``.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Run the network; the last layer has one unit, so the output's last dim is 1."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

    def _mse(self, batch):
        """Return the mean-squared error for one ``(features, targets)`` batch.

        The targets built in this notebook are 1-D ``(batch,)`` while the
        network emits ``(batch, 1)``. Passing them to ``mse_loss`` unchanged
        broadcasts both to ``(batch, batch)`` and compares every prediction
        with every target, so the targets are reshaped to the prediction
        shape first.
        """
        x, y = batch
        y_pred = self(x)
        return F.mse_loss(y_pred, y.reshape(y_pred.shape))

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the training loss."""
        loss = self._mse(batch)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log (as ``val_loss``) and return the validation loss."""
        val_loss = self._mse(batch)
        self.log("val_loss", val_loss)
        return val_loss

# Seed Python, NumPy and PyTorch so weight init, shuffling and the split below
# are reproducible across Restart & Run All.
pl.seed_everything(42)

# Create PyTorch Lightning DataLoaders.
# The validation split is carved out of the *training* data: checkpoint
# selection monitors `val_loss`, so validating on the test set would leak test
# information into model selection and make the reported test metrics optimistic.
# NOTE(review): the scaler was fitted on the full training portion, which
# includes these validation rows.
full_train_dataset = TensorDataset(X_train, y_train)
n_val = max(1, int(0.2 * len(full_train_dataset)))
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [len(full_train_dataset) - n_val, n_val],
    generator=torch.Generator().manual_seed(42),
)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# Define ModelCheckpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="./",  # Save the checkpoint in the current directory
    filename="best_model",
    save_top_k=1,  # Save only the best model
    mode="min",  # We want to minimize validation loss
)

# Train the model
model = FFNN()
trainer = pl.Trainer(
    max_epochs=10,  # You can adjust the number of epochs
    callbacks=[checkpoint_callback],
    log_every_n_steps=10,  # fewer than 50 batches per epoch, so the default would never log train_loss
)
trainer.fit(model, train_loader, val_loader)

# Load the best model
best_model = FFNN.load_from_checkpoint(checkpoint_callback.best_model_path).to('cpu')

# Evaluate the best model on the (untouched) test set.
# eval() + no_grad(): pure inference, so `y_pred` carries no autograd graph
# and can be converted to NumPy directly.
best_model.eval()
with torch.no_grad():
    y_pred = best_model(X_test)

# Flatten both arrays to 1-D so the metrics compare matching shapes.
mae = mean_absolute_error(y_test.numpy(), y_pred.numpy().ravel())
r2 = r2_score(y_test.numpy(), y_pred.numpy().ravel())

# Save MAE and R2 score to a CSV file
results = pd.DataFrame({"MAE": [mae], "R2": [r2]})
results.to_csv("mlp_results.csv", index=False)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:630: Checkpoint directory /content exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params
--------------------------------
0 | fc1  | Linear | 576   
1 | fc2  | Linear | 2.1 K 
2 | fc3  | Linear | 33    
--------------------------------
2.7 K     Trainable params
0         Non-trainable params
2.7 K     Total params
0.011     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.
  val_loss = F.mse_loss(y_pred, y)
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (17) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

  loss = F.mse_loss(y_pred, y)
  loss = F.mse_loss(y_pred, y)


Validation: |          | 0/? [00:00<?, ?it/s]

  val_loss = F.mse_loss(y_pred, y)


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Train a RandomForestRegressor on the same standardized training features
# the neural network was trained on.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions with the RandomForestRegressor
y_pred_rf = rf_model.predict(X_test)

# Bring the network predictions into NumPy before mixing them with the forest
# output: `y_pred` may be a torch tensor that still tracks gradients, which
# cannot be combined with (or converted to) a NumPy array implicitly.
y_pred_nn = y_pred.detach().cpu().numpy() if torch.is_tensor(y_pred) else y_pred

# Combine predictions from the neural network and the RandomForestRegressor
# with equal weights. reshape(-1, 1) keeps both as column vectors for any
# test-set size instead of hard-coding the 268 rows of the current split.
y_pred_ensemble = 0.5 * y_pred_nn.reshape(-1, 1) + 0.5 * y_pred_rf.reshape(-1, 1)



In [None]:

# Score the blended predictions against the held-out targets.
mae_ensemble, r2_ensemble = [
    metric(y_test, y_pred_ensemble) for metric in (mean_absolute_error, r2_score)
]

# Persist both ensemble metrics as a single-row CSV.
ensemble_results = pd.DataFrame([{"MAE": mae_ensemble, "R2": r2_ensemble}])
ensemble_results.to_csv("ensemble_results.csv", index=False)


In [None]:


# Inspect the shape of the network predictions; the recorded output is (268, 1),
# i.e. a column vector rather than a flat array.
y_pred.shape

(268, 1)