# Setup / Preparation

In [None]:
import sys
import platform

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
import torch

# Detect which accelerator backends can actually be used on this machine.
has_gpu = torch.cuda.is_available()
# NOTE: is_built() only reports whether this PyTorch binary was compiled with MPS
# support; is_available() additionally checks that the current machine can run it.
# Using is_built() here could select "mps" on hardware that cannot execute it.
has_mps = torch.backends.mps.is_available()
# Preference order: Apple Metal (MPS) -> NVIDIA CUDA -> CPU fallback.
custom_device = "mps" if has_mps else "cuda" if has_gpu else "cpu"

# Report the environment so results can be reproduced later.
print(f"Python Platform: {platform.platform()}")
print(f"PyTorch Version: {torch.__version__}\n")
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Numpy {np.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print("NVIDIA/CUDA GPU is", "available" if has_gpu else "NOT AVAILABLE")
print("MPS (Apple Metal) is", "AVAILABLE" if has_mps else "NOT AVAILABLE")

print(f"\nCustom Device:\t{custom_device}")

# Option 1 on Mac (with Apple Silicon) is to use the CPU:
#torch.set_default_device("cpu") # <- setting it manually to "cpu"

# Option 2 on Mac (with Apple Silicon) is to use MPS:
# From here on, tensors and modules created without an explicit `device=`
# argument are placed on `custom_device`.
torch.set_default_device(custom_device)

print(f"Active device:\t{torch.get_default_device()}")

# Testing
# Smoke test: a freshly created layer and the output it produces should both
# report the default device chosen above.

print("\nRun test:")
layer = torch.nn.Linear(20,30)
print(f"\tLayer weights are on device: {layer.weight.device}")
print(f"\tLayer creating data on device: {layer(torch.randn(128,20)).device}")

# Linear Regression

## Data (preparing and loading)

In [None]:
# Ground-truth slope and intercept of the line the model is supposed to recover
w, b = 0.7, 0.3

# Sampling grid: from `start` (inclusive) up to `end` (exclusive) in increments of `step`
start, end, step = 0, 1, 0.02

# Features as a column: indexing with None appends a trailing axis, (N,) -> (N, 1)
X = torch.arange(start, end, step)[:, None]
# Labels follow the exact linear relationship (no noise added)
y = w * X + b

# Peek at the first five feature/label pairs
X[:5], y[:5]

## Splitting data (train, test)

In [None]:
# Sequential 80/20 split: the first 80 % of the samples are used for training,
# the remaining 20 % are held out for testing (no shuffling).
train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

# Adjacent f-strings are concatenated by Python. Unlike a backslash line
# continuation inside one string literal, this does not leak the source
# indentation into the output as trailing spaces on every printed line.
print(
    f"Num of TRAINING features:\t{len(X_train)}\n"
    f"Num of TRAINING labels:\t\t{len(y_train)}\n"
    f"Num of TESTING features:\t{len(X_test)}\n"
    f"Num of TESTING labels:\t\t{len(y_test)}"
)

## Visualize data

In [None]:
def plot_predictions(train_data=X_train,
                     train_labels=y_train,
                     test_data=X_test,
                     test_labels=y_test,
                     predictions=None):
    """
    Scatter-plot the training data, the test data and (optionally) predictions.

    Args:
        train_data: Training features (tensor or array-like).
        train_labels: Training targets (tensor or array-like).
        test_data: Test features (tensor or array-like).
        test_labels: Test targets (tensor or array-like).
        predictions: Optional model outputs for ``test_data``; drawn in red
            against ``test_data`` when given.

    Note:
        The default arguments are bound when this function is defined, so the
        cell must be re-run after the train/test split changes for the
        defaults to pick up the new tensors.
    """
    def to_np(x):
        """Convert `x` to a NumPy array; tensors are detached and copied to the CPU first."""
        if torch.is_tensor(x):
            # detach() drops autograd history, cpu() makes device tensors readable by NumPy
            return x.detach().cpu().numpy()
        return np.asarray(x)

    plt.figure(figsize=(5,3))

    # Plot training data in blue
    plt.scatter(to_np(train_data), to_np(train_labels), c="blue", s=4, label="Training data")

    # Plot test data in green
    plt.scatter(to_np(test_data), to_np(test_labels), c="green", s=4, label="Testing data")

    # Predictions (red), plotted at the test inputs
    if predictions is not None:
        plt.scatter(to_np(test_data), to_np(predictions), c="red", s=4, label="Predictions")

    plt.title("PREDICTIONS")
    # Axis labels so the figure can be read on its own
    plt.xlabel("X")
    plt.ylabel("y")
    plt.legend(prop={"size":8})

In [None]:
# Plot the train/test split using the function's default arguments (no predictions yet)
plot_predictions();

## Build a Linear Regression model

(An alternative approach is presented at the end of this notebook.)

In [None]:
class LinearRegressionModel(torch.nn.Module):
    """
    Linear regression model (y = weights * x + bias) built on torch.nn.Module.

    Both parameters are created by hand as single-element tensors instead of
    relying on a ready-made layer.
    """
    def __init__(self):
        super().__init__()

        # Each parameter is one float32 value drawn from a standard normal
        # distribution. Wrapping the tensor in torch.nn.Parameter registers it
        # with the module and enables gradient tracking, so autograd and the
        # optimizer can update it during training.
        # The weight is drawn first and the bias second.
        initial_weight = torch.randn(1, dtype=torch.float32)
        self.weights = torch.nn.Parameter(initial_weight, requires_grad=True)

        initial_bias = torch.randn(1, dtype=torch.float32)
        self.bias = torch.nn.Parameter(initial_bias, requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass: apply the linear function to `x` and return the result."""
        scaled = self.weights * x
        return scaled + self.bias

In [None]:
# Seed PyTorch's global random number generator so the randomly initialised
# parameters below are the same on every run
torch.manual_seed(42)

# Instance of the LinearRegressionModel class
model_0 = LinearRegressionModel()

# Two views of the model's parameters: a plain list and a name -> tensor mapping
params = list(model_0.parameters())
state = model_0.state_dict()

# (print `params` to inspect the list form)
print(state)

# Print the first (and only) element of each parameter tensor under its name
for key, value in state.items():
    print(f"{key}:\n\t{value[0]}")

## Making Predictions

In [None]:
# Predict on the test set with the untrained model; this is the baseline to improve on.
with torch.inference_mode(): # disables gradient tracking; newer alternative to torch.no_grad()
    y_preds = model_0(X_test)

In [None]:
# Compare the first five predictions with the first five true test labels
print(f"Predictions:\n\t{y_preds[:5]}")
print(f"Correct values:\n\t{y_test[:5]}")

In [None]:
# Overlay the predictions stored in `y_preds` (red) on the train/test data
plot_predictions(predictions=y_preds)

## Loss Function

Official PyTorch Documentation
* [Loss functions overview](https://docs.pytorch.org/docs/stable/nn.html#loss-functions)
* [Optimizer overview](https://docs.pytorch.org/docs/stable/optim.html)

In [None]:
# Setting up the loss function: L1 loss is the mean absolute error (MAE),
# i.e. mean(|prediction - target|), not the mean squared error (MSE)
loss_function = torch.nn.L1Loss()

# Setting up the optimizer: stochastic gradient descent over the model's parameters.
# The learning rate is set explicitly to 0.01
# (the SGD default in recent PyTorch releases is 0.001).
optimizer = torch.optim.SGD(
    params=model_0.parameters(),
    lr=0.01
)

## Training Loops

In [None]:
epochs = 600

# Record and print metrics at roughly ten evenly spaced checkpoints.
# Integer division keeps the modulo test exact for any `epochs` value
# (float division, e.g. 605 / 10 = 60.5, would almost never hit zero),
# and max(1, ...) avoids a zero divisor when epochs < 10.
log_every = max(1, epochs // 10)

# Value tracking (filled only at the logging checkpoints)
epoch_count = []       # 1-based epoch numbers, matching the printed progress line
loss_values = []       # training loss as plain Python floats
test_loss_values = []  # test loss as plain Python floats

for epoch in range(epochs):
    # flips model into training mode, which changes the behavior of certain layers
    # (like dropout layers and BatchNorm)
    model_0.train()

    # I) Forward Propagation (compute model predictions on training data)
    y_preds = model_0(X_train)

    # II) Loss Calculation (compute difference between predictions and targets)
    loss = loss_function(y_preds, y_train)

    # III) Optimization (reset gradients from previous iteration)
    optimizer.zero_grad()

    # IV) Backpropagation (compute gradients of loss with respect to weights)
    loss.backward()

    # V) Gradient Descent (update model weights using computed gradients)
    optimizer.step()

    # VI) Testing
    model_0.eval()
    with torch.inference_mode():
        # 1) Forward Propagation in Inference Mode
        test_pred = model_0(X_test)

        # 2) Loss Calculation of Test Data
        test_loss = loss_function(test_pred, y_test)

    if (epoch + 1) % log_every == 0:
        # .item() stores a plain float instead of a tensor that still carries
        # autograd history (and may live on an accelerator device).
        epoch_count.append(epoch + 1)
        loss_values.append(loss.item())
        test_loss_values.append(test_loss.item())
        print(f"Epoch {epoch+1}/{epochs} | Loss: {loss.item():.4f} | Test Loss: {test_loss.item():.4f}")

# VII) state_dict() of model: show the learned parameter values
state = model_0.state_dict()
print("\nstate_dict()")
for key, value in state.items():
    print(f"\t{key}:\n\t\t{value[0]:.4f}")

In [None]:
# Predict on the test set with the trained model.
# Evaluation mode plus inference_mode matches every other evaluation in this
# notebook and avoids building an autograd graph for a tensor that is only plotted.
model_0.eval()
with torch.inference_mode():
    y_preds_new = model_0(X_test)

In [None]:
# Overlay the predictions stored in `y_preds_new` (red) on the train/test data
plot_predictions(predictions=y_preds_new)

In [None]:
# Convert the recorded losses to plain floats so matplotlib can plot them.
# Entries may be tensors (possibly on an accelerator) or already numbers.
train_losses = [
    float(lv) if not torch.is_tensor(lv) else lv.detach().cpu().item()
    for lv in loss_values
]
test_losses = [
    float(tv) if not torch.is_tensor(tv) else tv.detach().cpu().item()
    for tv in test_loss_values
]

# Draw both curves against the recorded epochs on a single set of axes
fig, ax = plt.subplots(figsize=(5,3))
ax.plot(epoch_count, train_losses, label="Train Loss")
ax.plot(epoch_count, test_losses, label="Test Loss")
ax.set_title("TRAINING & TEST LOSS CURVES")
ax.set_xlabel("EPOCHS")
ax.set_ylabel("LOSS")
ax.legend(prop={"size":8})
plt.show()

## Saving Model

In [None]:
from pathlib import Path

# Target location: <working directory>/models/<MODEL_NAME>
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_PATH = Path("models")
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# Create the directory if needed; an already existing directory is not an error
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Persist only the state_dict (the parameter tensors), not the whole model object
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

In [None]:
# List the contents of the models/ directory via the shell (`ls` requires a Unix-like system)
!ls -l models/

## Loading Model

In [None]:
# Reference values: the state_dict of the original model, which the
# model loaded from disk below should reproduce
model_0.state_dict()

In [None]:
# Instantiate a fresh model; its randomly initialised parameters are
# overwritten by the saved state_dict below
loaded_model_0 = LinearRegressionModel()

# Load the saved state_dict into the new instance.
# weights_only=True restricts unpickling to tensors and basic containers, which
# is the safe way to read a checkpoint and avoids the FutureWarning that
# PyTorch 2.4/2.5 emit when the argument is omitted.
loaded_model_0.load_state_dict(
    torch.load(f=MODEL_SAVE_PATH, weights_only=True)
)

In [None]:
# Display the state_dict of the loaded model for comparison with the original model's values
loaded_model_0.state_dict()

In [None]:
# Put both models into evaluation mode, then predict on the same test inputs
# without gradient tracking
model_0.eval()
loaded_model_0.eval()
with torch.inference_mode():
    y_preds = model_0(X_test)
    loaded_model_preds = loaded_model_0(X_test)

# Element-wise comparison: True wherever both models produced the same value
print(y_preds == loaded_model_preds)

# Extra

## Alternative class structure

(Compare to LinearRegressionModel class above)

In [None]:
class LinearRegressionModelV2(torch.nn.Module):
    """
    Linear regression model that delegates to a single torch.nn.Linear layer.

    The layer (one input feature, one output feature) creates and owns the
    weight and bias parameters, so none are declared by hand here.
    """
    def __init__(self):
        super().__init__()
        # torch.nn.Linear applies a linear transformation to the incoming data
        # and creates the model parameters itself
        self.linear_layer = torch.nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass: feed `x` through the linear layer and return its output."""
        output = self.linear_layer(x)
        return output

## Init of alternative class

In [None]:
# Re-seed so the layer's random initialisation is reproducible
torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
# Display the module structure together with its initial parameters
model_1, model_1.state_dict()

In [None]:
# Device of the model's first parameter, i.e. where the model was created
next(model_1.parameters()).device