This is a copy of my [Pytorch-titanic Notebook](https://www.kaggle.com/code/declanmckenna/pytorch-titanic) that also applies weight decay to our linear model. Skip to the end to see how to apply weight decay to a model. I've removed all the explanatory text prior to that section. Check out [the original notebook](https://www.kaggle.com/code/declanmckenna/pytorch-titanic) if you'd like to see how to create a model from scratch in PyTorch.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Detect whether the notebook is running on Kaggle: either the
# KAGGLE_WORKING_DIR environment variable is set or the working directory
# path contains "/kaggle".
is_kaggle = "KAGGLE_WORKING_DIR" in os.environ or "/kaggle" in os.getcwd()
print("Running on Kaggle:", is_kaggle)

# On Kaggle read from the competition input folder; otherwise read from the
# current working directory.
data_path = "/kaggle/input/titanic/" if is_kaggle else os.getcwd() + "/"

In [None]:
import torch
# Widen the printed output of NumPy, PyTorch and pandas to 140 characters;
# for tensors, also disable scientific notation and show 7 edge items.
np.set_printoptions(linewidth=140)
torch.set_printoptions(linewidth=140, sci_mode=False, edgeitems=7)
pd.set_option('display.width', 140)

In [None]:
# Load the training set from `data_path` and display it.
df = pd.read_csv(data_path + "train.csv")
df

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# DataFrame.mode() can return several rows when a column has ties; the first
# row gives one most-frequent value per column.
modes = df.mode().iloc[0]
modes

In [None]:
# Replace missing values in place with each column's mode, then re-count the
# missing values per column.
df.fillna(modes, inplace=True)
df.isna().sum()

In [None]:
def substitue_na_with_modes(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with missing values replaced by column modes.

    The fill value for each column is taken from the first row of
    ``df.mode()``. The input frame is not modified.
    """
    return df.fillna(df.mode().iloc[0])

In [None]:
# Summary statistics for the object-dtype (string) columns.
df.describe(include=[object])

In [None]:
# Summary statistics for the numeric columns.
df.describe(include=[np.number])

In [None]:
# List the distinct values that appear in the Pclass column.
df.Pclass.unique()

In [None]:
# One-hot encode the categorical columns: get_dummies replaces each listed
# column with integer 0/1 columns named `<column>_<value>`.
categorical_feature_names = ['Sex', 'Embarked', 'Pclass']
df = pd.get_dummies(df, columns=categorical_feature_names, dtype=int)
df.columns

In [None]:
# Names of the one-hot columns used as model features.
# NOTE(review): these are hard-coded and must match the columns produced by
# get_dummies, which depend on the values present in the data.
dummy_column_names = ['Sex_male', 'Sex_female', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q',
       'Embarked_S']
df[dummy_column_names].head()

In [None]:
def convert_categories_to_binary_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with Sex, Embarked and Pclass one-hot encoded.

    Each of the three columns is replaced by integer 0/1 indicator columns;
    all other columns are passed through unchanged.
    """
    return pd.get_dummies(df, columns=['Sex', 'Embarked', 'Pclass'], dtype=int)

In [None]:
import matplotlib
# Histogram of the raw Fare column.
df.Fare.hist()

In [None]:
import math
# Store log(Fare + 1) as a new column; adding 1 first means a fare of 0 maps
# to 0 instead of -inf. Then plot the histogram of the transformed values.
df['LogFare'] = np.log(df['Fare'] + 1)
df.LogFare.hist()

In [None]:
from torch import tensor
# Dependent variable: the Survived column as a tensor.
target_tensor = tensor(df.Survived)
target_tensor

In [None]:
# Model inputs: four numeric columns followed by the one-hot dummy columns.
feature_names = ['Age', 'SibSp', 'Parch', 'LogFare'] + dummy_column_names
feature_df = df[feature_names]
feature_df

In [None]:
# Convert the feature table to a float tensor (one row per passenger, one
# column per entry in feature_names).
features = feature_df.values
feature_tensor = tensor(features, dtype=torch.float)
feature_tensor

In [None]:
# Column-wise maximum (dim=0 reduces over rows); max() also returns the row
# index at which each maximum occurs.
max_values, max_indices = feature_tensor.max(dim=0)
max_values

In [None]:
# Scale every column by its maximum (broadcast across rows).
# NOTE(review): assumes no column maximum is 0, otherwise this divides by zero.
feature_tensor = feature_tensor / max_values
feature_tensor

In [None]:
# Seed the RNG so the run is reproducible, then draw one coefficient per
# feature column; torch.rand samples from [0, 1), so these lie in [-0.5, 0.5).
torch.manual_seed(442)
feature_count = feature_tensor.shape[1]
coefficients = torch.rand(feature_count) - 0.5
coefficients

In [None]:
# Multiply each row of features element-wise by the coefficient vector
# (broadcast over rows) and show the first four rows.
weighted_values = feature_tensor * coefficients
weighted_values[:4]

In [None]:
# Sum the weighted features across columns to get one prediction per row.
predictions = weighted_values.sum(dim=1)
predictions[:10]

In [None]:
# Mean absolute error between the predictions and the targets.
loss = torch.abs(predictions - target_tensor).mean()
loss

In [None]:
def create_predictions(features: torch.Tensor, coefficients: torch.Tensor) -> torch.Tensor:
    """Return one linear prediction per row of ``features``.

    Each row is multiplied element-wise by ``coefficients`` and the products
    are summed across columns.
    """
    weighted = features * coefficients
    return weighted.sum(dim=1)

In [None]:
def calculate_loss(features: torch.Tensor, coefficients: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Return the mean absolute error of the model's predictions.

    Predictions come from ``create_predictions``; the result is a scalar
    tensor holding the average of ``|prediction - target|``.
    """
    absolute_errors = (create_predictions(features, coefficients=coefficients) - targets).abs()
    return absolute_errors.mean()

In [None]:
# Turn on gradient tracking for the coefficients (in place).
coefficients.requires_grad_()

In [None]:
# Recompute the loss now that the coefficients track gradients.
loss = calculate_loss(feature_tensor, coefficients=coefficients, targets=target_tensor)
loss

In [None]:
# Back-propagate the loss; this populates coefficients.grad.
loss.backward()
coefficients.grad

In [None]:
# One manual gradient-descent step with a learning rate of 0.1.
#
# PyTorch *accumulates* into .grad, so clear whatever an earlier backward()
# call left behind before computing the gradient for this step.
if coefficients.grad is not None:
    coefficients.grad.zero_()
loss = calculate_loss(feature_tensor, coefficients=coefficients, targets=target_tensor)
loss.backward()  # must be called: a bare `loss.backward` is a no-op expression
with torch.no_grad():
    # The update itself must not be recorded by autograd.
    assert coefficients.grad is not None
    coefficients.sub_(coefficients.grad * 0.1)
    coefficients.grad.zero_()
    # Show the loss after the step.
    print(calculate_loss(feature_tensor, coefficients=coefficients, targets=target_tensor))

In [None]:
from random import Random
from numpy import int64
from fastai.data.transforms import RandomSplitter
from typing import Tuple, List, cast
from fastcore.foundation import L
from torch import Tensor

def split_data_with_fastai(df: pd.DataFrame) -> Tuple[Tensor,Tensor]:
    """Split ``df`` with fastai's ``RandomSplitter`` (seed 42).

    Returns the training indices and the validation indices, each as an
    int64 tensor.
    """
    splitter = RandomSplitter(seed=42)
    train_idx, valid_idx = splitter(df)
    train_tensor = torch.tensor(train_idx, dtype=torch.int64)
    valid_tensor = torch.tensor(valid_idx, dtype=torch.int64)
    return train_tensor, valid_tensor

In [None]:
use_fastai_splitter = True
total_passengers = feature_tensor.size(0)
training_set_size = int(total_passengers * 0.8)

# Choose the row indices for each split: a plain random permutation cut at
# 80% when the fastai splitter is disabled, fastai's RandomSplitter otherwise.
if not use_fastai_splitter:
    randomized_indices = torch.randperm(total_passengers)
    train_indices = randomized_indices[:training_set_size]
    validation_indices = randomized_indices[training_set_size:]
else:
    train_indices, validation_indices = split_data_with_fastai(df)

# Index the features and targets with the same indices so rows stay aligned.
training_features, validation_features = feature_tensor[train_indices], feature_tensor[validation_indices]
training_targets, validation_targets = target_tensor[train_indices], target_tensor[validation_indices]
len(training_features), len(validation_features)

In [None]:
def update_coefficients(coefficients, learning_rate):
    """Apply one in-place gradient-descent step and reset the gradient.

    Subtracts ``learning_rate * coefficients.grad`` from ``coefficients`` and
    then zeroes the gradient so the next backward pass starts from zero.
    """
    gradient = coefficients.grad
    coefficients.sub_(learning_rate * gradient)
    gradient.zero_()

In [None]:
def one_epoch(coefficients, learning_rate):
    """Run one full-batch training step and print the training loss.

    The loss is computed on the training split, back-propagated, and the
    coefficients are updated in place with autograd disabled. The printed
    value is the loss from before the update.
    """
    epoch_loss = calculate_loss(training_features, coefficients, training_targets)
    epoch_loss.backward()

    with torch.no_grad():
        update_coefficients(coefficients, learning_rate=learning_rate)

    print(format(epoch_loss, ".3f"), end="; ")

In [None]:
def generate_coefficients(features: torch.Tensor) -> torch.Tensor:
    """Return random starting coefficients, one per column of ``features``.

    Values are drawn uniformly from [-0.5, 0.5) and the returned tensor has
    gradient tracking enabled.
    """
    column_count = features.shape[1]
    return (torch.rand(column_count) - 0.5).requires_grad_()

In [None]:
def train_model(epoch_count=30, learning_rate=0.1):
    """Train fresh coefficients on the training split and return them.

    Starts from ``generate_coefficients`` and runs ``one_epoch``
    ``epoch_count`` times with the given learning rate.
    """
    weights = generate_coefficients(training_features)
    for _ in range(epoch_count):
        one_epoch(weights, learning_rate=learning_rate)
    return weights

In [None]:
# Train for 18 epochs at a learning rate of 0.2 and show the learned coefficients.
coefficients = train_model(epoch_count=18, learning_rate=0.2)
coefficients

In [None]:
def show_coeffs():
    """Display the global ``coefficients`` next to their feature names.

    Reads the notebook-level ``coefficients`` and ``feature_names`` and shows
    them as a two-column table.
    """
    table = pd.DataFrame({
        'Feature': feature_names,
        'Coefficient': coefficients.detach().tolist(),
    })
    display(table)
show_coeffs()

In [None]:
# Predict on the validation split with the trained coefficients.
predictions = create_predictions(validation_features, coefficients=coefficients)
predictions[:10]

In [None]:
# Treat a prediction above 0.5 as "survived", compare with the actual labels,
# and average the matches to get the accuracy.
results = validation_targets.bool() == (predictions>0.5)
results.float().mean()

In [None]:
from torch import Tensor


def calculate_accuracy(coefficients, features: torch.Tensor, targets: "Tensor | None" = None) -> Tensor:
    """Return the fraction of rows whose thresholded prediction matches its target.

    Args:
        coefficients: Model coefficients passed to ``create_predictions``.
        features: Feature rows to score.
        targets: Labels aligned row-for-row with ``features``. When omitted,
            the notebook-level ``validation_targets`` are used, which is only
            correct if ``features`` is the validation split.

    Returns:
        A scalar tensor with the mean of the per-row matches.
    """
    if targets is None:
        # Backward-compatible default: score against the validation labels.
        targets = validation_targets
    predictions = create_predictions(features, coefficients=coefficients)
    # A prediction above 0.5 counts as the positive class.
    results = targets.bool() == (predictions > 0.5)
    return results.float().mean()

In [None]:
import sympy
# Plot the sigmoid function 1/(1+e^-x) with the x-axis limited to [-5, 5].
sympy.plot("1/(1+exp(-x))", xlim=(-5,5))

In [None]:
def create_predictions(features: torch.Tensor, coefficients: torch.Tensor) -> torch.Tensor:
    """Return one sigmoid-squashed prediction per row of ``features``.

    The weighted sum of each row is passed through the sigmoid, so every
    prediction lies between 0 and 1.
    """
    weighted = features * coefficients
    logits = weighted.sum(dim=1)
    return torch.sigmoid(logits)


In [None]:
# Retrain with a learning rate of 100. calculate_loss looks up
# create_predictions by name, so training now uses the sigmoid version
# defined above. Then report accuracy on the validation features.
coefficients = train_model(learning_rate=100)
calculate_accuracy(coefficients, features=validation_features)

## Weight Decay
Often our loss will go down but our validation loss will begin to increase. This is usually a sign of overfitting. One of the most basic ways to prevent overfitting is weight decay.

We add the sum of all the squared weights, scaled by a weight decay factor, to our loss. This will hinder our training but helps prevent overfitting by forcing our weights to get smaller. Smaller weights mean less resolution in our model's solutions; as the illustration below demonstrates, a solution that fits our training data too closely will overfit.

![overfitting-illustration](overfitting-example.webp)

Below is a simple implementation of weight decay.

In [None]:
def calculate_loss(features: torch.Tensor, coefficients: torch.Tensor, targets: torch.Tensor, weight_decay: float = 0.0) -> torch.Tensor:
    """Return the mean absolute error plus an L2 weight-decay penalty.

    Args:
        features: Feature rows passed to ``create_predictions``.
        coefficients: Model coefficients; also the values being penalised.
        targets: Expected value for each row of ``features``.
        weight_decay: Multiplier for the sum of squared coefficients. The
            default of 0.0 disables the penalty, so existing three-argument
            calls keep working and get the plain mean absolute error.

    Returns:
        A scalar tensor: ``mean(|prediction - target|) + weight_decay * sum(coefficients ** 2)``.
    """
    predictions = create_predictions(features, coefficients=coefficients)
    data_loss = torch.abs(predictions - targets).mean()
    # Penalising squared coefficients pushes the optimiser towards smaller weights.
    penalty = weight_decay * (coefficients ** 2).sum()
    return data_loss + penalty

We also need to update the functions that call our `calculate_loss` function to pass the weight decay value in. I've also updated our loss printing so we can see both the training loss and the validation loss.

When your loss goes down but your validation loss goes up this is usually a sign of overfitting.

In [None]:
def one_epoch(coefficients, learning_rate, weight_decay: float = 0.0):
    """Run one full-batch training step and print training and validation loss.

    Args:
        coefficients: Model coefficients, updated in place.
        learning_rate: Step size for the gradient-descent update.
        weight_decay: Weight-decay factor forwarded to ``calculate_loss``;
            defaults to 0.0 (no penalty), matching ``train_model``.

    The printed training loss is measured before the update and the
    validation loss after it. Both include the weight-decay penalty.
    """
    loss = calculate_loss(training_features, coefficients, training_targets, weight_decay)
    loss.backward()
    with torch.no_grad():
        update_coefficients(coefficients, learning_rate=learning_rate)
        validation_loss = calculate_loss(validation_features, coefficients, validation_targets, weight_decay)

    # Format both losses the same way so the two columns are easy to compare.
    print(f"loss: {loss:.3f}, val_loss: {validation_loss:.3f}", end=";\n")
    
def train_model(epoch_count=30, learning_rate=0.1, weight_decay:float=0.0):
    """Train fresh coefficients with optional weight decay and return them.

    Starts from ``generate_coefficients`` and runs ``one_epoch``
    ``epoch_count`` times, forwarding the learning rate and weight decay.
    """
    weights = generate_coefficients(training_features)
    for _ in range(epoch_count):
        one_epoch(weights, learning_rate=learning_rate, weight_decay=weight_decay)
    return weights

In [None]:
# Retrain with a weight decay of 0.001 (learning rate 100) and report
# accuracy on the validation features.
coefficients = train_model(learning_rate=100, weight_decay=0.001)
calculate_accuracy(coefficients, features=validation_features)

In this case our results got worse. Adding weight decay will make your model train less accurately, but it's a good weapon to have in cases where your model is overfitting. This simple linear model, which has relevant engineered features, isn't going to overfit, so our weight decay implementation is merely an example rather than an improvement here.