In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

Plan for today is to talk about pytorch, next few ones we may talk about RNN and CNN. Aim is also to get us prepeared to do the project and presentation. Last year, people had an inssue with understanding attention.

We are using a famous open source dataset on the titanic

nn.Module  is needed, 

Pytorch requires models to be made as a class. need to define two functions: __init__ and forward, bare minimum, and then you can get metrics like in numpy

In [26]:
class SimpleLogisticRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimpleLogisticRegression, self).__init__()
        self.fc = nn.Linear(input_size, output_size) #weights are built into linear, this is like one layer of a NN
#for many layers you may habve self.fc1, self.fc3
    def forward(self, x):
        return self.fc(x) #this means we just pass it through the linear thing defined above 
    #if 3 lyaers: 
        #x = self.fc1(x)
        #x = self.fc2(x) 
        #x = self.fc3(x) 
        #return x? 

In [24]:
df = pd.read_csv("https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv")
X, y1 = df.drop(columns=["Survived", "Name"]), df["Survived"]

numerical_cols = ['Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare']
categorical_cols = ['Sex', 'Pclass']

# Add random missing values
X.loc[X.sample(frac=0.02).index, numerical_cols] = np.nan
X.isnull().sum()

Pclass                      0
Sex                         0
Age                        18
Siblings/Spouses Aboard    18
Parents/Children Aboard    18
Fare                       18
dtype: int64

### Column Transformer
- ColumnTransformer applies transformers to columns of an array or pandas DataFrame.
- This estimator allows different columns or column subsets of the input to be transformed separately and the features generated by each transformer will be concatenated to form a single feature space.
- This is useful for heterogeneous or columnar data, to combine several feature extraction mechanisms or transformations into a single transformer.

In [25]:
# Fill Missing Values with Mean followed by Standard Scaling for Numerical Columns
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')), #are other default strategries that can be used in place of mean
    ('scaler', StandardScaler())
])
# has to be done in this order, steps is a list, pipeline is a scikit learn thing allows for a list of transformation 

# Fill Missing Values with Most Frequent followed by One Hot Encoding for Categorical Columns
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the above two transformers into a single preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])
#condenses all the transformers, num and cat is just for identification third columns is  the list of the names of the columns. you never have to split the dataset 
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=42)

X_train = preprocessor.fit_transform(X_train) #this makes it so there is only one fit_transform
X_test = preprocessor.transform(X_test)
np.isnan(X_train).sum(), np.isnan(X_test).sum()
preprocessor

## Introduction to PyTorch

- [PyTorch Docs](https://pytorch.org/tutorials/beginner/basics/intro.html)
- 

In [7]:
# Create a tensor from a (nested) list
a = torch.Tensor([[1, 2], [3, 4]])
print(a)

# this is just a higher dimensional array, nothing like the mathematical sense 


tensor([[1., 2.],
        [3., 4.]])


In [8]:
# Create a tensor with random values between 0 and 1 with the shape [2, 3, 4]
a = torch.rand(2, 3, 4) 
print(a)

tensor([[[0.9208, 0.0612, 0.1354, 0.6689],
         [0.9406, 0.2642, 0.2743, 0.8919],
         [0.0266, 0.5866, 0.9026, 0.0477]],

        [[0.4677, 0.0021, 0.0163, 0.7308],
         [0.5334, 0.6839, 0.8417, 0.2199],
         [0.6173, 0.9684, 0.4498, 0.2567]]])


In [9]:
shape = a.shape
print("Shape:", a.shape)

size = a.size() #alternate function to .shape
print("Size:", size)

dim1, dim2, dim3 = a.size()
print("Size:", dim1, dim2, dim3)

Shape: torch.Size([2, 3, 4])
Size: torch.Size([2, 3, 4])
Size: 2 3 4


### Numpy -> PyTorch and vice versa

In [10]:
np_arr = np.array([[1, 2], [3, 4]])
tensor = torch.from_numpy(np_arr) #you can also do torch.tensor(np_arry) but TA isnt sure if there would be anny issues, he norm does torch.tensor

print("Numpy array:", np_arr)
print("PyTorch tensor:", tensor)

Numpy array: [[1 2]
 [3 4]]
PyTorch tensor: tensor([[1, 2],
        [3, 4]])


In [11]:
tensor = torch.arange(4)
np_arr = tensor.numpy() #METRICS FOR SCIKIT LEARN ONLY USE NUMPY ARRAYS SO PRETTY IMPORTANT TO KNOW HOW TO GO BACK TO NUMPY ARRAY

print("PyTorch tensor:", tensor)
print("Numpy array:", np_arr)

PyTorch tensor: tensor([0, 1, 2, 3])
Numpy array: [0 1 2 3]


### Basic Operations

In [12]:
x1 = torch.rand(2, 3)
x2 = torch.rand(2, 3)
y1 = x1 + x2

print("X1", x1)
print("X2", x2)
print("Y", y1)

X1 tensor([[0.2641, 0.3510, 0.4234],
        [0.0588, 0.7163, 0.9811]])
X2 tensor([[0.7793, 0.3246, 0.1952],
        [0.2289, 0.3806, 0.9233]])
Y tensor([[1.0434, 0.6756, 0.6187],
        [0.2876, 1.0969, 1.9044]])


In [13]:
a = torch.arange(6)
print("X", a)

X tensor([0, 1, 2, 3, 4, 5])


In [14]:
torch.reshape(a, (2, 3))

tensor([[0, 1, 2],
        [3, 4, 5]])

In [15]:
W = torch.arange(9).view(3, 3) # We can also stack multiple operations in a single line
print("W", W)

W tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])


In [16]:
print(W[:, 1])   # Second column
print(W[0])      # First row
print(W[:2, -1]) # First two rows, last column
print(W[1:3, :]) # Middle two rows

tensor([1, 4, 7])
tensor([0, 1, 2])
tensor([2, 5])
tensor([[3, 4, 5],
        [6, 7, 8]])


HIGHLY recomends creating a NN from scratch from numpy,  will be long and slwow but very good learning experience, pytorch was created by the best ML people and its been optimise to work fast 

Now, let's convert our numpy/pandas array to Torch tensor.

In [17]:
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32) #Just use float 32
y_train, y_test = torch.from_numpy(y_train.values), torch.from_numpy(y_test.values)

In [18]:
def compute_acc_metrics(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    return {"Accuracy": acc, "F1 Score": f1, "Precision": precision, "Recall": recall}

In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #pytorch can run on gpu, cuda is for nvida gpu, what is it for AMD ?
#Doesnt think pytorch supports AMD, is this why AMD doesnt have raytracing? 
num_classes = len(y1.unique())

# Initialize model, optimiser and loss function
model = SimpleLogisticRegression(X_train.shape[1], num_classes).to(device) #to(device) takes form cpu to gpu
optimiser = optim.Adam(model.parameters(), lr=0.01) #need to do this, idk who adam is but he is useful 
loss_fn = nn.CrossEntropyLoss().to(device) #generalised verlision of negative loglikilyhood , in loglikelyhood y can only be 0 or 1 
#For crossentropyloss, it expects the output to be softmaxed , will calculate soft max within it 


for epoch in range(100):
    # Transfer data to `device`
    inputs, labels = X_train.to(device), y_train.to(device) #takes from cpu to gpu
    
    # Forward pass
    outputs = model(inputs) #have not applied sigmoid function yet 
#google lossmxtrick pytorch, you dont have to explicitly call softmax, look at documentation for crossentroyloss to be extra certain
    # Compute loss
    loss = loss_fn(outputs, labels)

    # Compute gradients
    loss.backward() #change wieght based on loss function
    # Update weights
    optimiser.step() #this updates the weights  

    #LOOK UP ADAM OPTIMIZER, there is a paper on it  3 extra steps compared to gradient decent 
    # Reset gradients for next iteration
    optimiser.zero_grad(set_to_none=True) #sets to zero for next step  because internatlly python  gradient += gradient, there is use but not sure why 

    # Compute metrics    
    preds = F.softmax(outputs, dim=1).argmax(dim=1)
    train_metrics = compute_acc_metrics(labels, preds)
    
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch} - Loss: {loss}")
        print(train_metrics)

        #this doesnt get into fine tuning for a model 

Epoch 0 - Loss: 1.6687376499176025
{'Accuracy': 0.25811001410437234, 'F1 Score': np.float64(0.24330861303738108), 'Precision': np.float64(0.5826803883327821), 'Recall': np.float64(0.25811001410437234)}
Epoch 10 - Loss: 1.2718100547790527
{'Accuracy': 0.7320169252468265, 'F1 Score': np.float64(0.7439613380226348), 'Precision': np.float64(0.7747922943695555), 'Recall': np.float64(0.7320169252468265)}
Epoch 20 - Loss: 0.9899502992630005
{'Accuracy': 0.7997179125528914, 'F1 Score': np.float64(0.8007707624657762), 'Precision': np.float64(0.8028659851343101), 'Recall': np.float64(0.7997179125528914)}
Epoch 30 - Loss: 0.8081504702568054
{'Accuracy': 0.8081805359661495, 'F1 Score': np.float64(0.8070427588818738), 'Precision': np.float64(0.8067639753394338), 'Recall': np.float64(0.8081805359661495)}
Epoch 40 - Loss: 0.6956546306610107
{'Accuracy': 0.8124118476727785, 'F1 Score': np.float64(0.8105712770958997), 'Precision': np.float64(0.8108643614160912), 'Recall': np.float64(0.8124118476727785)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize