In [70]:
import torch 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from torch.nn.functional import cross_entropy 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


In [71]:
class Linear_Classification:

    # initialize weights and biases
    def __init__(self,input_dim,output_dim, device='cpu', learning_rate=0.01):
        self.learning_rate = learning_rate
        self.device = device
        self.W = torch.randn(input_dim, output_dim, device=device, dtype=torch.float32)*0.01
        self.b = torch.zeros(output_dim, device=device, dtype=torch.float32)
         
    # forward pass
    def forward(self,X):
        X = torch.tensor(X, dtype=torch.float32)
        logits = torch.matmul(X, self.W) + self.b   # shape (N, C)
        return logits
    
    
    # Let's implement logsumexp trick for numerical stability
    def softmax(self,z):
        shifted = z - torch.max(z,dim=1, keepdims=True).values
        exp_scores = torch.exp(shifted)
        probs = exp_scores / torch.sum(exp_scores, dim=1, keepdims=True)
        return probs
    
    # compute loss using cross-entropy
    def compute_loss(self,probs,y_true):
        
        N = y_true.shape[0]
        
        correct_logprobs = -torch.log(probs[torch.arange(N), y_true])
        loss = torch.sum(correct_logprobs) / N
        return loss
        
    
    # Prediction function
    def predict(self,X):   
        logits = self.forward(X)
        probs = self.softmax(logits)
        return torch.argmax(probs, dim=1).cpu().numpy()
        
        
    def backward(self, X, y_true):
        
        y_true = torch.tensor(y_true, dtype=torch.long, device=self.device)

        N = X.shape[0]

        logits = self.forward(X)
        probs = self.softmax(logits)     # torch softmax

        # One-hot encode
        y_onehot = torch.zeros_like(probs)
        y_onehot[torch.arange(N), y_true] = 1

        dL_dlogits = (probs - y_onehot) / N

        dL_dW = X.T @ dL_dlogits
        dL_db = torch.sum(dL_dlogits, axis=0)
        
        with torch.no_grad():
        
            self.W -= self.learning_rate * dL_dW
            self.b -= self.learning_rate * dL_db
        
        
    

In [72]:
X, y = make_classification(n_samples=1000, n_features=10, n_classes=3, n_informative=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)


In [75]:
input_dim = X_train.shape[1]
output_dim = len(torch.unique(y_train))
model = Linear_Classification(input_dim, output_dim, learning_rate=0.001)
print("Initial Weights : ",model.W)

epochs = 10000
for epoch in range(epochs):

    logits = model.forward(X_train)
    probs = model.softmax(logits)
    loss = cross_entropy(probs, y_train)
    model.backward(X_train, y_train)

    if (epoch+1) % 100 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')
print(model.W)
# Evaluate the mode

Initial Weights :  tensor([[-0.0048, -0.0203, -0.0056],
        [-0.0046, -0.0006,  0.0080],
        [ 0.0024,  0.0024,  0.0053],
        [-0.0072, -0.0137,  0.0253],
        [ 0.0028,  0.0067,  0.0003],
        [-0.0048,  0.0013,  0.0048],
        [ 0.0007,  0.0055,  0.0036],
        [-0.0012, -0.0014, -0.0126],
        [ 0.0109, -0.0060, -0.0065],
        [ 0.0129, -0.0060, -0.0170]])


  X = torch.tensor(X, dtype=torch.float32)
  y_true = torch.tensor(y_true, dtype=torch.long, device=self.device)


Epoch 100/10000, Loss: 1.0738
Epoch 200/10000, Loss: 1.0566
Epoch 300/10000, Loss: 1.0425
Epoch 400/10000, Loss: 1.0307
Epoch 500/10000, Loss: 1.0206
Epoch 600/10000, Loss: 1.0119
Epoch 700/10000, Loss: 1.0043
Epoch 800/10000, Loss: 0.9976
Epoch 900/10000, Loss: 0.9917
Epoch 1000/10000, Loss: 0.9863
Epoch 1100/10000, Loss: 0.9815
Epoch 1200/10000, Loss: 0.9772
Epoch 1300/10000, Loss: 0.9733
Epoch 1400/10000, Loss: 0.9696
Epoch 1500/10000, Loss: 0.9663
Epoch 1600/10000, Loss: 0.9633
Epoch 1700/10000, Loss: 0.9605
Epoch 1800/10000, Loss: 0.9578
Epoch 1900/10000, Loss: 0.9554
Epoch 2000/10000, Loss: 0.9531
Epoch 2100/10000, Loss: 0.9510
Epoch 2200/10000, Loss: 0.9490
Epoch 2300/10000, Loss: 0.9472
Epoch 2400/10000, Loss: 0.9454
Epoch 2500/10000, Loss: 0.9437
Epoch 2600/10000, Loss: 0.9422
Epoch 2700/10000, Loss: 0.9407
Epoch 2800/10000, Loss: 0.9393
Epoch 2900/10000, Loss: 0.9380
Epoch 3000/10000, Loss: 0.9367
Epoch 3100/10000, Loss: 0.9355
Epoch 3200/10000, Loss: 0.9344
Epoch 3300/10000,