### Logistic Regression
This file contains the code for a Logistic Regression ML algorithm implemented in PyTorch. The goal of this algorithm is to take input data and classify it into one of two categories. It is often used for yes/no-type questions. Once trained on existing data, logistic regression can classify new pieces of data instantly.

In [None]:
# Imports
import torch
import torch.nn as nn

import numpy as np
import random
import time

import matplotlib.pyplot as plt

from datasets import load_dataset

In [None]:
# Setting torch to use GPU acceleration if possible.
device = torch.device("cpu")

if torch.cuda.is_available():
    device = torch.device("cuda")

torch.set_default_device(device)
print(f"Using device: {torch.get_default_device()}")

In [None]:
# ====================== DATA COLLECTION ======================

In [None]:
# Build a synthetic one-feature dataset with numpy. Each feature is drawn
# uniformly from [0, X_SCALE) and its label is 1 exactly when the feature is
# greater than BOUNDARY (0 otherwise), so label and feature are correlated
# by construction.
DATA_COUNT = 1000
X_SCALE = 5
BOUNDARY = 3

X = X_SCALE * np.random.rand(DATA_COUNT, 1)
Y = np.greater(X, BOUNDARY).astype(int)

In [None]:
# Partition the dataset: the first TRAIN_SPLIT fraction of the rows is used
# for training and the remaining rows are held out for testing.
TRAIN_SPLIT = 0.8

splitIndex = int(TRAIN_SPLIT * DATA_COUNT)

trainX, testX = X[:splitIndex], X[splitIndex:]
trainY, testY = Y[:splitIndex], Y[splitIndex:]

In [None]:
# ====================== MODEL CONSTRUCTION ======================

In [None]:
class LogisticRegressionModel(nn.Module):
    """Single-feature logistic regression: a 1 -> 1 linear layer followed by a sigmoid."""

    def __init__(self) -> None:
        """Create the linear layer mapping one input feature to one logit."""
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid of the linear layer's output for ``x``.

        The result lies in (0, 1) and has the same leading dimensions as ``x``
        with a trailing dimension of size 1.
        """
        logits = self.linear(x)
        probabilities = torch.sigmoid(logits)
        return probabilities

In [None]:
# Train an agent.
#
# Mini-batch training of the logistic regression model with Adam and binary
# cross-entropy. Every epoch reshuffles the training indices, splits them into
# batches, and performs one optimiser step per batch.
LEARNING_RATE = 1e-3
EPOCHS = 10000
BATCH_SIZE = 64
REPORT_INTERVAL = 100

agent = LogisticRegressionModel()

# One entry per epoch: the mean over that epoch's batches of the per-sample
# average loss of each batch.
allLosses = []
agent.train()

lossFN = nn.BCELoss()
optimizer = torch.optim.Adam(agent.parameters(), lr = LEARNING_RATE)

# torch.from_numpy always produces CPU tensors, regardless of torch's default
# device. Move the data to wherever the model's parameters live so the forward
# pass does not fail with a device mismatch when the model is on the GPU.
# Converting once up front also avoids re-wrapping every sample every epoch.
modelDevice = next(agent.parameters()).device
trainInputs = torch.from_numpy(trainX).float().to(modelDevice)
trainTargets = torch.from_numpy(trainY).float().to(modelDevice)

# Always request at least one batch: with fewer samples than BATCH_SIZE the
# integer division would be 0 and np.array_split would raise.
batchCount = max(1, len(trainX) // BATCH_SIZE)

for epoch in range(1, EPOCHS + 1):
    currentLoss = 0.0

    # Create batches from a fresh random ordering of the sample indices.
    sampleOrder = list(range(len(trainX)))
    random.shuffle(sampleOrder)
    batches = np.array_split(sampleOrder, batchCount)

    # Run through the batches.
    for batch in batches:
        batchIndex = torch.as_tensor(batch, dtype=torch.long, device=modelDevice)

        # One vectorised forward pass over the whole batch instead of one
        # model call per sample.
        output = agent(trainInputs[batchIndex])
        meanLoss = lossFN(output, trainTargets[batchIndex])

        # BCELoss averages over the batch; scaling by the batch size gives the
        # summed per-sample loss, keeping the gradient magnitude (and thus the
        # effect of the clipping threshold below) as a sum over samples.
        batchLoss = meanLoss * len(batch)

        # Batch complete. Optimise parameters.
        optimizer.zero_grad()
        batchLoss.backward()
        nn.utils.clip_grad_norm_(agent.parameters(), 3)
        optimizer.step()
        currentLoss += meanLoss.item()

    # Divide by the number of batches so the recorded value really is the
    # average batch loss that the report below claims.
    allLosses.append(currentLoss / len(batches))

    if epoch % REPORT_INTERVAL == 0:
        print(f"Epoch #{epoch}: Average batch loss - {allLosses[-1]}")

In [None]:
# ====================== MODEL EVALUATION ======================

In [None]:
# Draw the recorded per-epoch training losses as a line plot on a new figure.
lossFigure, lossAxes = plt.subplots()
lossAxes.plot(allLosses)
plt.show()

In [None]:
# Testing the model.
#
# The reported "loss" is the mean absolute difference between the model's
# predicted probability and the 0/1 label over the held-out test set.
agent.eval()

# torch.from_numpy always produces CPU tensors, so move the test data to the
# device holding the model's parameters before the forward pass.
evalDevice = next(agent.parameters()).device
testInputs = torch.from_numpy(testX).float().to(evalDevice)
testTargets = torch.from_numpy(testY).float().to(evalDevice)

# Evaluation needs no gradients; a single batched forward pass replaces the
# per-sample loop.
with torch.no_grad():
    absoluteErrors = (agent(testInputs) - testTargets).abs()

totalLoss = absoluteErrors.sum().item()

averageLoss = totalLoss / len(testX)
print(f"Average Loss: {averageLoss}")