In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

data = pd.read_csv('City_Types.csv')
columns_to_scale = list(data.select_dtypes(include='number').columns)
data['Date'] = pd.to_datetime(data['Date'])
data['month'] = data['Date'].dt.month
data['day'] = data['Date'].dt.day
data['weekday'] = data['Date'].dt.weekday
data = data.drop(columns=['Date'])
y = data['Type']
x = data.drop(columns=['Type'])
x = pd.get_dummies(x)
scaler = StandardScaler()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
x_train[columns_to_scale] = scaler.fit_transform(x_train[columns_to_scale])
x_test[columns_to_scale] = scaler.transform(x_test[columns_to_scale])

In [7]:
def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return float(x > 0)

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / np.sum(e_x)

def cross_entropy(pred):
    return -np.log(pred + 1e-9)

In [8]:
class Neuron:

    def __init__(self, nin: int, activation: str = 'relu'):
        self.activation = activation

        if activation == 'relu':
            # He initialization for ReLU
            limit = np.sqrt(2.0 / nin)
        else:
            # Xavier initialization for sigmoid/tanh
            limit = np.sqrt(6.0 / (nin + 1))

        self.weights = np.random.uniform(-limit, limit, nin).astype(np.float64)
        self.bias = np.random.normal()

        self.grads = np.zeros(nin, dtype=np.float64)
        self.gradb = np.float64(0.0)
        self.inputs = np.array([], dtype=np.float64)

        self.activ = 0
        self.delta = 0

    def __call__(self, x):
        interm = np.dot(self.weights, x) + self.bias
        self.inputs = np.array(x, dtype=np.float64)
        self.res = interm

        if self.activation == 'relu':
            res = relu(interm)
        else:  # sigmoid
            res = sigmoid(interm)

        self.activ = res
        return res

    def activation_derivative(self):
        if self.activation == 'relu':
            return relu_derivative(self.res)
        else:
            return sigmoid_derivative(self.activ)

    def __repr__(self):
        return f'Neuron(Weights:{self.weights}, Bias:{self.bias})'

class Layer:

    def __init__(self, nin: int, nout: int, activation: str = 'relu'):
        self.neurons = [Neuron(nin, activation=activation) for _ in range(nout)]
        self.activation = activation

    def __call__(self, x):
        res = np.array([n(x) for n in self.neurons])
        return res if len(res) == 2 else res

    def __repr__(self):
        return f'Layer: {[x for x in self.neurons]}'

    def parameters(self):
        return [x for x in self.neurons]

class MLP:

    def __init__(self, nin: int, nouts: list, activation: str = 'relu'):
        sz = [nin] + nouts
        self.nn = [Layer(sz[i], sz[i+1], activation=activation) for i in range(len(nouts))]
        self.hidden_activation = activation

    def __call__(self, x):
        for layer in self.nn:
            x = layer(x)
        return x

    def __repr__(self):
        return f'NN: {[x for x in self.nn]}'

    def parameters(self):
        return [p for x in self.nn for p in x.parameters()]

    def backprop(self, output: np.ndarray, pred: int):
        # Compute gradient for softmax + cross-entropy
        # The gradient is: predicted_probs - one_hot_true_label
        grad = output.copy()
        grad[pred] -= 1  # This is correct IF output contains the softmax probabilities

        # Backprop through output layer
        last_layer = self.nn[-1]
        for i, neuron in enumerate(last_layer.neurons):
            delta = grad[i]
            neuron.gradb = delta
            neuron.grads = delta * neuron.inputs
            neuron.delta = delta

        # Backprop through hidden layers
        for layer_idx in range(len(self.nn) - 2, -1, -1):
            layer = self.nn[layer_idx]
            layer_n = self.nn[layer_idx + 1]
            for i, neuron in enumerate(layer.neurons):
                downstream = sum(n.weights[i] * n.delta for n in layer_n.neurons)
                neuron.delta = downstream * neuron.activation_derivative()
                neuron.grads = neuron.delta * neuron.inputs
                neuron.gradb = neuron.delta

        # Gradient descent
        step = 0.01  # Changed from -0.01
        for neuron in self.parameters():
            neuron.weights -= step * neuron.grads  # Now we subtract (gradient descent)
            neuron.bias -= step * neuron.gradb

        # Reset gradients
        for neuron in self.parameters():
            neuron.grads = np.zeros_like(neuron.grads)
            neuron.gradb = 0.0

In [9]:
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

nin = len(x_train[0])
mlp = MLP(nin, [8, 2], activation='relu')

In [10]:
epochs = 50
losses = []
prediction = ['Industrial', 'Residential']

# Before training, reinitialize with smaller weights
for neuron in mlp.parameters():
    nin = len(neuron.weights)
    limit = np.sqrt(1.0 / nin)
    neuron.weights = np.random.uniform(-limit, limit, nin).astype(np.float64)
    neuron.bias = 0.0

for epoch in range(epochs):
    total_loss = 0
    correct = 0  # Track accuracy

    for x, y in zip(x_train, y_train):
        out = mlp(x)
        out = softmax(out)
        loss = cross_entropy(out[prediction.index(y)])
        total_loss += loss

        # Check if prediction is correct
        if np.argmax(out) == prediction.index(y):
            correct += 1

        mlp.backprop(out, prediction.index(y))

    avg_loss = total_loss / len(x_train)
    accuracy = correct / len(x_train)
    losses.append(avg_loss)

    if epoch % 10 == 0:  # Print every 10 epochs
        print(f"Epoch {epoch} | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.4f}")

Epoch 0 | Loss: 0.0635 | Accuracy: 0.9767
Epoch 10 | Loss: 0.0000 | Accuracy: 1.0000
Epoch 20 | Loss: 0.0000 | Accuracy: 1.0000
Epoch 30 | Loss: 0.0000 | Accuracy: 1.0000
Epoch 40 | Loss: 0.0000 | Accuracy: 1.0000


In [11]:
def evaluate_model(mlp, x_test, y_test, prediction_classes):
    """
    Evaluate the model on test data

    Parameters:
    - mlp: trained MLP model
    - x_test: test features
    - y_test: test labels
    - prediction_classes: list of class names (e.g., ['Industrial', 'Residential'])

    Returns:
    - accuracy: test accuracy
    - predictions: list of predicted labels
    """
    correct = 0
    predictions = []
    test_loss = 0

    for x, y in zip(x_test, y_test):
        # Forward pass
        out = mlp(x)
        out = softmax(out)

        # Get prediction
        pred_idx = np.argmax(out)
        pred_label = prediction_classes[pred_idx]
        predictions.append(pred_label)

        # Calculate accuracy
        if pred_label == y:
            correct += 1

        # Calculate loss
        true_idx = prediction_classes.index(y)
        test_loss += cross_entropy(out[true_idx])

    accuracy = correct / len(x_test)
    avg_loss = test_loss / len(x_test)

    print(f"\n{'='*50}")
    print(f"TEST SET RESULTS")
    print(f"{'='*50}")
    print(f"Test Accuracy: {accuracy:.4f} ({correct}/{len(x_test)})")
    print(f"Test Loss: {avg_loss:.4f}")
    print(f"{'='*50}\n")

    # Confusion matrix
    from collections import Counter
    true_counts = Counter(y_test)
    pred_counts = Counter(predictions)

    print("True distribution:")
    for label, count in true_counts.items():
        print(f"  {label}: {count}")

    print("\nPredicted distribution:")
    for label, count in pred_counts.items():
        print(f"  {label}: {count}")

    # Calculate per-class accuracy
    print("\nPer-class accuracy:")
    for class_name in prediction_classes:
        class_correct = sum(1 for true, pred in zip(y_test, predictions)
                           if true == class_name and pred == class_name)
        class_total = sum(1 for true in y_test if true == class_name)
        if class_total > 0:
            class_acc = class_correct / class_total
            print(f"  {class_name}: {class_acc:.4f} ({class_correct}/{class_total})")

    return accuracy, predictions

# Use it like this:
accuracy, predictions = evaluate_model(mlp, x_test, y_test, prediction)


TEST SET RESULTS
Test Accuracy: 1.0000 (10541/10541)
Test Loss: 0.0000

True distribution:
  Residential: 5221
  Industrial: 5320

Predicted distribution:
  Residential: 5221
  Industrial: 5320

Per-class accuracy:
  Industrial: 1.0000 (5320/5320)
  Residential: 1.0000 (5221/5221)
