In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Data

In [82]:
datapath = "dataset/"

df = pd.read_csv(datapath + 'data.csv')

In [83]:
df.head()

Unnamed: 0,hPossesion,aPossesion,hshotsOnTarget,ashotsOnTarget,hshots,ashots,hfouls,afouls,hsaves,asaves,label
0,24,76,2,4,14,17,11,13,2,1,2
1,80,20,3,1,5,2,1,3,0,1,1
2,57,43,4,2,12,11,10,12,1,2,1
3,47,53,5,3,13,11,7,10,1,3,0
4,72,28,5,4,21,6,8,13,4,5,0


In [84]:
df.shape

(1272, 11)

# Nueral Net From Scratch

### Selected Data to Train

In [85]:
sample_df = df.sample(n=100, random_state=42)
sample_df.head()

Unnamed: 0,hPossesion,aPossesion,hshotsOnTarget,ashotsOnTarget,hshots,ashots,hfouls,afouls,hsaves,asaves,label
208,50,50,7,0,11,2,7,7,0,3,1
966,45,55,1,3,9,10,20,18,3,1,0
714,37,63,2,1,8,10,13,10,1,1,1
1192,64,36,15,3,20,6,15,13,1,7,0
584,28,72,0,5,3,22,12,4,3,0,2


In [86]:
train, test = np.split(sample_df.sample(frac=1, random_state=42), [int(0.8 * len(sample_df))])

train.shape, test.shape

  return bound(*args, **kwds)


((80, 11), (20, 11))

In [87]:
X = train.drop(columns=['label'])
y = train['label']

X_test = test.drop(columns=['label'])
y_test = test['label']

X.shape, y.shape

((80, 10), (80,))

## init weights

In [88]:
W = np.random.randn(10, 3)
W

array([[ 1.74309664, -1.31634789, -1.27157999],
       [-0.78829401, -0.74506356, -0.34445704],
       [-0.79847829, -0.15626524,  0.16488242],
       [-1.24779855, -0.17953347, -2.59314683],
       [ 0.06952611, -0.70690073,  0.40398383],
       [-0.29601795,  1.31828447,  0.09090467],
       [ 1.34418513, -1.06737633, -0.57267993],
       [-0.82199739, -0.37566125, -1.41810911],
       [-1.85040363,  1.83551714,  0.54407544],
       [ 1.74172506, -0.54731027,  0.13863231]])

In [89]:
W0 = np.random.randn(1, 3)
W0

array([[-0.79770891, -0.67167469, -2.34911904]])

## Forward Pass

In [90]:
# Forward pass
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / exp_z.sum(axis=1, keepdims=True)

## Original Evaluation

In [91]:
logits = (X @ W + W0).to_numpy()  # Convert to NumPy array to avoid pandas-specific issues
probs = softmax(logits)          # Predicted class probabilities
preds = np.argmax(probs, axis=1) # Class predictions (0, 1, or 2)

In [92]:
preds

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [93]:
accuracy = accuracy_score(y, preds)

print(f"Original Accuracy: {accuracy:.2f}")

Original Accuracy: 0.21


# Training

### Loss Fuction

In [94]:
def cross_entropy_loss(probs, y_true):
    n = y_true.shape[0]
    log_probs = -np.log(probs[range(n), y_true])
    return np.sum(log_probs) / n

### One-hot encoder
class 0 -> [1, 0, 0]

class 1 -> [0, 1, 0]

class 2 -> [0, 0, 1]


In [95]:
def one_hot(y, num_classes):
    return np.eye(num_classes)[y]

In [96]:
learning_rate = 0.1
epochs = 10000
num_classes = 3
n_samples, n_features = X.shape

for epoch in range(epochs):
    # Forward
    logits = np.dot(X, W) + dw0
    probs = softmax(logits)
    loss = cross_entropy_loss(probs, y)

    # Back Propagate
    y_onehot = one_hot(y, num_classes)
    dlogits = (probs - y_onehot) / n_samples
    dW = np.dot(X.T, dlogits)
    dw0 = np.sum(dlogits, axis=0, keepdims=True)

    # Update Weights
    W -= learning_rate * dW
    W0 -= learning_rate * dw0

    # Log
    if epoch % 1000 == 0:
        predictions = np.argmax(probs, axis=1)
        acc = np.mean(predictions == y)
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.2f}")

Epoch 0, Loss: 129.9141, Accuracy: 0.21


Epoch 1000, Loss: 47.9710, Accuracy: 0.64
Epoch 2000, Loss: 78.1853, Accuracy: 0.64
Epoch 3000, Loss: 20.2618, Accuracy: 0.65
Epoch 4000, Loss: 11.4584, Accuracy: 0.74
Epoch 5000, Loss: 17.7257, Accuracy: 0.61
Epoch 6000, Loss: 67.6062, Accuracy: 0.61
Epoch 7000, Loss: 61.0441, Accuracy: 0.64
Epoch 8000, Loss: 24.6687, Accuracy: 0.51
Epoch 9000, Loss: 25.5388, Accuracy: 0.72
