In [13]:
# MUHAMMAD OAUN
# I21-2702

In [4]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
class SingleLayerNeuralNetwork:
    def __init__(self):
        self.parameters = None

    def SVM_LOSS(self, logits, y):
        diff = logits - logits[np.arange(y.shape[0]), y].reshape(-1, 1)
        diff[diff < 0] = 0
        loss = np.sum(diff)
        der = diff.copy()
        der[der > 0] = 1
        row_sum = np.sum(der, axis=1)
        row_sum = -1 * row_sum
        der[np.arange(y.shape[0]), y] = row_sum

        return loss, der

    def layers_size(self, X, y):
        input_layer = X.shape[1]
        output_layer = np.unique(y).shape[0]

        return input_layer, output_layer

    def init_parameters(self, input_layer, output_layer):
        W = np.random.randn(output_layer, input_layer) * 0.01
        b = np.zeros((output_layer, 1))

        self.parameters = {"W": W,
                            "b": b}


    def forward_propagation(self, X):
        W = self.parameters["W"]
        b = self.parameters["b"]

        print(W.shape)
        print(X.shape)
        print(b.shape)

        Logits = np.matmul(W, X.T) + b
        #Logits = self.softmax(Logits.T)

        return Logits.T
    
    def backward_propagation(self, logits, X, Y):
        _, der = self.SVM_LOSS(logits, Y)
        m = X.shape[0]  # Number of examples in the batch
        dW = np.dot(der.T, X) / m  # Compute the average gradient of weights
        #print(dW)
        db = np.sum(der, axis=0, keepdims=True) / m  # Compute the average gradient of biases
        #print(db)
        grads = {"dW": dW, "db": db.T}
        return grads

        
    def update_parameters(self, grads, learning_rate=1.2):
        W = self.parameters["W"]
        b = self.parameters["b"]

        dW = grads["dW"]
        db = grads["db"]

        #print(dW.shape)
        #print(W.shape)

        #print(db.shape)
        #print(b.shape)

        W = W - learning_rate * dW
        b = b - learning_rate * db

        #print(b)

        self.parameters = {"W": W,
                        "b": b}


    def fit(self, X, Y, num_iterations=10, learning_rate=1.2, print_cost=False):
        input_layer, output_layer = self.layers_size(X, Y)
        self.init_parameters(input_layer, output_layer)

        for i in range(num_iterations):
            Logits = self.forward_propagation(X)
            cost, der = self.SVM_LOSS(Logits, Y)

            grads = self.backward_propagation(Logits, X, Y)
            

            self.update_parameters(grads, learning_rate)

            if print_cost:
                print(f"Cost after iteration {i}: {cost}")
                #print(f"Grad after iteration {i}: {grads}")
                #print(f"Param after iteration {i}: {self.parameters['W'],self.parameters['b']}")


    
    def predict(self, X):
        logits = self.forward_propagation(X)
        #print(logits)
        predictions = np.argmax(logits, axis=1)
        return predictions

    def accuracy(self, X, y):
        predictions = self.predict(X)
        correct_predictions = np.sum(predictions == y)
        accuracy = correct_predictions / len(y) * 100
        return accuracy
    
    def print_weights(self):
        print("Weights:")
        print(self.parameters["W"])

    def print_biases(self):
        print("Biases:")
        print(self.parameters["b"])


## IRIS DATASET

In [6]:
iris = datasets.load_iris()
X = iris.data[:, :]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


In [7]:
model = SingleLayerNeuralNetwork()
model.fit(X_train, y_train, num_iterations=1000, learning_rate=0.2, print_cost=True)

(3, 4)
(112, 4)
(3, 1)
Cost after iteration 0: 10.8153325385089
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 1: 1061.056228866568
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 2: 266.2207537670678
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 3: 569.0730950884132
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 4: 422.22682803839626
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 5: 177.67079560985667
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 6: 627.510131609825
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 7: 6.121791248858872
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 8: 304.52575552972075
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 9: 470.642259119181
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 10: 247.61984868930003
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 11: 133.19143607077007
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 12: 335.4354519582922
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 13: 412.15334840489527
(3, 4)
(112, 4)
(3, 1)
Cost after iteration 14: 264.34941008369714
(3, 4)
(112, 4)


In [8]:
pred = model.predict(X_test)
print(pred)
accuracy = model.accuracy(X_test, y_test)
print(f"Accuracy: {accuracy}%")
model.print_weights()
model.print_biases()

(3, 4)
(38, 4)
(3, 1)
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0]
(3, 4)
(38, 4)
(3, 1)
Accuracy: 100.0%
Weights:
[[ 0.84300977  1.79430772 -2.41036618 -1.1247703 ]
 [ 0.73436511  0.28192335 -0.61957534 -1.19691508]
 [-1.58897777 -2.07903489  3.00063282  2.29579136]]
Biases:
[[ 0.37857143]
 [ 0.80178571]
 [-1.18035714]]


## WINE DATASET

In [6]:
from sklearn.datasets import load_wine

wine_data = load_wine()

In [7]:
from sklearn.utils import shuffle
X = wine_data.data[:, :]
y = wine_data.target
X,y = shuffle(X,y)

num_classes = len(np.unique(y))
print(num_classes)

3


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
model_1 = SingleLayerNeuralNetwork()
model_1.fit(X_train, y_train, num_iterations=20, learning_rate=0.1, print_cost=True)

Cost after iteration 0: 4.915685574064403
Cost after iteration 1: 150.7123630363551
Cost after iteration 2: 58.96630768305625
Cost after iteration 3: 20.67228959329423
Cost after iteration 4: 5.6619810179947105
Cost after iteration 5: 0.8440026747391087
Cost after iteration 6: 0.0
Cost after iteration 7: 0.0
Cost after iteration 8: 0.0
Cost after iteration 9: 0.0
Cost after iteration 10: 0.0
Cost after iteration 11: 0.0
Cost after iteration 12: 0.0
Cost after iteration 13: 0.0
Cost after iteration 14: 0.0
Cost after iteration 15: 0.0
Cost after iteration 16: 0.0
Cost after iteration 17: 0.0
Cost after iteration 18: 0.0
Cost after iteration 19: 0.0


In [11]:
pred = model_1.predict(X_test)
print(pred)
print(pred.shape)
accuracy = model_1.accuracy(X_test, y_test)
print(f"Accuracy: {accuracy}%")
model_1.print_weights()
model_1.print_biases()

[2 0 2 0 0 1 0 2 1 2 2 2 2 2 2 2 1 2 0 2 0 1 2 1 0 2 1 1 2 1 1 1 0 1 1 1 2
 0 0 2 2 1 0 1 1]
(45,)
Accuracy: 91.11111111111111%
Weights:
[[ 4.00108593 -4.90803681  0.40744208 -6.80983041 -0.31425843  4.13065
   5.95672453 -3.20700494  1.83704085 -0.08267091  3.86014332  4.77624061
   7.20635975]
 [-7.01361599 -2.63930767 -4.44920775  1.33655055 -2.37432991  0.44734009
   1.38952815  0.43993809  1.73755153 -9.22958552  5.05058323  2.4230449
  -5.83205721]
 [ 3.00065607  7.51905722  4.07502216  5.49433117  2.68816105 -4.58363428
  -7.32471155  2.78874235 -3.57217485  9.29888546 -8.92640157 -7.20010211
  -1.40264839]]
Biases:
[[-0.3]
 [ 3. ]
 [-2.7]]


## MNIST DATASET

In [12]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.astype('float32')
y = mnist.target.astype('int64')


X, y = shuffle(X, y, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
model_2 = SingleLayerNeuralNetwork()
model_2.fit(X_train, y_train, num_iterations=300, learning_rate=0.05, print_cost=True)


Cost after iteration 0: 63494.80920747011
Cost after iteration 1: 1012015904.9616638
Cost after iteration 2: 673834389.4892441
Cost after iteration 3: 519574556.26205075
Cost after iteration 4: 430716216.54915434
Cost after iteration 5: 373843109.44481367
Cost after iteration 6: 333488475.5425798
Cost after iteration 7: 303135380.7397265
Cost after iteration 8: 279141691.907535
Cost after iteration 9: 259604196.45626742
Cost after iteration 10: 243239717.6624964
Cost after iteration 11: 229350426.36659262
Cost after iteration 12: 217422667.82395172
Cost after iteration 13: 206886215.71591115
Cost after iteration 14: 197495194.52959386
Cost after iteration 15: 188996945.5321053
Cost after iteration 16: 181274082.98578715
Cost after iteration 17: 174228856.61889815
Cost after iteration 18: 167750888.44942194
Cost after iteration 19: 161823740.95490575
Cost after iteration 20: 156367571.53970617
Cost after iteration 21: 151281001.6277645
Cost after iteration 22: 146542096.59256023
Cost af

In [14]:
pred = model_2.predict(X_test)
print(pred[:25])
print(pred.shape)
accuracy = model_2.accuracy(X_test, y_test)
print(f"Accuracy: {accuracy}%")
model_2.print_weights()
model_2.print_biases()

[2 5 8 8 2 7 3 9 9 2 6 9 2 8 6 3 7 7 4 8 9 2 6 5 4]
(17500,)
Accuracy: 90.08571428571429%
Weights:
[[-0.00241931  0.02284115  0.02401911 ...  0.00556167  0.00946071
   0.00666538]
 [ 0.00799784  0.01886499 -0.00176263 ... -0.00212723  0.00739068
   0.00637852]
 [ 0.00297453 -0.00363587 -0.00919053 ...  0.0180557   0.00443084
   0.02002596]
 ...
 [-0.00043695  0.01819382 -0.0028485  ... -0.00079843  0.00840406
   0.00049359]
 [ 0.00349978 -0.0093987  -0.00577064 ...  0.00641164  0.00526581
   0.00245011]
 [-0.0008425  -0.00884144 -0.00493721 ...  0.0054911  -0.00091504
  -0.01235039]]
Biases:
[[-2081.05]
 [-2447.  ]
 [ 1990.3 ]
 [ 2012.05]
 [-1176.4 ]
 [ 2383.25]
 [-1649.45]
 [-1741.65]
 [ 2618.45]
 [   91.5 ]]


In [15]:
print(X_test.shape)
print(X_train.shape)

(17500, 784)
(52500, 784)


In [16]:
28*28

784

In [27]:
X_test.shape

(38, 4)

In [28]:

corr = np.sum(pred == y_test.reshape(-1, 1))
accuracy = correct_predictions / len(y_test) * 100

accuracy

NameError: name 'correct_predictions' is not defined

In [None]:
len(y_test)

38

In [None]:
pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0], dtype=int64)

In [None]:
def softmax( z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1], dtype=int64)

In [None]:
corr = np.sum(pred == y_test.reshape(-1, 1))
corr

418

In [None]:
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0])

In [None]:

def layers_size( X, y):
        input_layer = X.shape[1]
        output_layer = np.unique(y).shape[0]

        return input_layer, output_layer

def init_parameters( input_layer, output_layer):
    W = np.random.randn(output_layer, input_layer) * 0.01
    b = np.zeros((output_layer, 1))

    return {"W": W, "b": b}

def softmax( z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def forward_propagation( X, parameters):
    W = parameters["W"]
    b = parameters["b"]

    Logits = np.matmul(W, X.T) + b
    Logits = softmax(Logits.T)

    return Logits

In [None]:
def SVM_LOSS( logits, y):

    diff = logits - logits[np.arange(y.shape[0]),y].reshape(-1,1)
    diff[diff < 0] = 0
    loss = np.sum(diff)
    der = diff.copy()
    der[der>0] = 1
    row_sum = np.sum(der,axis=1)
    row_sum = -1 * row_sum
    der[np.arange(y.shape[0]),y] = row_sum
    return loss, der

In [29]:
y.shape

(150,)

In [30]:
n_x = layers_size(X, y)[0]
n_y = layers_size(X, y)[1]
    
parameters = init_parameters(n_x, n_y)

A = forward_propagation(X,parameters)
print(A.shape)
logits = A
        
diff = logits - logits[np.arange(y.shape[0]),y].reshape(-1,1)
diff[diff < 0] = 0
loss = np.sum(diff)
der = diff.copy()
der[der>0] = 1
row_sum = np.sum(der,axis=1)
row_sum = -1 * row_sum
der[np.arange(y.shape[0]),y] = row_sum



NameError: name 'layers_size' is not defined

In [31]:
np.sum(der, axis=0, keepdims=True) / 112

NameError: name 'der' is not defined

In [32]:
np.sum(der,axis=0) 

NameError: name 'der' is not defined

In [199]:
logits[np.arange(y.shape[0]),y].reshape(-1,1)


array([[0.48184038],
       [0.48337662],
       [0.48338606],
       [0.48445518],
       [0.48198688],
       [0.48163853],
       [0.4839537 ],
       [0.48262567],
       [0.48517523],
       [0.48316758],
       [0.48079662],
       [0.48355758],
       [0.48338571],
       [0.48415562],
       [0.4781178 ],
       [0.47921173],
       [0.48050435],
       [0.48215538],
       [0.48052898],
       [0.48190621],
       [0.48189643],
       [0.48239881],
       [0.48214893],
       [0.48399159],
       [0.48440855],
       [0.48361975],
       [0.4835394 ],
       [0.48179987],
       [0.48169388],
       [0.48423702],
       [0.48409047],
       [0.48195928],
       [0.48041953],
       [0.47930146],
       [0.48348264],
       [0.48213006],
       [0.48026065],
       [0.48199597],
       [0.48471388],
       [0.48230155],
       [0.4821959 ],
       [0.48594843],
       [0.48435859],
       [0.48399194],
       [0.48335568],
       [0.48401587],
       [0.48187479],
       [0.483

In [208]:
logits - y.reshape(-1,1)

array([[ 0.48184038,  0.50384571,  0.47555898],
       [ 0.48337662,  0.50366969,  0.47828977],
       [ 0.48338606,  0.50357234,  0.47765146],
       [ 0.48445518,  0.50406794,  0.47838462],
       [ 0.48198688,  0.50392501,  0.47526019],
       [ 0.48163853,  0.50490579,  0.47392542],
       [ 0.4839537 ,  0.50405316,  0.47725565],
       [ 0.48262567,  0.50408536,  0.47625142],
       [ 0.48517523,  0.50377938,  0.47964892],
       [ 0.48316758,  0.50387568,  0.47751639],
       [ 0.48079662,  0.50410278,  0.4741191 ],
       [ 0.48355758,  0.5044043 ,  0.47664509],
       [ 0.48338571,  0.50360343,  0.47812819],
       [ 0.48415562,  0.50299305,  0.47889025],
       [ 0.4781178 ,  0.50335234,  0.47186653],
       [ 0.47921173,  0.50453842,  0.47093985],
       [ 0.48050435,  0.503882  ,  0.47376377],
       [ 0.48215538,  0.50394347,  0.47589708],
       [ 0.48052898,  0.50466574,  0.47353311],
       [ 0.48190621,  0.50434283,  0.47451146],
       [ 0.48189643,  0.50447126,  0.475

In [115]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [162]:
input_layer = X.shape[1]
output_layer = np.unique(y).shape[0]



W = np.random.randn(output_layer,input_layer) * 0.01
b = np.zeros((output_layer, 1))

print(W.shape)
print(X.shape)

print(b.shape)

parameters = {"W": W,
                "b": b}



W = parameters["W"]
b = parameters["b"]

Z = np.matmul(W, X.T) + b
A = sigmoid(Z.T)

print(A.shape)




(3, 4)
(150, 4)
(3, 1)
(150, 3)


In [163]:
A

array([[0.48502632, 0.51825599, 0.52607007],
       [0.48648305, 0.51661609, 0.52482756],
       [0.48630022, 0.51681645, 0.52406689],
       [0.48614099, 0.51672527, 0.52415038],
       [0.48481385, 0.51843166, 0.52582565],
       [0.48376116, 0.52079576, 0.52886857],
       [0.48592941, 0.51773928, 0.52445076],
       [0.48510928, 0.51801381, 0.52588194],
       [0.48703352, 0.51578759, 0.52295879],
       [0.48561933, 0.51674   , 0.5249579 ],
       [0.48407769, 0.5193021 , 0.52761288],
       [0.48497975, 0.51794729, 0.52544936],
       [0.48618665, 0.51619564, 0.52422639],
       [0.48719489, 0.5151984 , 0.52164975],
       [0.48377256, 0.52013723, 0.52852892],
       [0.48273279, 0.52224144, 0.52991904],
       [0.48472969, 0.52019189, 0.52778277],
       [0.48537899, 0.51856757, 0.52631898],
       [0.48350839, 0.52052614, 0.52956771],
       [0.48433048, 0.51957177, 0.52691343],
       [0.48439973, 0.5187509 , 0.52783303],
       [0.48495191, 0.51959891, 0.52705467],
       [0.

In [112]:
l = A

l[0,0]

0.5218844014174577

In [218]:
logits.shape

(150, 3)

In [239]:
y=y.reshape(-1,1)