In [35]:
import numpy as np
from sklearn.datasets import fetch_openml
import joblib

x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame= False)

y = y.reshape(-1,1).astype(int)

data = np.hstack((y, x))

np.random.seed(42)
np.random.shuffle(data)

datatrain = data[:50000]
datadev = data[50000:60000]
datatest = data[60000:]

print(datatrain.shape, datadev.shape, datatest.shape)

def feature(df):
    return np.hstack((np.ones((df.shape[0],1)), df[:,1:] / 255))

def target(data):
    y = data[:,0].reshape(-1).astype(int)
    y = np.eye(np.max(y) + 1)[y]
    return y

xtrain, xdev, xtest = tuple(feature(skup) for skup in [datatrain, datadev, datatest])

ytrain, ydev, ytest = tuple(target(skup) for skup in [datatrain, datadev, datatest])
print(xtrain.max())
xtrain.shape, xdev.shape, xtest.shape, ytrain.shape, ydev.shape, ytest.shape

(50000, 785) (10000, 785) (10000, 785)
1.0


((50000, 785),
 (10000, 785),
 (10000, 785),
 (50000, 10),
 (10000, 10),
 (10000, 10))

In [2]:
ytrain

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [84]:
class Logit():
    
    def __init__(self, lr=0.1, regularizacija='', reg=0.01, maxIter=None, nIter = None):      
        '''Za regularizaciju uneti string 'l1' ili 'l2'
        reg je regularizacioni parametar lambda 
        learning rate: pocetna brzina ucenja (polovi se na svakih 1000 iteracija) '''
        
        self.lr = lr
        self.regularizacija = regularizacija
        self.reg = reg
        self.maxIter = maxIter
        self.preciznostTrain= None
        self.preciznostDev = None
        self.historyW = []


    def sigmoid(self, x, w = None):
        if w is None:
            w = self.w
        z = x @ w
        z = np.clip(z, -500, 500)
        p = 1 / (1 + np.exp(-z))
        return p

    def softmax(self,x, w = None):
        if w is None:
            w = self.w
        z = x @ w
        z -= np.max(z, axis = 1, keepdims = True)
        p = np.exp(z) / np.sum(np.exp(z), axis = 1, keepdims = True)
        return p
    
    def predict(self, x, y =None):
        """Racuna binarne predikcije za ulazne podatke x.
        Ako je prosleđen y, vraća dvojku (preciznost, predikcije)."""

        if self.aktivacija == 'sigmoid':
            p = self.sigmoid(x)
            pred = (p > 0.5).astype(int)
            if y is not None:
                return (np.mean(pred == y), pred)
            else:
                return pred

        elif self.aktivacija == 'softmax':
            p = self.softmax(x)
            pred = np.argmax(p, axis=1)
            if y is not None:
                return (np.mean(pred == np.argmax(y, axis=1)), pred)
            else:
                return pred
        
    def predictProba(self,x):
        """Vraca predikcije verovatnoca za ulazne podatke x. """
        
        if self.aktivacija == 'sigmoid':
            return self.sigmoid(x)
            
        elif self.aktivacija == 'softmax':
            return self.softmax(x)
        
    def fit(self, x, y, xdev, ydev, randomState = 42, aktivacija = 'sigmoid'):
        """Treniranje modela koristeći grupni gradijentni spust (batch gradient descent).
        Funkcija prati preciznost na trening i dev skupu.
        Svakih 100 iteracija se čuvaju trenutne težine u self.historyW.
        Ako preciznost na dev skupu opadne u odnosu na 
        4 evaluacije unazad, smatra se da je dostigao plato i
        model se vraca na težine iz te iteracije i vraca
        dvojku (preciznost na trening skupu, preciznost na dev skupu)."""
        
        m, n = x.shape
        np.random.seed(randomState)
        self.w = np.random.rand(n,y.shape[1]) - .5
        trainscore = []
        devscore = [0 for _ in range(4)]
        i = 0
        lr = self.lr
        epsilon=.0000001
        self.aktivacija = aktivacija
        
        while True:
            z = x @ self.w
            
            if self.aktivacija == 'sigmoid':
                p = self.sigmoid(x)
                gradijenti = (x.T @ (p - y)) / m
                l = -np.mean(y * np.log(p + epsilon) + (1 - y) * np.log(1 - p + epsilon))                
                
            elif self.aktivacija == 'softmax':
                z -= np.max(z, axis = 1, keepdims = True)
                p = np.exp(z) / np.sum(np.exp(z), axis = 1, keepdims = True)
                gradijenti = (x.T @ (p - y)) / m
                l = -np.mean(np.sum(y * np.log(p + epsilon), axis=1))
                
            else:
                raise ValueError("Aktivacija mora biti 'sigmoid' ili 'softmax'")

            
            if self.regularizacija.lower() == 'l1':
                l += (self.reg / m) * np.sum(np.abs(self.w[1:]))
                gradijenti[1:] += (self.reg / m) * np.sign(self.w[1:])
            elif self.regularizacija.lower() == 'l2':
                l += (self.reg / (2 * m)) * np.sum(np.square(self.w[1:]))
                gradijenti[1:] += (self.reg / m) * self.w[1:]

            self.w -= lr * gradijenti
            
            if i % 100 == 0:
                self.historyW.append(self.w.copy())
                preciznostTrain, _ = self.predict(x,y)
                trainscore.append(preciznostTrain)
                
                preciznostDev, _ = self.predict(xdev, ydev)
                devscore.append(preciznostDev)
            
            if i > 300 and (devscore[-1] - devscore[-5]) <= 0:
                print(f"Optimalni parametri su iz {i-300} iteracije")
                self.w = self.historyW[-4]
                break
            
            if self.maxIter is not None and i >= self.maxIter:
                print(f"Maksimalan broj iteracija ({self.maxIter}) dostignut.")
                break
            
            if i % 1000 == 0 and i > 0:
                lr *= 0.5
                print(f"Learning rate: {lr}, iteracija {i}")
                

            i += 1
        self.nIter = i - 300
        self.preciznostTrain, _ = self.predict(x,y)
        print("Preciznost na trening setu:", self.preciznostTrain)

        self.preciznostDev, _ = self.predict(xdev,ydev)
        print("Preciznost na dev setu:", self.preciznostDev)
        
        return self.preciznostTrain, self.preciznostDev

    
    def fitReg(self, x, y, xdev, ydev, listaRegularizacije):
        '''Fituje model za svaki parametar lambda iz liste, cuva rezultate na dev skupu
        na kraju fituje model sa lokalno optimalnim lambda parametrom i vraca recnik
        {lambda : rezultat na dev skupu}'''

        rezultati = []
        for i in range(len(listaRegularizacije)):
            self.reg = listaRegularizacije[i]
            _, devScore = self.fit(x, y, xdev, ydev)
            rezultati.append(devScore)
            
        self.w = self.historyW[np.argmax(rezultati)]
        self.reg = listaRegularizacije[np.argmax(rezultati)]
        self.fit(x, y, xdev, ydev)
        return {self.reg : rezultati[np.argmax(rezultati)]}

In [4]:
np.random.seed(42)
w = np.random.rand(xtrain.shape[1],y.shape[1]) - .5

In [5]:
model = Logit()

In [8]:
model.fit(xtrain[:1000],ytrain[:1000], xdev[:10000], ydev[:10000], aktivacija='softmax')

Learning rate: 0.05, iteracija 1000
Optimalni parametri su iz 1500 iteracije
Preciznost na trening setu: 0.978
Preciznost na dev setu: 0.8395


(np.float64(0.978), np.float64(0.8395))

In [25]:
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame= False)

y = y.astype(int).reshape(-1,1)
y = np.where(y == 5, 1, 0)

y = y.reshape(-1,1)

data = np.hstack((y, x))

np.random.seed(42)
np.random.shuffle(data)

datatrain = data[:50000]
datadev = data[50000:60000]
datatest = data[60000:]

print(datatrain.shape, datadev.shape, datatest.shape)

def feature(df):
    return np.hstack((np.ones((df.shape[0],1)), df[:,1:] / 255))

def target(data):
    y = data[:,0].reshape(-1,1)
    return y

xtrain, xdev, xtest = tuple(feature(skup) for skup in [datatrain, datadev, datatest])

ytrain, ydev, ytest = tuple(target(skup) for skup in [datatrain, datadev, datatest])
print(xtrain.max())
xtrain.shape, xdev.shape, xtest.shape, ytrain.shape, ydev.shape, ytest.shape

(50000, 785) (10000, 785) (10000, 785)
1.0


((50000, 785), (10000, 785), (10000, 785), (50000, 1), (10000, 1), (10000, 1))

In [15]:
y

array([[1],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]])

In [66]:
model = Logit()

In [17]:
model.fit(xtrain[:1000],ytrain[:1000], xdev[:10000], ydev[:10000], aktivacija='sigmoid')

Learning rate: 0.05, iteracija 1000
Optimalni parametri su iz 1500 iteracije
Preciznost na trening setu: 0.983
Preciznost na dev setu: 0.9553


(np.float64(0.983), np.float64(0.9553))

In [18]:
class NeuralnaMreza(Logit):
    def __init__(self, slojevi, lr=0.1, regularizacija='', reg=0.01, maxIter=10000):
        """slojevi: prima listu"""
        super().__init__(lr=lr, regularizacija=regularizacija, reg=reg, maxIter=maxIter)
        self.slojevi = slojevi
        self.weights = []  # Lista težina za svaki sloj
        for i in range(len(slojevi) - 1):
            W = np.random.randn(slojevi[i], slojevi[i+1]) * 0.01
            self.weights.append(W)
            

In [75]:
import numpy as np
from sklearn.datasets import fetch_openml
import joblib

x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame= False)

y = y.reshape(-1,1).astype(int)

data = np.hstack((y, x))

np.random.seed(42)
np.random.shuffle(data)

datatrain = data[:50000]
datadev = data[50000:60000]
datatest = data[60000:]

print(datatrain.shape, datadev.shape, datatest.shape)

def feature(df):
    return np.hstack((np.ones((df.shape[0],1)), df[:,1:] / 255))

def target(data):
    y = data[:,0].reshape(-1).astype(int)
    y = np.eye(np.max(y) + 1)[y]
    return y

xtrain, xdev, xtest = tuple(feature(skup) for skup in [datatrain, datadev, datatest])

ytrain, ydev, ytest = tuple(target(skup) for skup in [datatrain, datadev, datatest])
print(xtrain.max())
xtrain.shape, xdev.shape, xtest.shape, ytrain.shape, ydev.shape, ytest.shape

(50000, 785) (10000, 785) (10000, 785)
1.0


((50000, 785),
 (10000, 785),
 (10000, 785),
 (50000, 10),
 (10000, 10),
 (10000, 10))

In [85]:
logit = Logit()

In [76]:
xtrain, ytrain = xtrain[:1000,1:], ytrain[:1000]

In [86]:
xtrain.shape, ytrain.shape

((1000, 784), (1000, 10))

In [87]:
logit.w =  np.random.rand(xtrain.shape[1],100) - .5
logit.hidden = logit.sigmoid(xtrain)

In [88]:
logit.hidden.shape

(1000, 100)

In [89]:
logit.w = np.random.rand(100,10) - .5
logit.softmax(logit.hidden).shape

(1000, 10)

In [90]:
y.shape[1]

1

In [100]:
def forwardPass(x, logit, hidden_dim=100, output_dim=10, randomState=42):
    np.random.seed(randomState)
    w1 = np.random.rand(x.shape[1], hidden_dim) - 0.5
    
    sloj1 = logit.sigmoid(x,w1)

    w2 = np.random.rand(hidden_dim, output_dim) - 0.5
    output = logit.softmax(sloj1,w2)

    return output, sloj1, w1, w2

In [154]:
m,n = xtrain.shape
np.random.seed(42)
w1 = np.random.rand(xtrain.shape[1], 100) - 0.5

a1 = logit.sigmoid(xtrain,w1)

w2 = np.random.rand(100,10) - 0.5
a2 = logit.softmax(a1,w2)
L = - np.sum(ytrain * np.log(a2 + .0000001))

In [155]:
L

np.float64(4219.572898444925)

In [158]:
dL_dZ2 = ytrain - a2

In [161]:
dL_dZ2.shape

(1000, 10)

In [139]:
w1.shape, a1.shape, w2.shape, a2.shape

((784, 100), (1000, 100), (100, 10), (1000, 10))

In [137]:
(z1.T @ dz2) / m

array([[ 0.18760051, -0.06810877, -0.04074528, -0.07777493, -0.01641848,
        -0.02564077, -0.01749238, -0.03245849,  0.11478655, -0.02374795],
       [ 0.01987831, -0.00806623, -0.00328452, -0.00207363, -0.00186262,
        -0.00228448, -0.00274195, -0.01056964,  0.01425981, -0.00325506],
       [ 0.26694007, -0.084962  , -0.0235122 , -0.0924703 , -0.0383996 ,
        -0.06646079, -0.03111365, -0.0406729 ,  0.13246422, -0.02181286],
       [ 0.20009542, -0.06200154, -0.02232641, -0.05035108, -0.02024361,
        -0.05235028, -0.03378417, -0.01953606,  0.09823011, -0.03773239],
       [ 0.18577108, -0.06648755, -0.03566436, -0.04098614, -0.0221159 ,
        -0.02697471, -0.04529576, -0.04965895,  0.1059092 , -0.00449691],
       [ 0.23910192, -0.03663388, -0.05549821, -0.08336278, -0.03986654,
        -0.04784672, -0.04416994, -0.05159616,  0.14892828, -0.02905598],
       [ 0.35650733, -0.08082392, -0.05401201, -0.09994293, -0.05585031,
        -0.05696678, -0.0629698 , -0.07561974

In [117]:
dz2.shape

(1000, 10)

In [102]:
logit = Logit()
np.random.seed(42)

# 1. Inicijalizacija W1 (784 → 100)
w1 = np.random.rand(xtrain.shape[1], 100) - 0.5  # (784, 100)

# 2. Hidden layer (sigmoid aktivacija)
hidden = logit.sigmoid(xtrain, w1)  # (1000, 100)

# 3. Inicijalizacija W2 (100 → 10)
w2 = np.random.rand(100, 10) - 0.5  # (100, 10)

# 4. Output layer (softmax)
output = logit.softmax(hidden, w2)  # (1000, 10)

In [27]:
y

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])

In [19]:
datatrain[:,0].reshape(-1,1)

array([[8],
       [4],
       [8],
       ...,
       [9],
       [2],
       [7]])

In [35]:
np.random.seed(42)
ytrain = target(datatrain)
logit.w = np.random.rand(xtrain.shape[1],ytrain.shape[1]) - .5

In [36]:
logit.w.shape

(784, 10)

In [47]:
print(xtrain.shape, ytrain.shape)

np.random.seed(42)
logit.w = np.random.rand(784,100) - .5

y

(1000, 784) (1000, 10)


array([[5],
       [0],
       [4],
       ...,
       [4],
       [5],
       [6]])

In [48]:
logit.w.shape, xtrain.shape

((784, 100), (1000, 784))

In [51]:
logit.z = xtrain @ logit.w
logit.z.shape

(1000, 100)

In [53]:
logit.hidden = logit.sigmoid(xtrain)
logit.hidden.shape

(1000, 100)

In [64]:
np.random.seed(42)
logit.w = np.random.rand(100,10) - .5

In [65]:
logit.w.shape, logit.hidden.shape

((100, 10), (1000, 100))

In [66]:
logit.z2 = logit.hidden @logit.w
logit.z2.shape

(1000, 10)

In [67]:
logit.softmax(logit.z2)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 10)

In [None]:
xtrain.shape