In [715]:
import numpy as np
from sklearn.datasets import fetch_openml
import joblib

x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame= False)

y = y.astype(int).reshape(-1,1)
y = np.where(y == 5, 1, 0)

y = y.reshape(-1,1)

data = np.hstack((y, x))

np.random.seed(42)
np.random.shuffle(data)

datatrain = data[:50000]
datadev = data[50000:60000]
datatest = data[60000:]

print(datatrain.shape, datadev.shape, datatest.shape)

def feature(df):
    return np.hstack((np.ones((df.shape[0],1)), df[:,1:] / 255))

def target(data):
    y = data[:,0].reshape(-1,1)
    return y

xtrain, xdev, xtest = tuple(feature(skup) for skup in [datatrain, datadev, datatest])

ytrain, ydev, ytest = tuple(target(skup) for skup in [datatrain, datadev, datatest])
print(xtrain.max())
xtrain.shape, xdev.shape, xtest.shape, ytrain.shape, ydev.shape, ytest.shape

(50000, 785) (10000, 785) (10000, 785)


((50000, 785), (10000, 785), (10000, 785), (50000, 1), (10000, 1), (10000, 1))

In [831]:
class Logit():
    
    def __init__(self, lr=0.1, regularizacija='', reg=0.01, maxIter=None, nIter = None):      
        '''Za regularizaciju uneti string 'l1' ili 'l2'
        reg je regularizacioni parametar lambda 
        learning: pocetna brzina ucenja (polovi se na svakih 1000 iteracija) '''
        
        self.lr = lr
        self.regularizacija = regularizacija
        self.reg = reg
        self.maxIter = maxIter
        self.preciznostTrain= None
        self.preciznostDev = None
        self.historyW = []

    def predict(self, x, y =None):
        """Racuna binarne predikcije za ulazne podatke x.
        Ako je prosleđen y, vraća dvojku (preciznost, predikcije)."""
        
        z = np.clip(x @ self.w, -500, 500)
        p = 1 / (1 + np.exp(-z))
        pred = (p > 0.5).astype(int)
        if y is not None:
            return (np.mean(pred == y.reshape(-1,1)), pred)
        else:
            return pred
    def predictProba(self,x):
        """Vraca predikcije verovatnoca za ulazne podatke x. """
        
        z = np.clip(x @ self.w, -500, 500)
        return 1 / (1 + np.exp(-z))
        
    def fit(self, x, y, xdev, ydev, randomState = 42):
        """Treniranje modela koristeći grupni gradijentni spust (batch gradient descent).
        Funkcija prati preciznost na trening i dev skupu.
        Svakih 100 iteracija se čuvaju trenutne težine u self.historyW.
        Ako preciznost na dev skupu opadne u odnosu na 
        4 evaluacije unazad, smatra se da je dostigao plato i
        model se vraca na težine iz te iteracije i vraca
        dvojku (preciznost na trening skupu, preciznost na dev skupu)."""
        
        m, n = x.shape
        np.random.seed(randomState)
        self.w = np.random.rand(n,1) - .5
        trainscore = []
        devscore = [0 for _ in range(4)]
        i = 0
        lr = self.lr
        epsilon=.0000001
        
        while True:
            z = np.clip(x @ self.w, -500, 500)
            pred = 1 / (1 + np.exp(-z))
            gradijenti = (x.T @ (pred - y.reshape(-1, 1))) / m
            
            
            if self.regularizacija.lower() == 'l1':
                l = -np.mean(y * np.log(pred + epsilon) + (1 - y) * np.log(1 - pred + epsilon)) + (self.reg / m) * np.sum(np.abs(self.w[1:]))
                gradijenti[1:] += (self.reg / m) * np.sign(self.w[1:])
            elif self.regularizacija.lower() == 'l2':
                l = -np.mean(y * np.log(pred + epsilon) + (1 - y) * np.log(1 - pred + epsilon)) + (self.reg / (2 * m)) * np.sum(np.square(self.w[1:]))
                gradijenti[1:] += (self.reg / m) * self.w[1:]
               
            else:
                l = -np.mean(y * np.log(pred + epsilon) + (1 - y) * np.log(1 - pred + epsilon))

            self.w -= lr * gradijenti
            grad_norm = np.linalg.norm(gradijenti)
            
            if i % 100 == 0:
                self.historyW.append(self.w.copy())
                preciznostTrain, _ = self.predict(x,y)
                trainscore.append(preciznostTrain)
                
                preciznostDev, _ = self.predict(xdev, ydev)
                devscore.append(preciznostDev)
            
            if i > 300 and (devscore[-1] - devscore[-5]) <= 0:
                print(f"Optimalni parametri su iz {i-300} iteracije")
                self.w = self.historyW[-4]
                break
            
            if self.maxIter is not None and i >= self.maxIter:
                print(f"Maksimalan broj iteracija ({self.maxIter}) dostignut.")
                break
            
            if i % 1000 == 0 and i > 0:
                lr *= 0.5
                print(f"Learning rate: {lr}, iteracija {i}")
                

            i += 1
        self.nIter = i
        self.preciznostTrain, _ = self.predict(x,y)
        print("Preciznost na trening setu:", self.preciznostTrain)

        self.preciznostDev, _ = self.predict(xdev,ydev)
        print("Preciznost na dev setu:", self.preciznostDev)
        
        return self.preciznostTrain, self.preciznostDev

    
    def fitReg(self, x, y, xdev, ydev, listaRegularizacije):
        '''Radi grid search za regularizacioni parametar lambda iz liste
        Vraca recnik lokalno optimalnih parametar a{lambda : rezultat na dev skupu} '''
        
        rezultati = []
        for i in range(len(listaRegularizacije)):
            self.reg = listaRegularizacije[i]
            _, devScore = self.fit(x, y, xdev, ydev)
            rezultati.append(devScore)
            
        self.w = self.historyW[np.argmax(rezultati)]
        self.reg = listaRegularizacije[np.argmax(rezultati)]
        return {self.reg : rezultati[np.argmax(rezultati)]}

In [763]:
logit = Logit()

In [765]:
logit.fit(xtrain, ytrain, xdev, ydev)

Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2100 iteracije
Preciznost na trening setu: 0.96416
Preciznost na dev setu: 0.9628


(0.96416, 0.9628)

In [787]:
print(logit.nIter)

2400


In [791]:
joblib.dump(logit, 'logitNumpy.pkl')

['logitNumpy.pkl']

In [793]:
joblib.load('logitNumpy.pkl')

<__main__.Logit at 0x1fe91853d70>

In [860]:
logitL1 = Logit(regularizacija = 'l1')

In [862]:
regul = np.logspace(-.5,1.2,5)

In [864]:
logitL1.fitReg(xtrain, ytrain, xdev, ydev, listaRegularizacije = regul)

Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2100 iteracije
Preciznost na trening setu: 0.96414
Preciznost na dev setu: 0.9629
Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2100 iteracije
Preciznost na trening setu: 0.96418
Preciznost na dev setu: 0.9629
Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2100 iteracije
Preciznost na trening setu: 0.96418
Preciznost na dev setu: 0.9629
Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2600 iteracije
Preciznost na trening setu: 0.9648
Preciznost na dev setu: 0.9632
Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Learning rate: 0.0125, iteracija 3000
Optimalni parametri su iz 3000 iteracije
Preciznost na trening setu: 0.96508
Preciznost na dev setu: 0.9634


{15.848931924611133: 0.9634}

In [None]:
joblib.dump(logitL1, 'logitL1Numpy.pkl')

In [866]:
logitL2 = Logit(regularizacija = 'l2')

In [867]:
np.logspace(-.5,1.2,5)

array([ 0.316228,  0.841395,  2.238721,  5.956621, 15.848932])

In [None]:
logitL2.fitReg(xtrain, ytrain, xdev, ydev, np.logspace(-.5,1.2,5))

Learning rate: 0.05, iteracija 1000
Learning rate: 0.025, iteracija 2000
Optimalni parametri su iz 2100 iteracije
Preciznost na trening setu: 0.96414
Preciznost na dev setu: 0.9628
Learning rate: 0.05, iteracija 1000


In [None]:
joblib.dump(logitL2,'logit2Numpy.pkl')

In [None]:
print(logit.predict(xdev, ydev)[0],logitL1.predict(xdev, ydev)[0],logitL2.predict(xdev, ydev)[0])

In [None]:
print(logit.predict(xtest, ytest)[0],logitL1.predict(xtest, ytest)[0],logitL2.predict(xtest, ytest)[0])

In [None]:
logit.nIter, logitL1.nIter, logitL2.nIter

In [None]:
model.preciznostTrain, model.preciznostDev

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [55]:
model = LogisticRegression(penalty = None,max_iter=5000)
model.fit(xtrain, ytrain.reshape(-1,))

ypredSklearn = model.predict(xtrain)

print(accuracy_score(ypredSklearn, ytrain))
print(logit.predict(xtrain, ytrain)[0],logitL1.predict(xtrain, ytrain)[0],logitL2.predict(xtrain, ytrain)[0])


ypredSklearn.shape, pred.shape

1668
0.97822
0.95486


((50000,), (50000, 1))

In [None]:
print(accuracy_score(ypredSklearn, ytrain))

In [25]:
print(accuracy_score(model.predict(xdev), ydev))

0.9752


`SA L1 REGULARIZACIJOM`

In [272]:
np.random.seed(42)
wL1 = np.random.rand(n,1) - .5

z = xtrain @ wL1
pred = 1 / (1 + np.exp(-z))
print(pred[:10])

reg = 0.01

[[0.377692]
 [0.345481]
 [0.045587]
 [0.225315]
 [0.251538]
 [0.185555]
 [0.04456 ]
 [0.590041]
 [0.683375]
 [0.449965]]


In [274]:
z = xtrain @ wL1
pred = 1 / (1 + np.exp(-z))
l = -(np.mean(ytrain * np.log(pred+.00000001) + (1 - ytrain) * np.log(1 - pred+.00000001))) + (reg / (2 * m)) * np.sum(np.square(wL1[1:]))
print(l)
gradijenti = (xtrain.T @ (pred - ytrain)) / m
gradijenti[1:] += (reg / m) * wL1[1:]
wL1 -= lr * gradijenti
z = xtrain @ wL1
pred = 1 / (1 + np.exp(-z))
l = -(np.mean(ytrain * np.log(pred+.00000001) + (1 - ytrain) * np.log(1 - pred+.00000001))) + (reg / (2 * m)) * np.sum(np.square(wL1[1:]))
print(l)

0.7199605642678382
0.693246248758746


`SA L2 REGULARIZACIJOM`

In [234]:
np.random.seed(42)
wL2 = np.random.rand(n,1) - .5

z = xtrain @ wL2
pred = 1 / (1 + np.exp(-z))
print(pred[:10])

reg = 0.01

[[0.377692]
 [0.345481]
 [0.045587]
 [0.225315]
 [0.251538]
 [0.185555]
 [0.04456 ]
 [0.590041]
 [0.683375]
 [0.449965]]


In [236]:
z = xtrain @ wL2
pred = 1 / (1 + np.exp(-z))
l = -(np.mean(ytrain * np.log(pred+.00000001) + (1 - ytrain) * np.log(1 - pred+.00000001))) + (reg / (2 * m)) * np.sum(np.square(wL2[1:]))
print(l)
gradijenti = (xtrain.T @ (pred - ytrain)) / m
gradijenti[1:] += (reg / m) * wL2[1:]
wL2 -= lr * gradijenti
z = xtrain @ wL2
pred = 1 / (1 + np.exp(-z))
l = -(np.mean(ytrain * np.log(pred+.00000001) + (1 - ytrain) * np.log(1 - pred+.00000001))) + (reg / (2 * m)) * np.sum(np.square(wL2[1:]))
print(l)

0.7199605642678382
0.693246248758746


In [238]:
epsilon = .00000001

In [240]:
for i in range (5000):
    lr = 0.01
    z = np.clip(xtrain @ wL2, -500, 500)
    pred = 1 / (1 + np.exp(-z))
    lreg = -(np.mean(ytrain * np.log(pred + epsilon) + (1 - ytrain) * np.log(1 - pred + epsilon))) + (reg / (2 * m)) * np.sum(np.square(wL2[1:]))
    l = -(np.mean(ytrain * np.log(pred + epsilon) + (1 - ytrain) * np.log(1 - pred + epsilon)))
    gradijenti = (xtrain.T @ (pred - ytrain)) / m
    gradijenti[1:] += (reg / m) * wL2[1:]
    wL2 -= lr * gradijenti
    l2grad = np.linalg.norm(gradijenti)
    if i % 500 == 0:
        lr = lr *.5

    if i % 100 == 0:
        print(f"Iteracija {i}, loss: {lreg}, bez regulacija {l}, velicina gradijenta{l2grad}")

    if l2grad < 0.005:
        break

Iteracija 0, loss: 0.693246248758746, bez regulacija 0.6932394927805211, velicina gradijenta1.5678472463373083
Iteracija 100, loss: 0.3715283624719221, bez regulacija 0.37152163323677806, velicina gradijenta0.22202711087051855
Iteracija 200, loss: 0.3315630025942949, bez regulacija 0.3315562950442293, velicina gradijenta0.18340975055768716
Iteracija 300, loss: 0.3022317064889122, bez regulacija 0.3022250147781215, velicina gradijenta0.15970628872153794
Iteracija 400, loss: 0.27967621337138837, bez regulacija 0.2796695328617582, velicina gradijenta0.1411211785335187
Iteracija 500, loss: 0.2618293777510655, bez regulacija 0.2618227050988574, velicina gradijenta0.12640736488019033
Iteracija 600, loss: 0.24734376759698307, bez regulacija 0.24733710035600923, velicina gradijenta0.11455557844955358
Iteracija 700, loss: 0.23533252181008382, bez regulacija 0.23532585813965953, velicina gradijenta0.10481776875003264
Iteracija 800, loss: 0.22519721640933715, bez regulacija 0.22519055488871156, v

In [263]:
z = xtrain @ wL2
pred = 1 / (1 + np.exp(-z))
predtrain = (pred > 0.5).astype(int)
preciznost = np.mean(predtrain == ytrain)
print("Accuracy na trening skupu sa L2 regularizacijom:", preciznost)
zdev = np.clip(xdev @ wL2, -500, 500)
pred = 1 / (1 + np.exp(-zdev))
predicted_classes = (pred >= 0.5).astype(int)

# Računaj tačnost poređenjem predviđenih klasa sa stvarnim oznakama
accuracy = np.mean(predicted_classes == ydev)
print("Accuracy na dev skupu sa L2 regularizacijom:", accuracy)

Accuracy na trening skupu sa L2 regularizacijom: 0.95484
Accuracy na dev skupu sa L2 regularizacijom: 0.9568


In [251]:
model = LogisticRegression(penalty = 'l2',max_iter=5000)
model.fit(xtrain, ytrain.reshape(-1,))

ypredSklearn = model.predict(xtrain)

print(abs(ypredSklearn - predtrain.reshape(-1,)).sum())

print(accuracy_score(ypredSklearn, ytrain))

print(accuracy_score(predtrain, ytrain))

ypredSklearn.shape, pred.shape

1619
0.97754
0.95484


((50000,), (10000, 1))

In [259]:
model = LogisticRegression (solver="liblinear",penalty = 'l2',max_iter=5000)
model.fit(xtrain, ytrain.reshape(-1,))

ypredSklearn = model.predict(xtrain)

print(abs(ypredSklearn - predtrain.reshape(-1,)).sum())

print(accuracy_score(ypredSklearn, ytrain))

print(accuracy_score(predtrain, ytrain))

ypredSklearn.shape, pred.shape

1609
0.9777
0.95484


((50000,), (10000, 1))

In [260]:
model = LogisticRegression (solver="liblinear",penalty = 'l1',max_iter=5000)
model.fit(xtrain, ytrain.reshape(-1,))

ypredSklearn = model.predict(xtrain)

print(abs(ypredSklearn - predtrain.reshape(-1,)).sum())

print(accuracy_score(ypredSklearn, ytrain))

print(accuracy_score(predtrain, ytrain))

ypredSklearn.shape, pred.shape

1633
0.97818
0.95484


((50000,), (10000, 1))