In [13]:
import numpy as np
from sklearn.datasets import fetch_openml
import joblib

# Load MNIST as plain NumPy arrays (pixel matrix x, digit labels y).
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Binary target as a column vector: 1 for the digit 5, 0 for every other digit.
y = (y.astype(int) == 5).astype(int).reshape(-1, 1)

# Put the label in column 0 so one shuffle keeps rows and labels aligned.
data = np.concatenate((y, x), axis=1)

# Fixed seed -> the same row order (and therefore the same split) on every run.
np.random.seed(42)
np.random.shuffle(data)

# 50 000 training rows, 10 000 dev rows, the remainder is the test set.
datatrain, datadev, datatest = data[:50000], data[50000:60000], data[60000:]

print(*(part.shape for part in (datatrain, datadev, datatest)))

def feature(df):
    """Build the design matrix from a label-first data array.

    Column 0 of ``df`` (the label) is dropped, the remaining columns are
    divided by 255, and a leading column of ones is prepended.
    """
    scaled = df[:, 1:] / 255
    bias = np.ones((df.shape[0], 1))
    return np.concatenate((bias, scaled), axis=1)

def target(data):
    """Return column 0 of ``data`` (the label) as an (n_rows, 1) column vector."""
    return data[:, 0:1]

# Design matrices (bias column + scaled pixels) for the three splits.
xtrain, xdev, xtest = map(feature, (datatrain, datadev, datatest))

# Matching label column vectors.
ytrain, ydev, ytest = map(target, (datatrain, datadev, datatest))

# Sanity check on the scaling, then the shapes of all six arrays.
print(xtrain.max())
tuple(arr.shape for arr in (xtrain, xdev, xtest, ytrain, ydev, ytest))

(50000, 785) (10000, 785) (10000, 785)
1.0


((50000, 785), (10000, 785), (10000, 785), (50000, 1), (10000, 1), (10000, 1))

In [14]:
class Logit():
    """Binary logistic regression trained with full-batch gradient descent.

    The first column of the design matrix is treated as the bias: the L1/L2
    penalties are applied to every weight except the first one.
    """

    def __init__(self, lr=0.1, regularizacija='', reg=0.01, maxIter=None, nIter = None):
        """Store the hyper-parameters.

        lr: initial learning rate (halved every 1000 iterations during fit).
        regularizacija: 'l1' or 'l2' (case-insensitive); any other value
            (including '' or None) trains without a penalty.
        reg: regularization strength lambda.
        maxIter: optional upper bound on the iteration index; None means
            training stops only through early stopping.
        nIter: initial value of the ``nIter`` attribute; overwritten by fit.
        """
        self.lr = lr
        self.regularizacija = regularizacija
        self.reg = reg
        self.maxIter = maxIter
        self.nIter = nIter
        self.preciznostTrain = None
        self.preciznostDev = None
        self.historyW = []

    def predict(self, x, y =None):
        """Return binary predictions (probability > 0.5) for ``x``.

        If ``y`` is given, return the pair (accuracy, predictions) instead.
        """
        pred = (self.predictProba(x) > 0.5).astype(int)
        if y is None:
            return pred
        return (np.mean(pred == y.reshape(-1, 1)), pred)

    def predictProba(self, x):
        """Return the predicted probabilities (sigmoid of x @ w) for ``x``."""
        # Clip the logits so np.exp cannot overflow.
        z = np.clip(x @ self.w, -500, 500)
        return 1 / (1 + np.exp(-z))

    def fit(self, x, y, xdev, ydev, randomState = 42):
        """Train with batch gradient descent and dev-set early stopping.

        Every 100 iterations the current weights are appended to
        ``self.historyW`` (which is reset at the start of each call) and the
        dev accuracy is recorded. Once at least five evaluations exist and the
        latest dev accuracy is not higher than the one four evaluations
        earlier, training stops and the weights are rolled back to
        ``historyW[-4]``. If ``maxIter`` is set, training also stops when the
        iteration index reaches it, keeping the current weights.

        ``self.nIter`` is set to the iteration index of the weights that are
        kept. Returns the pair (train accuracy, dev accuracy).
        """
        m, n = x.shape
        y = y.reshape(-1, 1)

        # Seeds NumPy's global RNG so the initial weights are reproducible.
        np.random.seed(randomState)
        self.w = np.random.rand(n, 1) - .5

        # Start from a clean history so repeated fits do not accumulate snapshots.
        self.historyW = []
        devscore = []
        penalty = (self.regularizacija or '').lower()
        lr = self.lr
        i = 0

        while True:
            gradijenti = (x.T @ (self.predictProba(x) - y)) / m

            # The penalty skips w[0], the bias weight.
            if penalty == 'l1':
                gradijenti[1:] += (self.reg / m) * np.sign(self.w[1:])
            elif penalty == 'l2':
                gradijenti[1:] += (self.reg / m) * self.w[1:]

            self.w -= lr * gradijenti

            if i % 100 == 0:
                self.historyW.append(self.w.copy())
                devscore.append(self.predict(xdev, ydev)[0])

                # The score only changes here, so the plateau test lives here too.
                # NOTE(review): the comparison looks 4 evaluations back
                # (iteration i-400) while the restored snapshot is
                # historyW[-4] (iteration i-300); kept as in the original —
                # confirm which one is intended.
                if len(devscore) >= 5 and devscore[-1] - devscore[-5] <= 0:
                    print(f"Optimalni parametri su iz {i-300} iteracije")
                    self.w = self.historyW[-4]
                    self.nIter = i - 300
                    break

            if self.maxIter is not None and i >= self.maxIter:
                print(f"Maksimalan broj iteracija ({self.maxIter}) dostignut.")
                # No rollback happened, so the kept weights are the current ones.
                self.nIter = i
                break

            if i % 1000 == 0 and i > 0:
                lr *= 0.5
                print(f"Learning rate: {lr}, iteracija {i}")

            i += 1

        self.preciznostTrain, _ = self.predict(x, y)
        print("Preciznost na trening setu:", self.preciznostTrain)

        self.preciznostDev, _ = self.predict(xdev, ydev)
        print("Preciznost na dev setu:", self.preciznostDev)

        return self.preciznostTrain, self.preciznostDev

    def fitReg(self, x, y, xdev, ydev, listaRegularizacije):
        """Pick the lambda with the best dev accuracy and refit with it.

        The model is fitted once per value in ``listaRegularizacije``; all
        dev accuracies are kept in ``self.regRezultati`` ({lambda: accuracy}).
        The model is then refitted with the best lambda (the first one on
        ties) and the single-entry dict {best lambda: its dev accuracy} is
        returned.

        Raises ValueError if ``listaRegularizacije`` is empty.
        """
        if len(listaRegularizacije) == 0:
            raise ValueError("listaRegularizacije must contain at least one value")

        rezultati = []
        for lam in listaRegularizacije:
            self.reg = lam
            _, devScore = self.fit(x, y, xdev, ydev)
            rezultati.append(devScore)

        najbolji = int(np.argmax(rezultati))
        self.regRezultati = dict(zip(listaRegularizacije, rezultati))

        # fit() re-initialises the weights, so only reg has to be set here.
        self.reg = listaRegularizacije[najbolji]
        self.fit(x, y, xdev, ydev)
        return {self.reg : rezultati[najbolji]}

# Unregularized baseline: train once and persist the fitted model.
logit = Logit()
logit.fit(xtrain, ytrain, xdev, ydev)
joblib.dump(logit, 'logitNumpy.pkl')

# Five candidate lambdas, log-spaced between 10**0 and 10**1.2.
regul = np.logspace(0, 1.2, 5)

# L1-penalized model: search the lambdas on the dev set, refit, persist.
logitL1 = Logit(regularizacija='l1')
logitL1.fitReg(xtrain, ytrain, xdev, ydev, listaRegularizacije=regul)
joblib.dump(logitL1, 'logitL1Numpy.pkl')

# L2-penalized model: same search over the same grid.
logitL2 = Logit(regularizacija='l2')
logitL2.fitReg(xtrain, ytrain, xdev, ydev, regul)
joblib.dump(logitL2, 'logitL2Numpy.pkl')

In [15]:
# Reload the three persisted models so the cells below work without retraining.
logit, logitL1, logitL2 = (
    joblib.load(putanja)
    for putanja in ('logitNumpy.pkl', 'logitL1Numpy.pkl', 'logitL2Numpy.pkl')
)

In [16]:
# Unregularized model: iteration index recorded by fit and its dev accuracy.
print(logit.nIter, logit.preciznostDev)

2100 0.9628


In [17]:
# L1 model: iteration index recorded by fit, dev accuracy, penalty type and the lambda chosen by fitReg.
print(logitL1.nIter, logitL1.preciznostDev, logitL1.regularizacija, logitL1.reg)

3000 0.9634 l1 15.848931924611133


In [18]:
# L2 model: iteration index recorded by fit, dev accuracy, penalty type and the lambda chosen by fitReg.
print(logitL2.nIter, logitL2.preciznostDev, logitL2.regularizacija, logitL2.reg)

2100 0.9629 l2 3.9810717055349722


In [19]:
# Dev-set accuracy of the three NumPy models, recomputed from their weights.
print(*(klasifikator.predict(xdev, ydev)[0] for klasifikator in (logit, logitL1, logitL2)))

0.9628 0.9634 0.9629


In [20]:
# Test-set accuracy of the three NumPy models.
print(*(klasifikator.predict(xtest, ytest)[0] for klasifikator in (logit, logitL1, logitL2)))

0.9626 0.9635 0.9625


In [21]:
# Iteration index recorded by fit for each of the three models.
tuple(klasifikator.nIter for klasifikator in (logit, logitL1, logitL2))

(2100, 3000, 2100)

In [22]:
# Record the NumPy version the results above were produced with.
print(np.__version__)

2.2.3


In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [24]:
# Reference models from scikit-learn: no penalty, L1 (liblinear) and L2.
# NOTE(review): xtrain already carries a leading column of ones while these
# estimators presumably also fit their own intercept — confirm this is intended.
model = LogisticRegression(penalty=None, max_iter=5000, random_state=42)
model.fit(xtrain, ytrain.ravel())
modelL1 = LogisticRegression(solver="liblinear", penalty='l1', max_iter=5000)
modelL1.fit(xtrain, ytrain.ravel())
modelL2 = LogisticRegression(penalty='l2', max_iter=5000, random_state=42)
modelL2.fit(xtrain, ytrain.ravel())

ypredSklearn = model.predict(xtrain)
ypredSklearnL1 = modelL1.predict(xtrain)
ypredSklearnL2 = modelL2.predict(xtrain)

# accuracy_score takes (y_true, y_pred); the labels are flattened to 1-D to
# match the shape of the predictions.
print(
    accuracy_score(ytrain.ravel(), ypredSklearn),
    accuracy_score(ytrain.ravel(), ypredSklearnL1),
    accuracy_score(ytrain.ravel(), ypredSklearnL2),
)
# Training accuracy of the NumPy models, for comparison.
print(logit.preciznostTrain, logitL1.preciznostTrain, logitL2.preciznostTrain)



0.9782 0.97816 0.97754
0.96416 0.96508 0.96412


In [25]:
# Dev-set predictions of the three scikit-learn models.
ypredSklearnDev = model.predict(xdev)
ypredSklearnDevL1 = modelL1.predict(xdev)
ypredSklearnDevL2 = modelL2.predict(xdev)

# accuracy_score takes (y_true, y_pred); the labels are flattened to 1-D to
# match the shape of the predictions.
print(
    accuracy_score(ydev.ravel(), ypredSklearnDev),
    accuracy_score(ydev.ravel(), ypredSklearnDevL1),
    accuracy_score(ydev.ravel(), ypredSklearnDevL2),
)
# Dev accuracy of the NumPy models, for comparison.
print(logit.preciznostDev, logitL1.preciznostDev, logitL2.preciznostDev)

0.9731 0.9732 0.9732
0.9628 0.9634 0.9629


In [27]:
# Test-set predictions of the three scikit-learn models.
ypredSklearnTest = model.predict(xtest)
ypredSklearnTestL1 = modelL1.predict(xtest)
ypredSklearnTestL2 = modelL2.predict(xtest)

# accuracy_score takes (y_true, y_pred); the labels are flattened to 1-D to
# match the shape of the predictions.
print(
    accuracy_score(ytest.ravel(), ypredSklearnTest),
    accuracy_score(ytest.ravel(), ypredSklearnTestL1),
    accuracy_score(ytest.ravel(), ypredSklearnTestL2),
)
# Test accuracy of the NumPy models, for comparison.
print(logit.predict(xtest, ytest)[0], logitL1.predict(xtest, ytest)[0], logitL2.predict(xtest, ytest)[0])

0.9747 0.9752 0.9735
0.9626 0.9635 0.9625
