In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from numpy import linalg as la

# Location of the metadata CSV and of the folder holding the image files.
sti_til_metadata = '../Frukter/filnavn_og_klasser.csv'
bildemappe = '../Frukter/bilder/'

# One row per image; the 'klasse' column holds the numeric class label.
df_bildedata = pd.read_csv(sti_til_metadata)

# Rows with class 3.0 are used as bananas and rows with class 7.0 as oranges.
banan_df = df_bildedata[df_bildedata['klasse'].eq(3.0)]
appelsin_df = df_bildedata[df_bildedata['klasse'].eq(7.0)]

# Greyscale threshold passed to the segmentation function.
terskel = 240

def generer_maske_via_bin_segmentering(bildesti, terskel=240):
    """Return the coordinates of all pixels at or below a greyscale threshold.

    The image is converted to 8-bit greyscale (Pillow mode 'L'). Pixels whose
    value is strictly greater than ``terskel`` are excluded; all remaining
    pixels form the mask (presumably the object on a light background --
    confirm against the dataset).

    Parameters
    ----------
    bildesti : str or path-like
        Path to the image file.
    terskel : int, default 240
        Greyscale value above which a pixel is left out of the mask.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (2, N): row 0 holds the row indices and row 1
        the column indices of the N mask pixels, in ``np.nonzero`` order.
    """
    # The context manager releases the underlying file handle deterministically
    # instead of leaving it to garbage collection.
    with Image.open(bildesti) as bilde:
        graaskalabilde_np = np.array(bilde.convert('L'))
    # Same mask as ~(pixels > terskel), written without the negation.
    binaert_bilde = graaskalabilde_np <= terskel
    return np.asarray(np.nonzero(binaert_bilde))

def finn_singulerverdier(koordinater_maske, normaliser=True):
    """Return the singular values of a mean-centred coordinate matrix.

    Parameters
    ----------
    koordinater_maske : array-like of shape (d, N)
        One column per point, one row per coordinate axis.
    normaliser : bool, default True
        If true, divide the singular values by ``sqrt(N - 1)``.

    Returns
    -------
    numpy.ndarray
        The ``min(d, N)`` singular values in descending order.

    Raises
    ------
    ValueError
        If the input is not two-dimensional, contains no points, or contains
        fewer than two points while ``normaliser`` is true (the scaling would
        divide by zero).
    """
    koordinater = np.asarray(koordinater_maske, dtype=float)
    if koordinater.ndim != 2:
        raise ValueError("koordinater_maske must be a 2-D array of shape (d, N)")
    antall_punkter = koordinater.shape[1]
    if antall_punkter == 0:
        raise ValueError("koordinater_maske contains no points (empty mask)")
    if normaliser and antall_punkter < 2:
        raise ValueError("at least two points are required when normaliser=True")

    # Centre each coordinate axis on its mean before decomposing.
    sentrert = koordinater - koordinater.mean(axis=1, keepdims=True)
    # Only the singular values are needed, so skip building U and Vt.
    S = la.svd(sentrert, compute_uv=False)
    if normaliser:
        S = S / np.sqrt(antall_punkter - 1)
    return S


def _beregn_egenskaper(klasse_df):
    """Return one row of relative singular values per image in ``klasse_df``.

    For every row, the image named in the first column (presumably the file
    name -- confirm against the CSV layout) is segmented with the module-level
    ``terskel``, the two normalised singular values of its mask coordinates
    are computed, and the pair is divided by its own sum.
    """
    filnavn = klasse_df.iloc[:, 0]
    X = np.zeros((len(filnavn), 2))
    for rad, navn in enumerate(filnavn):
        koordinater = generer_maske_via_bin_segmentering(bildemappe + navn, terskel)
        X[rad, :] = finn_singulerverdier(koordinater, normaliser=True)
    # Scale every row by its sum so the two values of an image add up to 1.
    return X / X.sum(axis=1, keepdims=True)


# Feature matrices of shape (number of images, 2) for bananas and oranges.
X_b = _beregn_egenskaper(banan_df)
X_a = _beregn_egenskaper(appelsin_df)
    
    
# Column of the feature matrices that is used as the single model feature.
sing_val = 0

# Fraction of each class that goes into the training set.
trening_test_ratio = 0.7

# Number of training samples per class; the remainder becomes test data.
n_b = int(len(banan_df) * trening_test_ratio)
n_a = int(len(appelsin_df) * trening_test_ratio)

# The first n_a / n_b rows of each class are used for training, the rest for
# testing. Oranges are placed before bananas in both sets.
X_train = np.concatenate((X_a[:n_a, sing_val], X_b[:n_b, sing_val]))
X_test = np.concatenate((X_a[n_a:, sing_val], X_b[n_b:, sing_val]))

# Labels follow the same ordering: 1 for oranges, 0 for bananas.
y_train = np.concatenate((np.ones(n_a), np.zeros(n_b)))
y_test = np.concatenate((np.ones(X_a.shape[0] - n_a), np.zeros(X_b.shape[0] - n_b)))


# Column-wise contents of the two output tables. File names are ordered the
# same way as the features and labels: oranges first, then bananas.
training_data = {
    'filnavn': np.concatenate((appelsin_df['filnavn'].values[:n_a],
                               banan_df['filnavn'].values[:n_b])),
    'egenskap': X_train,
    'klasse': y_train,
}
test_data = {
    'filnavn': np.concatenate((appelsin_df['filnavn'].values[n_a:],
                               banan_df['filnavn'].values[n_b:])),
    'egenskap': X_test,
    'klasse': y_test,
}

df_train = pd.DataFrame(training_data)
df_test = pd.DataFrame(test_data)

# Persist both sets without the DataFrame index column.
df_train.to_csv('../oppgave1_trening.csv', index=False)
df_test.to_csv('../oppgave1_test.csv', index=False)

class klassifiseringsmodell:
    """Sigmoid unit with one weight and one bias, trained on squared error.

    The input feature is min/max-scaled with ``x_min`` and ``x_max`` before
    the linear step. Training is done with batch gradient descent ('GD') or
    stochastic gradient descent ('SGD').

    Attributes set in ``__init__``:
        rgen    -- ``numpy.random.RandomState`` used for initialisation and
                   for shuffling in SGD.
        w, b    -- weight and bias, each an array of shape (1,).
        errors_ -- mean loss per epoch, appended to by the training methods
                   and reset by ``fit``.
        errors  -- kept for backward compatibility; never written to.
        x_min, x_max -- bounds used for min/max scaling (defaults 0 and 1,
                   which leave the input unchanged).
    """

    def __init__(self, random_state=1):
        self.rgen = np.random.RandomState(random_state)
        self.w = self.rgen.normal(loc=0.0, scale=0.01, size=1)
        self.b = self.rgen.normal(loc=0.0, scale=0.01, size=1)
        self.errors = []
        # Created here so the training methods can be called without fit().
        self.errors_ = []
        self.x_min = 0
        self.x_max = 1

    def _normaliser(self, X):
        """Min/max-scale ``X`` with the model's ``x_min`` and ``x_max``."""
        return (X - self.x_min) / (self.x_max - self.x_min)

    def lin_alg(self, X):
        """Return ``w * x_bar + b`` for a scalar, a 1-D vector of samples or
        a 2-D array of shape (n_samples, 1)."""
        X_bar = self._normaliser(np.asarray(X))
        # With a single weight, a 1-D input of length != 1 can only be a
        # vector of samples; give it a feature axis so the dot product
        # returns one value per sample instead of failing on the shapes.
        if (np.ndim(X_bar) == 1 and np.shape(self.w) == (1,)
                and np.shape(X_bar) != (1,)):
            X_bar = X_bar[:, np.newaxis]
        return np.dot(X_bar, self.w) + self.b

    def sigmoid(self, z):
        """Logistic function; the input is clipped to [-100, 100] first so
        that ``exp`` cannot overflow."""
        z = np.clip(z, -100, 100)
        return 1.0 / (1.0 + np.exp(-z))

    def predict_num(self, X):
        """Return the sigmoid output (a value between 0 and 1) for ``X``."""
        return self.sigmoid(self.lin_alg(X))

    def predict(self, X):
        """Return the sigmoid output rounded with ``np.round``."""
        return np.round(self.predict_num(X))

    def tapsfunksjon(self, y, y_hat):
        """Squared error between the target ``y`` and the output ``y_hat``."""
        return (y - y_hat) ** 2

    def gradient(self, xi, y, y_hat):
        """Return the loss gradient with respect to ``w`` and ``b``.

        ``xi`` is the raw (unscaled) feature of one sample. The weight
        gradient uses the scaled feature because that is what the forward
        pass multiplies ``w`` with; with the default bounds 0 and 1 the
        scaled and raw values coincide.
        """
        # Derivative of (y - y_hat)^2 through the sigmoid, shared by both.
        felles = -2 * y_hat * (1 - y_hat) * (y - y_hat)
        gradC_w = felles * self._normaliser(xi)
        gradC_b = felles
        return gradC_w, gradC_b

    def gradientnedstigning(self, X, y, eta=0.01, n_epoker=50):
        """Batch gradient descent: one parameter update per epoch.

        The per-sample gradients are summed (not averaged) over the whole
        data set, then ``w`` and ``b`` are moved by ``-eta`` times that sum.
        The mean loss of each epoch is appended to ``errors_``.
        Returns ``self``.
        """
        for _ in range(n_epoker):
            cost = 0
            gradC_w = 0
            gradC_b = 0
            for xi, yi in zip(X, y):
                y_hat = self.predict_num(xi)
                grad_w, grad_b = self.gradient(xi, yi, y_hat)
                gradC_w += grad_w
                gradC_b += grad_b
                cost += self.tapsfunksjon(yi, y_hat)
            self.w += -eta * gradC_w
            self.b += -eta * gradC_b
            self.errors_.append(cost / len(y))
        return self

    def stokastisk_gradientnedstigning(self, X, y, eta=0.01, n_epoker=50):
        """Stochastic gradient descent: one parameter update per sample.

        The samples are reshuffled with ``rgen`` at the start of every epoch.
        The mean loss of each epoch is appended to ``errors_``.
        Returns ``self``.
        """
        # Index-array shuffling below needs ndarrays; this also lets callers
        # pass plain sequences. The caller's objects are not modified.
        X = np.asarray(X)
        y = np.asarray(y)
        for _ in range(n_epoker):
            cost = 0
            r = self.rgen.permutation(len(y))
            X = X[r]
            y = y[r]
            for xi, yi in zip(X, y):
                y_hat = self.predict_num(xi)
                grad_w, grad_b = self.gradient(xi, yi, y_hat)
                self.w += -eta * grad_w
                self.b += -eta * grad_b
                cost += self.tapsfunksjon(yi, y_hat)
            self.errors_.append(cost / len(y))
        return self

    def fit(self, X, y, alg='GD', eta=0.01, n_epoker=50):
        """Reset the loss history and train with the chosen algorithm.

        ``alg`` is 'GD' for batch or 'SGD' for stochastic gradient descent.
        Returns ``self``.

        Raises
        ------
        ValueError
            If ``alg`` is neither 'GD' nor 'SGD'.
        """
        treningsmetoder = {
            'GD': self.gradientnedstigning,
            'SGD': self.stokastisk_gradientnedstigning,
        }
        if alg not in treningsmetoder:
            # Fail loudly instead of returning an untrained model.
            raise ValueError(f"unknown alg {alg!r}; expected 'GD' or 'SGD'")
        self.errors_ = []
        treningsmetoder[alg](X, y, eta, n_epoker)
        return self
    
    
