In [1]:
# Please note that this code needs only to be run in a fresh runtime.
# However, it can be rerun afterwards too.
!pip install -q gdown httpimport

In [2]:
# Standard IPython notebook imports
import itertools
import io
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize as sopt
import scipy.stats as sstats
import seaborn as sns
import sklearn.ensemble
import sklearn.tree
from sklearn import datasets
from tqdm.auto import tqdm

import httpimport

# In this way we can import functions straight from github
with httpimport.github_repo(
    "janchorowski", "nn_assignments", module="common", branch="nn18"
):
    from common.gradients import check_gradient
    from common.plotting import plot_mat

sns.set_style("whitegrid")

In [3]:
# Mount Google Drive (Colab only) so that the files under /content/drive,
# which the data-loading cell reads, are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Folder on the mounted Drive that holds the competition CSV files.
DATA_DIR = "/content/drive/MyDrive/petfinder-pawpularity-score"

train_df = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
# Constant bias column. Assigning a scalar broadcasts to every row, so the
# number of rows is never hard-coded.
train_df["C"] = 1.0

test_df = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))
# NOTE(review): the test "targets" are taken from sample_submission.csv, which
# presumably holds placeholder predictions rather than ground truth -- errors
# measured against them may not be meaningful. TODO confirm.
test_df["Pawpularity"] = pd.read_csv(os.path.join(DATA_DIR, "sample_submission.csv"))["Pawpularity"]
test_df["C"] = 1.0
test_df

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,C
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,67.75,1.0
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,59.15,1.0
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,20.02,1.0
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,94.53,1.0
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,89.82,1.0
5,b03f7041962238a7c9d6537e22f9b017,0,0,1,1,1,1,1,1,1,0,1,0,65.5,1.0
6,c978013571258ed6d4637f6e8cc9d6a3,1,0,0,0,1,1,0,1,0,1,1,1,71.42,1.0
7,e0de453c1bffc20c22b072b34b54e50f,1,0,1,0,0,0,0,0,1,0,0,1,5.85,1.0


In [5]:
def MSE(preds, test_Y):
    """Return the root-mean-squared error between predictions and targets.

    Despite its name, the function takes the square root of the mean squared
    difference, so the value returned is an RMSE (same units as the targets).
    The name is kept because other cells call it.

    Args:
        preds: scalar or array-like of predictions (broadcast against test_Y)
        test_Y: array-like of target values
    Returns:
        The root of the mean squared difference as a numpy float.
    """
    # np.asarray lets plain Python lists and scalars be used as well as arrays.
    residuals = np.asarray(preds) - np.asarray(test_Y)
    return np.sqrt(np.mean(residuals ** 2))

KNN:

In [6]:
def KNN(train_X, train_Y, test_X, ks, verbose=False):
    """
    Compute k-nearest-neighbour predictions (Euclidean distance) for various k.

    For every test point the k closest training points are found and the most
    frequent target value among them (the mode) is returned as the prediction.

    Args:
        train_X: array-like of shape Ntrain x D
        train_Y: array-like of shape Ntrain
        test_X: array-like of shape Ntest x D
        ks: iterable of integers
        verbose: if True, print progress messages
    Returns:
        preds: dict k: array of Ntest predictions for k
    """
    # Cast data to float32 (also accepts plain lists / other array-likes)
    train_X = np.asarray(train_X, dtype=np.float32)
    test_X = np.asarray(test_X, dtype=np.float32)
    train_Y = np.asarray(train_Y)

    # Alloc space for results
    preds = {}

    if verbose:
        print("Computing distances... ", end='')
    # Efficient distance matrix: ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sq_dists = (
        (train_X ** 2).sum(1)[:, None]
        + (test_X ** 2).sum(1)
        - 2 * train_X.dot(test_X.T)
    )
    # Round-off can push tiny squared distances slightly below zero; without
    # the clamp sqrt would yield NaN and argsort would rank exact matches last.
    dists = np.sqrt(np.maximum(sq_dists, 0))

    if verbose:
        print("Sorting... ", end='')

    # Row i of `closest` holds, per test point, the index of its i-th nearest
    # training point.
    closest = np.argsort(dists, axis=0)

    if verbose:
        print("Computing predictions...", end='')

    targets = train_Y[closest]

    for k in ks:
        # Majority vote among the k nearest targets of every test point.
        predictions = sstats.mode(targets[:k])
        preds[k] = predictions[0].ravel()
    if verbose:
        print("Done")
    return preds

In [44]:
# Binary metadata features plus the constant column "C".
feature_columns = [
    'Subject Focus', 'Eyes', "Face", "Near", "Action", "Accessory", "Group",
    "Collage", "Human", "Occlusion", "Info", "Blur", "C",
]

train_x = train_df[feature_columns].to_numpy()
train_y = train_df["Pawpularity"].to_numpy()

test_x = test_df[feature_columns].to_numpy()
test_y = test_df["Pawpularity"].to_numpy()

preds_KNN = KNN(train_x, train_y, test_x, list(range(1, 20, 2)))

# One error value per k, in increasing order of k.
for knn_preds in preds_KNN.values():
    print(MSE(knn_preds, test_y))

M = np.mean(train_y)

# Baseline: the error obtained when the features are ignored and the mean
# training Pawpularity is always predicted.
print(MSE(M, test_y))

33.940402914520625
45.01586886865564
45.19832353085676
49.19055752885914
48.832094466651746
49.911656454179116
59.36169598318431
45.772081556337376
45.772081556337376
45.98894921608886
36.10768937034517


KNN with mean-centred cosine similarity (i.e. Pearson correlation between feature rows):

In [10]:
def KNN_cos(train_X, train_Y, test_X, ks, verbose=False):
    """
    Compute k-nearest-neighbour predictions for various k using a
    cosine-type similarity.

    Every row is centred by its own mean before the cosine is taken, so the
    similarity is the Pearson correlation between a training row and a test
    row. The prediction is the most frequent target value (the mode) among
    the k most similar training points.

    Args:
        train_X: array-like of shape Ntrain x D
        train_Y: array-like of shape Ntrain
        test_X: array-like of shape Ntest x D
        ks: iterable of integers
        verbose: if True, print progress messages
    Returns:
        preds: dict k: array of Ntest predictions for k
    """
    # Cast data to float32 (also accepts plain lists / other array-likes)
    train_X = np.asarray(train_X, dtype=np.float32)
    test_X = np.asarray(test_X, dtype=np.float32)
    train_Y = np.asarray(train_Y)

    # Alloc space for results
    preds = {}

    if verbose:
        print("Computing distances... ", end='')

    train_centered = train_X - train_X.mean(1)[:, None]
    test_centered = test_X - test_X.mean(1)[:, None]
    train_ss = (train_centered ** 2).sum(1)
    test_ss = (test_centered ** 2).sum(1)
    denom = np.sqrt(np.outer(train_ss, test_ss))

    # A constant row has zero variance, which makes the correlation undefined
    # (0 / 0). Such pairs are explicitly given the lowest similarity so they
    # are ranked last, instead of producing NaN and RuntimeWarnings.
    sims = np.full(denom.shape, -np.inf, dtype=denom.dtype)
    np.divide(train_centered.dot(test_centered.T), denom, out=sims, where=denom > 0)

    if verbose:
        print("Sorting... ", end='')

    # Sort by decreasing similarity: row i of `closest` holds, per test point,
    # the index of its i-th most similar training point.
    closest = np.argsort(-sims, axis=0)

    if verbose:
        print("Computing predictions...", end='')

    targets = train_Y[closest]

    for k in ks:
        # Majority vote among the k most similar targets of every test point.
        predictions = sstats.mode(targets[:k])
        preds[k] = predictions[0].ravel()
    if verbose:
        print("Done")
    return preds

In [16]:
preds_KNN = KNN_cos(train_x, train_y, test_x, list(range(1, 20, 2)))

# One error value per k, in increasing order of k.
for knn_cos_preds in preds_KNN.values():
    print(MSE(knn_cos_preds, test_y))

39.50194235730694
48.618679023601615
49.36244473281282
55.40506249432447
55.255279838219984
55.255279838219984
47.994228298827764
46.958635521062575
45.997591784788035
45.997591784788035


Logistic regression:

In [85]:
def logreg_loss(Theta, X, Y):
    """Negative log-likelihood of logistic regression and its gradient.

    Args:
        Theta: array-like with one weight per column of X (any shape with
            X.shape[1] elements)
        X: array-like of shape N x D
        Y: array-like with N binary labels (0/1), either flat or of shape N x 1
    Returns:
        nll: the summed negative log-likelihood as a Python float
        grad: gradient of nll with respect to Theta, with the shape of Theta
    """
    Theta = np.asarray(Theta, dtype=float)
    X = np.asarray(X, dtype=float)
    # Force a column so that `sigma - Y` can never broadcast to an N x N matrix.
    Y = np.asarray(Y, dtype=float).reshape(-1, 1)

    logits = X @ Theta.reshape(X.shape[1], 1)

    # -[Y log(s) + (1 - Y) log(1 - s)] simplifies to log(1 + e^a) - Y a.
    # logaddexp evaluates log(1 + e^a) without overflow, so the loss stays
    # finite even for very large |a|.
    nll = float(np.sum(np.logaddexp(0, logits) - Y * logits))

    # Overflow-free sigmoid: 1 / (1 + e^-a) = exp(-log(1 + e^-a)).
    sigma = np.exp(-np.logaddexp(0, -logits))
    grad = X.T @ (sigma - Y)

    return nll, grad.reshape(Theta.shape)

# Turn the regression target into a binary label: 1 when Pawpularity is high
# (above the threshold), 0 when it is low.
PAWPULARITY_THRESHOLD = 33
train_y_logreg = (train_y > PAWPULARITY_THRESHOLD).astype(float).reshape(-1, 1)

# One initial weight per feature column, so the count is never hard-coded.
Theta0 = np.ones(train_x.shape[1])
ThetaOpt = sopt.fmin_l_bfgs_b(
    logreg_loss, Theta0.copy(), args=(train_x, train_y_logreg)
)[0]
ThetaOpt


array([-0.06875703, -0.19080116,  0.28091435,  0.03097823, -0.05782879,
        0.06751073,  0.2590542 , -0.05887729,  0.00855425,  0.145315  ,
        0.03942742, -0.2626748 , -0.20935007])

In [86]:
# Linear score of every test row; a score greater than or equal to 0 is
# classified as 1.
h = np.dot(test_x, ThetaOpt)
h

array([ 0.21201238, -0.19997433,  0.01138342,  0.04223467, -0.05244684,
        0.36038303, -0.40523483, -0.2513133 ])

In [88]:
# Map the predicted class to a Pawpularity value: 40 for class 1 (score >= 0),
# 20 otherwise. The two values (20, 40) can be experimented with.
preds_logreg = np.where(h >= 0, 40, 20)
preds_logreg

array([40, 20, 40, 40, 20, 40, 20, 20])

In [89]:
# Error of the two-level logistic-regression predictions on the test targets.
logreg_error = MSE(preds_logreg, test_y)
print(logreg_error)

41.89992183763593


General robust loss function:

In [90]:
def rho(x, alpha, c):
    """The general robust loss function.

    Args:
        x: residual(s), scalar or numpy array
        alpha: scalar shape parameter
        c: scalar scale parameter (non-zero)
    Returns:
        The loss for every element of x.

    The general expression divides by alpha and by |alpha - 2|, so it is
    undefined at alpha = 0 and alpha = 2. Those two cases use the closed-form
    limits of the expression instead of a perturbed alpha, which avoids the
    precision loss of subtracting two nearly equal numbers.
    """
    half_sq = 0.5 * (x / c) ** 2
    if alpha == 2:
        # Limit alpha -> 2: plain (halved) squared error.
        return half_sq
    if alpha == 0:
        # Limit alpha -> 0: log(1 + 0.5 * (x / c)^2).
        return np.log1p(half_sq)
    shape_term = np.abs(alpha - 2)
    return shape_term / alpha * (((x / c) ** 2 / shape_term + 1) ** (alpha / 2) - 1)

def d_rho(x, alpha, c):
    """Derivative of the general robust loss with respect to x.

    Args:
        x: residual(s), scalar or numpy array
        alpha: scalar shape parameter
        c: scalar scale parameter (non-zero)
    Returns:
        The derivative for every element of x.

    Only alpha = 2 needs special handling (the general expression divides by
    |alpha - 2|); there the derivative is exactly x / c^2. At alpha = 0 the
    general expression is already well defined, so no perturbation of alpha
    is required.
    """
    if alpha == 2:
        return x / c**2
    return x / c**2 * ((x / c) ** 2 / np.abs(alpha - 2) + 1) ** (alpha / 2 - 1)

In [98]:
def linreg_general_loss(Theta, X, Y, alpha, c):
    """Mean general robust loss of a linear model and its gradient.

    Args:
        Theta: array-like with one weight per column of X
        X: array of shape N x D
        Y: array of shape N
        alpha: shape parameter passed on to rho / d_rho
        c: scale parameter passed on to rho / d_rho
    Returns:
        loss: mean of rho over the residuals Y - X @ Theta
        grad: gradient of loss with respect to Theta, with the shape of Theta
    """
    Theta = np.asarray(Theta)
    residuals = Y - X @ Theta
    loss = np.mean(rho(residuals, alpha, c))

    # Chain rule: d residual / d Theta = -X, so the gradient of the mean loss
    # is -X^T d_rho(residuals) / N. A single matrix product replaces the
    # per-column Python loop.
    d_loss_d_residual = d_rho(residuals, alpha, c)
    grad = -(X.T @ d_loss_d_residual) / residuals.shape[0]
    return loss, grad.reshape(Theta.shape)

In [96]:
# Inspect the Pawpularity target array that the regression fit below uses.
train_y

array([63, 42, 28, ..., 20, 20, 30])

In [101]:
# One initial weight per feature column, so the count is never hard-coded.
Theta0 = np.ones(train_x.shape[1])

# Fit the linear model under the general robust loss for every combination of
# shape parameter alpha and scale c, and print the test error of each fit.
for alpha, c in itertools.product([-3, -2, -1, 0, 1, 2, 3, 4], [1.]):
    # `args` passes the data and the loss parameters on to the loss function;
    # only the optimised weights are needed from the returned triple.
    ThetaOpt, _, _ = sopt.fmin_l_bfgs_b(
        linreg_general_loss, Theta0.copy(), args=(train_x, train_y, alpha, c)
    )
    pred = test_x @ ThetaOpt
    print(MSE(pred, test_y))


62.74256400711534
40.95203363923424
39.86751045563913
40.61718324999028
38.31332798394285
36.38205652401728
34.503142357944526
33.086588687390154
