# PART I

## Load and Preprocess Data

In [None]:
import numpy as np
import pandas as pd
from IA2_skeleton_code import normalize

import matplotlib.pyplot as plt

# Load the training and validation (dev) splits.
df_train = pd.read_csv("IA2-train.csv")
df_val = pd.read_csv("IA2-dev.csv")

# Preprocess
# Columns that are passed through `normalize` below.
numerical_feas = ["Age", "Annual_Premium", "Vintage"]

# Work on explicit copies so df_train / df_val keep their raw values.
# pd.DataFrame(df) does not copy by default, so assigning columns on the result
# can also rewrite the frame it was built from. A later cell normalizes df_val
# again with different statistics and needs it untouched.
df_norm_train = df_train.copy()
df_norm_val = df_val.copy()

for col in numerical_feas:
    # Statistics come from the training split only and are reused for the
    # validation split.
    mu = df_train[col].mean()
    sigma = df_train[col].std()

    df_norm_train[col] = normalize(df_train, col, mu, sigma)
    df_norm_val[col] = normalize(df_val, col, mu, sigma)

## Training

In [None]:
from IA2_skeleton_code import sigmoid

def LR_L2_train(train_data, val_data, _lambda, alpha, isNoisy):
    """Fit a logistic-regression model with an L2 penalty by full-batch updates.

    Args:
        train_data: DataFrame containing a "Response" label column; every
            other column is used as a feature.
        val_data: DataFrame with the same columns; only used to track accuracy.
        _lambda: L2 regularization strength.
        alpha: Learning rate.
        isNoisy: Selects the convergence threshold (2e-10 when true,
            1e-10 otherwise).

    Returns:
        Tuple ``(w, acc_train, acc_val)``: the final weights and the lists of
        training / validation accuracy recorded after every iteration.

    Notes:
        Training stops when the change in the gradient magnitude between two
        consecutive iterations drops below the threshold, or when that
        magnitude becomes non-finite (divergence).
    """
    features = train_data.columns.drop("Response")
    X_train = train_data[features]
    y_train = train_data["Response"].to_frame()

    X_val = val_data[features]
    y_val = val_data["Response"].to_frame()

    # Rename the label column to 0, presumably so it lines up with the column
    # label of the prediction frames in the subtractions below.
    y_train = y_train.rename(columns={"Response": 0})
    y_val = y_val.rename(columns={"Response": 0})

    n = len(X_train)
    n_features = len(X_train.columns.values)

    # Found through experimenting (might need to be changed for large lambda?)
    if isNoisy:
        epsilon = 0.0000000002
    else:
        epsilon = 0.0000000001

    # TODO: randomized?
    # Every weight starts at 0.2, stored as a column vector.
    w = (np.ones(n_features) * 0.2).reshape(n_features,1)
    acc_train = []
    acc_val = []
    grad_vals = []

    converged = False
    n_iter = 1  # 1-based iteration counter (named to avoid shadowing builtin iter)
    while not converged:
        # `sigmoid` comes from IA2_skeleton_code; its output is used as the
        # predicted probability of the positive class.
        log_odds = sigmoid(X_train.dot(w))
        grad = ((y_train - log_odds).T.dot(X_train)).T / n
        w += alpha * grad

        # Regularization
        # The first weight is saved before the shrink step and written back
        # afterwards so it is not penalized (presumably the bias term - confirm
        # against the column order of the data).
        w0 = w[0][0]     # Exclude w0
        w -= (alpha * _lambda) * w
        w[0][0] = w0

        # A prediction is correct when label minus thresholded probability is 0.
        y_pred_train = y_train - (sigmoid(X_train.dot(w)).rename(columns={"Response":0}) >= 0.5).astype(int)
        acc = (y_pred_train == 0).sum()[0] / n
        acc_train.append(acc)

        y_pred_val = y_val - (sigmoid(X_val.dot(w)).rename(columns={"Response":0}) >= 0.5).astype(int)
        acc = (y_pred_val == 0).sum()[0] / len(X_val)
        acc_val.append(acc)

        # Check for convergence
        # NOTE(review): grad is already divided by n above; the extra / n is
        # kept because epsilon was tuned experimentally against this scale.
        magLw = np.linalg.norm(grad.values) / n
        grad_vals.append(magLw)

        # Once the gradient magnitude is NaN/inf every comparison below is
        # False, so the loop would spin forever; stop and report instead.
        if not np.isfinite(magLw):
            print("learning_rate = %f, lambda = %f, diverged at iter #%d" % (alpha, _lambda, n_iter))
            break

        if n_iter % 1000 == 0:
            # Report the actual iteration number, not a fixed "1000".
            print("Iteration %d, magLw = %f, change rate = %f" % (n_iter, magLw, abs(grad_vals[-1] - grad_vals[-2])))

        # Check for difference between cur and past gradient, if minimal change then model has converged
        if (len(grad_vals) > 2) and (abs(grad_vals[-1] - grad_vals[-2]) < epsilon):
            print("learning_rate = %f, lambda = %f, converged at iter #%d" % (alpha, _lambda, n_iter))
            converged = True # set model to converged, return weights and train/val acc

        n_iter += 1

    return w, acc_train, acc_val

# Regularization strengths 10^i for i in [-4, 2] (seven values).
_lambdas = [10 ** exponent for exponent in range(-4, 3)]
# Learning rate paired with each lambda by position. The list has eight
# entries, so zip() below never uses the last one.
lrs = [0.05, 0.04, 0.03, 0.035, 0.08, 0.001, 0.0001, 0.000004]

# Results of every run, all keyed by str(lambda).
acc_train_lmb = {}  # per-iteration training accuracy
acc_val_lmb = {}    # per-iteration validation accuracy
w_train_lmb = {}    # final weights

for lmbd, lr in zip(_lambdas, lrs):
    key = str(lmbd)
    # Train one model for this (lambda, learning rate) pair on the clean data.
    w_train_lmb[key], acc_train_lmb[key], acc_val_lmb[key] = LR_L2_train(
        df_norm_train, df_norm_val, lmbd, lr, False
    )


In [None]:
import numpy as np
from IA2_skeleton_code import plot_losses,sparsity_graph

# Per-iteration accuracy curves for every lambda.
plot_losses(acc_train_lmb, acc_val_lmb, "L2_accuracy_per_iteration.jpg")

# Final (last-iteration) accuracy of each model, in dictionary order.
acc_train = []
acc_val = []
for i in acc_train_lmb.keys():
    acc_train.append(acc_train_lmb[str(i)][-1])
    acc_val.append(acc_val_lmb[str(i)][-1])

# The dictionary keys are str(lambda). np.log10 cannot operate on strings,
# so convert each key back to float before taking the logarithm.
log10_lambdas = [np.log10(float(x)) for x in acc_train_lmb.keys()]

plt.scatter(log10_lambdas, acc_train, label="Train accuracy")
plt.scatter(log10_lambdas, acc_val, label="Validation accuracy")
plt.rcParams['font.size'] = 12
plt.rcParams['axes.titlesize'] = 12
plt.rcParams["legend.edgecolor"] = 'black'
plt.rcParams["legend.fontsize"] = 10
plt.legend()
plt.savefig("L2_accuracy_per_lambda.jpg")

sparsity_graph(w_train_lmb, "SparsityL2.jpg")

In [None]:
from IA2_skeleton_code import LR_L2_train_torch

# Same sweep as before, but using the torch implementation from the skeleton.
# Regularization strengths 10^i for i in [-4, 2] (seven values).
_lambdas = [10 ** power for power in range(-4, 3)]
# Learning rates matched to the lambdas by position; zip() ignores the eighth.
lrs = [0.05, 0.04, 0.03, 0.035, 0.08, 0.001, 0.0001, 0.000004]

# Fresh result containers, each keyed by str(lambda).
acc_train_lmb = {}  # per-iteration training accuracy
acc_val_lmb = {}    # per-iteration validation accuracy
w_train_lmb = {}    # final weights

for lmbd, lr in zip(_lambdas, lrs):
    run = LR_L2_train_torch(df_norm_train, df_norm_val, lmbd, lr, False)
    w, acc_train, acc_val = run
    lmbd_key = str(lmbd)
    acc_train_lmb[lmbd_key] = acc_train
    acc_val_lmb[lmbd_key] = acc_val
    w_train_lmb[lmbd_key] = w


In [None]:
from IA2_skeleton_code import plot_losses,sparsity_graph

# plot_losses(acc_train_lmb, acc_val_lmb, "L2_accuracy_per_iteration.jpg")

# Last recorded accuracy of each run, in the order the runs were stored.
lambda_keys = list(acc_train_lmb.keys())
acc_train = [acc_train_lmb[str(k)][-1] for k in lambda_keys]
acc_val = [acc_val_lmb[str(k)][-1] for k in lambda_keys]

# Keys are str(lambda); turn them back into numbers for the log-scale x axis.
log10_lambdas = [np.log10(float(k)) for k in lambda_keys]

plt.scatter(log10_lambdas, acc_train, label="Train accuracy")
plt.scatter(log10_lambdas, acc_val, label="Validation accuracy")

# Global matplotlib styling, applied before the legend is created.
for rc_name, rc_value in (
    ('font.size', 12),
    ('axes.titlesize', 12),
    ("legend.edgecolor", 'black'),
    ("legend.fontsize", 10),
):
    plt.rcParams[rc_name] = rc_value

plt.legend()
plt.savefig("L2_accuracy_per_lambda.jpg")

sparsity_graph(w_train_lmb, "SparsityL2.jpg")

In [None]:
def _best_final_accuracy(acc_by_lambda):
    """Return ``(lambda_key, accuracy)`` of the run with the highest final accuracy.

    Only the last entry of each accuracy list is compared. Ties keep the first
    run encountered. ``("", 0)`` is returned when the dictionary is empty or
    no final accuracy is greater than 0.
    """
    best_key = ""
    best_acc = 0
    for lmbda, acc in acc_by_lambda.items():
        if acc[-1] > best_acc:
            best_key = lmbda
            best_acc = acc[-1]
    return best_key, best_acc


# Best run according to validation accuracy.
best_lmbd_val, max_acc_val = _best_final_accuracy(acc_val_lmb)
print("Best model has %f accuracy and lambda as %s" % (max_acc_val, best_lmbd_val))

# Best run according to training accuracy. This must report the training
# maximum, not the validation one, and is kept in its own variable so the
# validation winner above is not overwritten.
best_lmbd_train, max_acc_train = _best_final_accuracy(acc_train_lmb)
print("Best model has %f accuracy and lambda as %s" % (max_acc_train, best_lmbd_train))

### Top 5 features

In [None]:
# Shallow copy of the dict: entries are re-bound to reduced frames below, so
# the weight objects stored in w_train_lmb are not modified.
tmp_w_train_lmb = w_train_lmb.copy()
top_5_features = {}

for lamb in _lambdas:
    lamb_key = str(lamb)
    top_5_features[lamb_key] = {}
    # Five rounds: take the row with the largest weight, record it, then drop
    # it so the next round finds the next-largest one.
    for _ in range(5):
        idx_max = tmp_w_train_lmb[lamb_key].idxmax()
        feature = idx_max[0]
        top_5_features[lamb_key][feature] = float(tmp_w_train_lmb[lamb_key].loc[feature])
        tmp_w_train_lmb[lamb_key] = tmp_w_train_lmb[lamb_key].drop(feature)

# Report the selection for every lambda.
for lamb in _lambdas:
    print("Top 5 features of model trained by lambda = %s:" % str(lamb))
    print(top_5_features[str(lamb)])
    print()

# Display the full weight object for the smallest lambda.
w_train_lmb["0.0001"]

# PART II

In [None]:
from IA2_skeleton_code import LR_L2_train

df_train_noisy = pd.read_csv("IA2-train-noisy.csv")

# Normalize into copies so the raw frames stay intact. The training frame was
# previously only aliased, which overwrote df_train_noisy in place.
# NOTE(review): this assumes df_val still holds the raw validation values.
df_norm_train_noisy = df_train_noisy.copy()
df_norm_val_noisy = df_val.copy()

for col in numerical_feas:
    # Statistics come from the noisy training split and are reused for the
    # validation split.
    mu = df_train_noisy[col].mean()
    sigma = df_train_noisy[col].std()

    df_norm_train_noisy[col] = normalize(df_train_noisy, col, mu, sigma)
    df_norm_val_noisy[col] = normalize(df_val, col, mu, sigma)

# Results of every run, all keyed by str(lambda).
acc_noisy_train_lmb = {}  # per-iteration training accuracy
acc_noisy_val_lmb = {}    # per-iteration validation accuracy
w_noisy_train_lmb = {}    # final weights

_lambdas = [10**(x) for x in range(-4,3)] # generate values of lambda 10^i, i in [-4,2]
# Paired with the lambdas by position; zip() below ignores the eighth entry.
lrs = [0.05, 0.04, 0.03, 0.035, 0.08, 0.001, 0.0001, 0.000004]

for lmbd, lr in zip(_lambdas, lrs):
    w,acc_train,acc_val = LR_L2_train(df_norm_train_noisy, df_norm_val_noisy, lmbd, lr, True) # train model with current lambda value
    acc_noisy_train_lmb[str(lmbd)] = acc_train # store training accuracy, indexed by lambda value
    acc_noisy_val_lmb[str(lmbd)] = acc_val # store val accuracy, indexed by lambda value
    w_noisy_train_lmb[str(lmbd)] = w # store final training weights, indexed by lambda value



In [None]:
from IA2_skeleton_code import plot_losses,sparsity_graph

# Per-iteration accuracy curves of the models trained on the noisy data.
plot_losses(acc_noisy_train_lmb, acc_noisy_val_lmb, "L2_noisy_accuracy_per_iteration.jpg")

# Last recorded accuracy for each lambda, in sweep order.
acc_train = [acc_noisy_train_lmb[str(lam)][-1] for lam in _lambdas]
acc_val = [acc_noisy_val_lmb[str(lam)][-1] for lam in _lambdas]

fig2 = plt.figure(figsize=(8,6))

# Both series share the same log10(lambda) x positions.
x_log = np.log10(_lambdas)
plt.scatter(x_log, acc_train, label="Train accuracy")
plt.scatter(x_log, acc_val, label="Validation accuracy")
plt.legend()

# Global matplotlib styling; set after the legend call, as in the original cell.
for rc_name, rc_value in (
    ('font.size', 12),
    ('axes.titlesize', 12),
    ("legend.edgecolor", 'black'),
    ("legend.fontsize", 10),
):
    plt.rcParams[rc_name] = rc_value

plt.savefig("L2_noisy_accuracy_per_lambda.jpg")

sparsity_graph(w_noisy_train_lmb, "L2_noisy_sparsity.jpg")

# PART III

In [None]:
from IA2_skeleton_code import LR_L1_train

# Regularization strengths 10^i for i in [-4, 2] (seven values).
_lambdas = [10 ** exponent for exponent in range(-4, 3)]
# Learning rates that work best for the lambda at the same position.
lrs = [0.01, 0.03, 0.02, 0.08, 0.08, 0.5, 0.0001]

# Results of every L1 run, all keyed by str(lambda).
train_acc_lmb = {}  # per-iteration training accuracy
val_acc_lmb = {}    # per-iteration validation accuracy
train_w_lmb = {}    # final weights

for lmbd, lr in zip(_lambdas, lrs):
    # One L1-regularized model per (lambda, learning rate) pair.
    final_w, train_hist, val_hist = LR_L1_train(df_norm_train, df_norm_val, lmbd, lr)
    lmbd_key = str(lmbd)
    train_acc_lmb[lmbd_key] = train_hist
    val_acc_lmb[lmbd_key] = val_hist
    train_w_lmb[lmbd_key] = final_w

# Accuracy curves (train and validation) for all runs.
plot_losses(train_acc_lmb, val_acc_lmb, "L1_Accuracy.jpg")

# Sparsity of the learned weights.
sparsity_graph(train_w_lmb, "SparsityL1.jpg")

In [None]:
# Final (last-iteration) accuracy of every L1 model. Read from the L1 result
# dictionaries (train_acc_lmb / val_acc_lmb); acc_train_lmb / acc_val_lmb hold
# the L2 runs and would plot the wrong models under an L1 file name.
acc_train = []
acc_val = []
for i in _lambdas:
    acc_train.append(train_acc_lmb[str(i)][-1])
    acc_val.append(val_acc_lmb[str(i)][-1])

plt.scatter(np.log10(_lambdas), acc_train, label="Train accuracy")
plt.scatter(np.log10(_lambdas), acc_val, label="Validation accuracy")
plt.legend()
plt.savefig("L1_accuracy_per_lambda.jpg")

In [None]:
# Debug check: show the Python type of the weights stored for lambda = 1
# (key '1' is str(10**0) from the sweep above).
type(train_w_lmb['1'])