In [None]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

from implementations import *
from evaluation import *
import datetime

## Load the training data into feature matrix, class labels, and event ids:

In [None]:
from proj1_helpers import *
# Relative path of the training CSV file.
DATA_TRAIN_PATH = 'data/train.csv'
# load_csv_data is not defined in this notebook; it is presumably provided by
# the star import above (confirm). Its three return values are unpacked into
# y, tX and ids, which the cells below reuse.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [None]:
# Display the dimensions of tX as returned by load_csv_data.
tX.shape

## Properties of the dataset

In [None]:
from collections import Counter
# Tally how many times each distinct value occurs in y; as the last expression
# of the cell, the resulting Counter is displayed as the cell output.
Counter(y)

## Collect evaluation results

In [None]:
# Accumulator for the value returned by each cross_val call in the sections
# below; it is printed and pickled at the end of the notebook.
# NOTE: re-running this cell resets the list and discards collected results.
evaluation_data = []

## 1. Linear regression using gradient descent

In [None]:
# Parameters
# Wider search grid used in earlier runs, kept for reference:
#max_iters = [10, 100, 500, 750, 1000, 1500]
#gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]
max_iters = [100]
gammas = [0.1]
# Degree handed to cross_val as its fourth argument (same position the other
# sections of this notebook use for the polynomial degree).
degree = 10
# The weight vector length is derived from the degree so the two cannot drift
# apart. Every other section sizes it as 30 * degree + 1; the previous
# hard-coded length of 31 only matched degree 1, not the degree 10 passed below.
# NOTE(review): if degree 1 was actually intended here, change `degree` instead.
initial_w = np.zeros(30 * degree + 1)

# Evaluate every (max_iter, gamma) combination, record and show each result.
for max_iter in max_iters:
    for gamma in gammas:
        evaluation_result = cross_val(
            tX, y, 10, degree, least_squares_GD,
            initial_w=initial_w, max_iters=max_iter, gamma=gamma,
        )

        evaluation_data.append(evaluation_result)
        print(evaluation_result)

## 2. Linear regression using stochastic gradient descent

In [None]:
# Parameters
# Wider search grid used in earlier runs, kept for reference:
#max_iters = [10, 100, 500, 750, 1000, 1500]
#gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]
max_iters = [100]
gammas = [0.1]
# Zero-initialised weights: 30 * 10 + 1 entries, matching the value 10 passed
# to cross_val as its fourth argument below.
initial_w = np.zeros(30 * 10 + 1)

# Walk the full (max_iter, gamma) grid, max_iters varying slowest, and keep
# every result in evaluation_data while echoing it to the cell output.
for max_iter, gamma in [(m, g) for m in max_iters for g in gammas]:
    evaluation_result = cross_val(
        tX, y, 10, 10, least_squares_SGD,
        initial_w=initial_w, max_iters=max_iter, gamma=gamma,
    )
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

## 3. Least squares regression using normal equations

In [None]:
# Degrees 1 through 14 (the stop value 15 is exclusive).
degrees = np.arange(1, 15)

# One cross_val run per degree; each result is stored and printed.
for degree in degrees:
    evaluation_result = cross_val(tX, y, 10, degree, least_squares)
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

## 4. Ridge regression using normal equations

In [None]:
# Parameters
# Wider lambda grid used in earlier runs, kept for reference:
#lambdas = [0, 0.005, 0.01, 0.1, 0.5, 1, 10, 55, 100]
lambdas = [1000]
poly_degrees = [1, 3, 5, 7, 10, 12]

# Visit every (degree, lambda_) pair, degree varying slowest, and keep every
# result in evaluation_data while echoing it to the cell output.
for degree, lambda_ in [(d, l) for d in poly_degrees for l in lambdas]:
    evaluation_result = cross_val(
        tX, y, 10, degree, ridge_regression,
        lambda_=lambda_,
    )
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

## 5. Logistic regression using gradient descent or SGD

In [None]:
# Parameters
max_iters = [100, 300, 1000]
gammas = [0.01, 0.05, 0.1, 0.5]
poly_degrees = [1, 3, 5, 7, 10, 12]

# Full grid over (degree, max_iter, gamma); degree varies slowest and gamma
# fastest, the same order as three nested loops.
for degree, max_iter, gamma in [
    (d, m, g)
    for d in poly_degrees
    for m in max_iters
    for g in gammas
]:
    # Fresh zero weights for every run, sized from the current degree.
    initial_w = np.zeros(30 * degree + 1)
    evaluation_result = cross_val(
        tX, y, 10, degree, logistic_regression,
        max_iters=max_iter, gamma=gamma, initial_w=initial_w,
    )
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

## 6. Regularized logistic regression using gradient descent or SGD

In [None]:
# Parameters
max_iters = [100, 300, 1000]
gammas = [0.01, 0.05, 0.1, 0.5]
lambdas = [0.01, 0.05, 0.1, 0.5]
poly_degrees = [1, 3, 5, 7, 10, 12]

# Full grid over (degree, max_iter, gamma, lambda_); degree varies slowest and
# lambda_ fastest, the same order as four nested loops.
for degree, max_iter, gamma, lambda_ in [
    (d, m, g, l)
    for d in poly_degrees
    for m in max_iters
    for g in gammas
    for l in lambdas
]:
    # Fresh zero weights for every run, sized from the current degree.
    initial_w = np.zeros(30 * degree + 1)
    evaluation_result = cross_val(
        tX, y, 10, degree, reg_logistic_regression,
        max_iters=max_iter, gamma=gamma, initial_w=initial_w, lambda_=lambda_,
    )
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

## Final overview

In [None]:
# Print every collected result, one per line, in the order it was recorded.
for el in iter(evaluation_data):
    print(el)

In [None]:
import pickle

# Show how many results were collected before persisting them.
print(len(evaluation_data))

# Serialise the collected results to save.p. The context manager guarantees
# the file is flushed and closed even if pickling raises; the previous bare
# open(...) call left the handle to be closed by garbage collection.
with open("save.p", "wb") as results_file:
    pickle.dump(evaluation_data, results_file)