In [None]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [None]:
from proj1_helpers import *
# Path to the training CSV, resolved relative to the notebook's working directory.
# TODO: download the training data and supply its path here.
DATA_TRAIN_PATH = "trainFile.csv"
# load_csv_data is provided by proj1_helpers; it is unpacked here into the
# class labels (y), the feature matrix (tX) and the event ids (ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
def standardize(x):
    """Center every column of ``x`` and scale it to unit standard deviation.

    Parameters
    ----------
    x : numpy.ndarray
        Data whose statistics are taken along axis 0 (one column per feature).

    Returns
    -------
    numpy.ndarray
        ``(x - mean) / std`` computed column-wise. Columns with zero standard
        deviation are returned as all zeros instead of NaN.
    """
    centered_data = x - np.mean(x, axis=0)
    scale = np.std(centered_data, axis=0)
    # A constant column has zero spread: dividing by it would yield 0/0 = NaN
    # and poison every later matrix product. Dividing by 1 keeps the centered
    # value (0) for such columns and leaves all other columns untouched.
    scale = np.where(scale == 0, 1.0, scale)
    return centered_data / scale
# Replace tX with its column-standardized version; the raw feature matrix is
# not kept, so re-run the load above to get it back.
tX = standardize(tX)
print("done")


## Other useful functions

In [None]:



def compute_loss_logisitic(y,tx,w):
    """Return ``tx.T @ (delta(tx @ w) - y)``.

    Despite the ``loss`` in its name, the value is a vector with one entry per
    column of ``tx``; logistic_regression uses it as its descent direction.
    The result is a sum over samples (no 1/N factor).

    NOTE(review): ``delta(tx @ w) - y`` is the gradient of the logistic
    negative log-likelihood for labels encoded as 0/1, whereas
    compute_loss_regression in this file uses a formula for -1/+1 labels.
    Confirm which encoding ``y`` carries.
    """
    predicted = delta(tx @ w)
    mismatch = predicted - y
    return tx.T @ mismatch

def compute_loss_regression(y,tx,w, lambda_):
    """L2-penalised logistic loss for labels encoded as -1 / +1.

    Computes ``sum_i log(1 + exp(-y_i * x_i.w)) + (lambda_ / 2) * ||w||^2``,
    i.e. the negative log-likelihood plus the ridge penalty, so that smaller
    is better for both terms.

    Parameters
    ----------
    y : numpy.ndarray
        Labels; the formula is a valid likelihood only for values -1 and +1.
    tx : numpy.ndarray
        Feature matrix, one row per sample.
    w : numpy.ndarray
        Weight vector.
    lambda_ : float
        Strength of the L2 penalty.

    Returns
    -------
    float
        The penalised loss (always >= 0 for ``lambda_ >= 0``).
    """
    margins = y * (tx @ w)
    # -log(sigmoid(m)) == log(1 + exp(-m)). logaddexp evaluates this without
    # overflowing or hitting log(0) when |m| is large.
    neg_log_likelihood = np.sum(np.logaddexp(0, -margins))
    return neg_log_likelihood + (lambda_/2)*np.sum(w**2)

def compute_loss(y, tx, w):
    """Least-squares cost ``1/(2N) * e.T @ e`` with ``e = y - tx @ w``.

    ``N`` is the length of ``y`` along its first axis.
    """
    residual = y - (tx @ w)
    n_samples = y.shape[0]
    squared_error = residual.T @ residual
    return 1/(2*n_samples) * squared_error

def compute_gradient(y, tx, w):
    """Gradient of the least-squares cost: ``-(1/N) * tx.T @ (y - tx @ w)``.

    ``N`` is the length of ``y`` along its first axis.
    """
    residual = y - (tx @ w)
    scale = -(1/y.shape[0])
    return scale * (tx.T @ residual)


def compute_stoch_gradient(y, tx, w):
    """Least-squares gradient estimated from one sample or a mini-batch.

    Uses the same formula as the full gradient, ``-(1/B) * tx.T @ (y - tx @ w)``,
    where ``B`` is the number of samples passed in. For a single sample
    (``B = 1``) this is ``-x * (y - x.w)``.

    Parameters
    ----------
    y : scalar or numpy.ndarray
        Target of one sample, or targets of a mini-batch.
    tx : numpy.ndarray
        Feature vector of one sample (1-D) or feature rows of a mini-batch (2-D).
    w : numpy.ndarray
        Current weight vector.

    Returns
    -------
    numpy.ndarray
        Gradient estimate, one entry per weight.
    """
    # Promote a single sample to a batch of size one so that the single-sample
    # and mini-batch cases share one code path.
    targets = np.atleast_1d(y)
    features = np.atleast_2d(tx)
    residual = targets - (features @ w)
    return -(features.T @ residual) / targets.shape[0]

def delta(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    Scalar for scalar input, otherwise an array of the same shape as ``x``,
    with values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow. The two branches
    # are algebraically the same sigmoid, written for x >= 0 and x < 0.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and is a no-op
    # for real arrays.
    return result[()]

def build_poly(x, degree):
    """Stack the element-wise powers ``x**0, x**1, ..., x**degree`` side by side.

    The result has ``x.shape[1] * (degree + 1)`` columns: the first group of
    ``x.shape[1]`` columns holds ``x**0`` (all ones), the next ``x**1``, etc.
    """
    n_samples, n_features = x.shape
    n_powers = degree + 1
    expanded = np.zeros((n_samples, n_features * n_powers))

    for power in range(n_powers):
        first_col = power * n_features
        expanded[:, first_col:first_col + n_features] = np.power(x, power)
    return expanded








## Least squares using Gradient Descent

In [None]:

def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression fitted by full-batch gradient descent.

    Parameters
    ----------
    y : numpy.ndarray
        Targets.
    tx : numpy.ndarray
        Feature matrix, one row per sample.
    initial_w : numpy.ndarray
        Starting weights (not modified).
    max_iters : int
        Number of gradient steps; 0 returns ``initial_w`` unchanged.
    gamma : float
        Step size.

    Returns
    -------
    (w, loss)
        Final weights and ``compute_loss`` evaluated at those weights.
    """
    w = initial_w
    for _ in range(max_iters):
        # Out-of-place update so the caller's initial_w array is never mutated.
        w = w - gamma * compute_gradient(y, tx, w)

    # Evaluate the loss once, for the weights actually returned. This also
    # keeps the function well-defined when max_iters is 0.
    return w, compute_loss(y, tx, w)


## Least squares using Stochastic Gradient Descent

In [None]:
def least_squares_SGD(y, tx, initial_w,max_iters, gamma):
    """Linear regression fitted by stochastic gradient descent.

    Each iteration draws one row index uniformly at random with
    ``np.random.randint`` (NumPy's global random state) and takes a step
    along ``compute_stoch_gradient`` for that single sample.

    Parameters
    ----------
    y : numpy.ndarray
        Targets.
    tx : numpy.ndarray
        Feature matrix, one row per sample.
    initial_w : numpy.ndarray
        Starting weights (not modified).
    max_iters : int
        Number of single-sample steps.
    gamma : float
        Step size.

    Returns
    -------
    (w, loss)
        Final weights and ``compute_loss`` over the full data set at those
        weights.
    """
    w = initial_w
    n_samples = y.shape[0]
    for _ in range(max_iters):
        i = np.random.randint(n_samples)
        # Out-of-place update so the caller's initial_w array is never mutated.
        w = w - gamma * compute_stoch_gradient(y[i], tx[i], w)
    return w, compute_loss(y, tx, w)



## Least squares using the normal equations

In [None]:
def least_squares(y, tx):
    """Closed-form least-squares fit of ``y`` on ``tx``.

    Parameters
    ----------
    y : numpy.ndarray
        Targets.
    tx : numpy.ndarray
        Feature matrix, one row per sample.

    Returns
    -------
    (w, loss)
        Weights minimising ``||y - tx @ w||^2`` and ``compute_loss`` at those
        weights. When ``tx`` is rank deficient the minimum-norm minimiser is
        returned.
    """
    # lstsq solves the problem on tx directly: no Gram matrix, determinant
    # test or explicit inverse is needed, and rank-deficient inputs (e.g.
    # duplicated columns) are handled instead of failing.
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(tx, y, rcond=None)
    return w, compute_loss(y, tx, w)
## Least squares using Stochastic Gradient Descent


## Ridge Regression

In [None]:
def ridge_regression(y, tx, lambda_):
    """Closed-form ridge (L2-regularised) regression.

    Solves ``(tx.T @ tx + 2 * N * lambda_ * I) w = tx.T @ y`` where ``N`` is
    the number of rows of ``tx``.

    Parameters
    ----------
    y : numpy.ndarray
        Targets.
    tx : numpy.ndarray
        Feature matrix, one row per sample.
    lambda_ : float
        Regularisation strength (scaled by ``2 * N`` internally).

    Returns
    -------
    (w, loss)
        Ridge weights and ``compute_loss`` at those weights (the penalty term
        is not included in the returned loss).
    """
    n_samples, n_features = tx.shape[0], tx.shape[1]
    penalty = lambda_*(2*n_samples)
    regularised_gram = tx.T@tx + np.eye(n_features)*penalty
    rhs = tx.T@y
    try:
        # Solving the linear system is more accurate and cheaper than forming
        # the inverse explicitly.
        w = np.linalg.solve(regularised_gram, rhs)
    except np.linalg.LinAlgError:
        # The system is exactly singular (e.g. lambda_ == 0 with a
        # rank-deficient tx): fall back to the minimum-norm solution.
        w = np.linalg.lstsq(regularised_gram, rhs, rcond=None)[0]
    return w, compute_loss(y, tx, w)


## Logistic Regression

In [None]:

def logistic_regression(y, tx, initial_w,max_iters, gamma):
    """Run ``max_iters`` descent steps using ``compute_loss_logisitic``.

    ``compute_loss_logisitic`` returns the update direction (a vector, not a
    scalar loss); each step moves the weights by ``-gamma`` times that vector.

    Returns the final weights and ``compute_loss(y, tx, w)``.

    NOTE(review): ``compute_loss`` is the least-squares cost, not a logistic
    loss. Confirm that this is the value callers expect as the second element.
    """
    weights = initial_w
    for _ in range(max_iters):
        step = gamma * compute_loss_logisitic(y, tx, weights)
        # Out-of-place update so the caller's initial_w array is never mutated.
        weights = weights - step
    return weights, compute_loss(y, tx, weights)


## Regularized Logistic Regression

In [None]:
def reg_logistic_regression(y, tx, lambda_,initial_w, max_iters, gamma):
    """L2-regularised logistic regression fitted by gradient descent.

    Each step follows the logistic direction returned by
    ``compute_loss_logisitic`` plus the gradient ``lambda_ * w`` of the
    penalty ``(lambda_ / 2) * ||w||^2``.

    Parameters
    ----------
    y : numpy.ndarray
        Labels.
    tx : numpy.ndarray
        Feature matrix, one row per sample.
    lambda_ : float
        Strength of the L2 penalty.
    initial_w : numpy.ndarray
        Starting weights (not modified).
    max_iters : int
        Number of gradient steps.
    gamma : float
        Step size. The logistic term is a sum over samples (no 1/N factor),
        unlike ``compute_gradient``, so a stable ``gamma`` is typically much
        smaller than for ``least_squares_GD``.

    Returns
    -------
    (w, loss)
        Final weights and ``compute_loss(y, tx, w)``.

    NOTE(review): the returned loss is the least-squares cost, kept as-is for
    backward compatibility; confirm whether a logistic loss is wanted here.
    NOTE(review): ``compute_loss_logisitic`` is the logistic gradient for 0/1
    labels; confirm the encoding of ``y``.
    """
    w = initial_w
    for _ in range(max_iters):
        gradient = compute_loss_logisitic(y, tx, w) + lambda_ * w
        # Out-of-place update so the caller's initial_w array is never mutated.
        w = w - gamma * gradient
    return w, compute_loss(y, tx, w)

## Using everything

In [None]:
# Fit every model on the training set and collect the least-squares cost
# (compute_loss) of each fit in `loss`, in the order the models are fitted.
inital_w = np.zeros(tX.shape[1])  # one weight per feature column
loss = []

# Linear regression: gradient descent
weightGD, lossGD = least_squares_GD(y, tX, inital_w, 50, 0.1)
loss.append(lossGD)

# Linear regression: stochastic gradient descent.
# SGD draws rows from NumPy's global random state; seed it so that
# Restart & Run All reproduces the same weights.
np.random.seed(1)
weightSGD, lossSGD = least_squares_SGD(y, tX, inital_w, 50, 0.01)
loss.append(lossSGD)

# Linear regression: closed form
weightLS, lossLS = least_squares(y, tX)
loss.append(lossLS)

# Ridge regression: closed form
weightRR, lossRR = ridge_regression(y, tX, 1)
loss.append(lossRR)

# Logistic regression (the returned loss is ignored and recomputed explicitly)
weightsLR,_ = logistic_regression(y, tX, inital_w, 50, 0.00000001)
loss.append(compute_loss(y,tX,weightsLR))

# Regularized logistic regression
weightsRLR,_ = reg_logistic_regression(y, tX, 1, inital_w, 50, 0.1)
loss.append(compute_loss(y,tX,weightsRLR))

print(loss)
print("DONE")

## Regularized Logistic Regression

## Generate predictions and save output in csv format for submission:

## Generate predictions and save output in csv format for submission:

In [None]:
from proj1_helpers import *

DATA_TEST_PATH = 'testFile.csv' # TODO: download the test data and supply its path here
print("done")
# The labels column of the test file is discarded; only features and ids are used.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
# The models are fitted on standardized training features, so the test
# features must be put on the same scale before any weights are applied.
# NOTE(review): this uses the test set's own mean/std because standardize()
# does not return the training statistics; reusing the training mean/std
# would be the stricter choice.
tX_test = standardize(tX_test)
print("done")

In [None]:
# File the submission is written to, relative to the notebook's working directory.
OUTPUT_PATH = 'result.csv' # TODO: fill in desired name of output file for submission
print("done")

# Predict with the closed-form least-squares weights (weightLS is produced by
# the "Using everything" cell, which must have been run first).
# predict_labels comes from proj1_helpers.
y_pred = predict_labels(weightLS, tX_test)
print("done")

# create_csv_submission comes from proj1_helpers; it receives the test ids,
# the predictions and the output path.
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)
print("done")