In [1]:
import numpy as np
from helpers import *
from implementations import *
from helpers_analysis import *

In [2]:
# Load the cleaned dataset without sub-sampling.
x_train, x_test, y_train, train_ids, test_ids = load_cleaned_csv_data(
    "dataset", sub_sample=False
)

# Split the loaded training data into two labelled parts.
# Naming caveat: the upper-case X_train / X_test (with y_train / y_test) are the
# two parts of this split, whereas the lower-case x_train / x_test are the arrays
# returned by the loader above. y_train is rebound here to the labels of the
# training part only.
# test_size=0.2 presumably holds out 20% of the rows; verify against the helper.
X_train, X_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Gradient Descent

In [None]:
# Learning rates (gamma) compared for gradient descent on the MSE loss.
gamma_values = [0.001, 0.005, 0.01, 0.015, 0.02]

# Best configuration seen so far. best_f1 starts at -inf rather than 0 so that a
# candidate is still recorded when every gamma reaches an F1 of exactly 0;
# with a start value of 0 and a strict ">" comparison, best_w would stay None.
best_gamma = None
best_threshold = None
best_f1 = -np.inf
best_w = None

for gamma in gamma_values:
    print(f"Testing gamma: {gamma}")
    # Every run starts from all-zero weights, so runs differ only in gamma.
    initial_w = np.zeros(X_train.shape[1])
    w, loss = mean_squared_error_gd(y_train, X_train, initial_w, max_iters=1000, gamma=gamma)

    # Raw linear scores on the held-out split.
    y_scores = X_test @ w

    # Threshold (and the F1 reported for it) selected on the held-out split.
    threshold, f1 = optimize_threshold(y_test, y_scores)

    print(f"Gamma: {gamma}, Best Threshold: {threshold}, F1 Score: {f1}")

    # Keep the first gamma that strictly improves on the best F1 so far.
    if f1 > best_f1:
        best_f1 = f1
        best_gamma = gamma
        best_threshold = threshold
        best_w = w.copy()

# best_w can only still be None if no F1 compared greater than -inf (a NaN F1
# never does) or if gamma_values is empty. Fail with a clear message instead of
# an opaque TypeError from `X_test @ None`.
if best_w is None:
    raise RuntimeError(
        "No gamma produced a comparable F1 score; check the training output for NaN/inf values."
    )

# Re-score the held-out split with the selected weights and threshold.
y_scores = X_test @ best_w
# Scores at or above the threshold map to +1, all others to -1.
y_prediction = np.where(y_scores >= best_threshold, 1, -1)

# NOTE: gamma and the threshold were selected on this same (X_test, y_test)
# split, so the metrics below are optimistic rather than an unbiased estimate.
final_accuracy = accuracy(y_test, y_prediction)
final_f1_score = f1_score(y_test, y_prediction)

print("Best Gamma:", best_gamma)
print("Best Threshold:", best_threshold)
print("Final Accuracy:", final_accuracy)
print("Final F1 Score:", final_f1_score)


# Stochastic Gradient Descent

In [None]:
# Learning rates (gamma) compared for stochastic gradient descent on the MSE loss.
# TODO: change these values (the grid is the same one used for gradient descent).
gamma_values = [0.001, 0.005, 0.01, 0.015, 0.02]

# Best configuration seen so far. best_f1 starts at -inf rather than 0 so that a
# candidate is still recorded when every gamma reaches an F1 of exactly 0;
# with a start value of 0 and a strict ">" comparison, best_w would stay None.
best_gamma = None
best_threshold = None
best_f1 = -np.inf
best_w = None

for gamma in gamma_values:
    print(f"Testing gamma: {gamma}")
    # Re-seed before every run so results are reproducible and each gamma sees
    # the same random sequence.
    # NOTE(review): assumes mean_squared_error_sgd draws its samples from
    # NumPy's global RNG - confirm in implementations.py.
    np.random.seed(42)
    # Every run starts from all-zero weights, so runs differ only in gamma.
    initial_w = np.zeros(X_train.shape[1])
    w, loss = mean_squared_error_sgd(y_train, X_train, initial_w, max_iters=1000, gamma=gamma)

    # Raw linear scores on the held-out split.
    y_scores = X_test @ w

    # Threshold (and the F1 reported for it) selected on the held-out split.
    threshold, f1 = optimize_threshold(y_test, y_scores)

    print(f"Gamma: {gamma}, Best Threshold: {threshold}, F1 Score: {f1}")

    # Keep the first gamma that strictly improves on the best F1 so far.
    if f1 > best_f1:
        best_f1 = f1
        best_gamma = gamma
        best_threshold = threshold
        best_w = w.copy()

# best_w can only still be None if no F1 compared greater than -inf (a NaN F1
# never does) or if gamma_values is empty. Fail with a clear message instead of
# an opaque TypeError from `X_test @ None`.
if best_w is None:
    raise RuntimeError(
        "No gamma produced a comparable F1 score; check the training output for NaN/inf values."
    )

# Re-score the held-out split with the selected weights and threshold.
y_scores = X_test @ best_w
# Scores at or above the threshold map to +1, all others to -1.
y_prediction = np.where(y_scores >= best_threshold, 1, -1)

# NOTE: gamma and the threshold were selected on this same (X_test, y_test)
# split, so the metrics below are optimistic rather than an unbiased estimate.
final_accuracy = accuracy(y_test, y_prediction)
final_f1_score = f1_score(y_test, y_prediction)

print("Best Gamma:", best_gamma)
print("Best Threshold:", best_threshold)
print("Final Accuracy:", final_accuracy)
print("Final F1 Score:", final_f1_score)


# Least Squares

In [None]:
# Fit least squares. The call takes no learning rate or iteration count, so
# there is no gamma grid to search in this section.
w, loss = least_squares(y_train, X_train)

# Raw linear scores on the held-out split; computed once and reused below.
y_scores = X_test @ w

# Threshold (and the F1 reported for it) selected on the held-out split.
threshold, f1 = optimize_threshold(y_test, y_scores)

# Scores at or above the threshold map to +1, all others to -1.
y_prediction = np.where(y_scores >= threshold, 1, -1)

# NOTE: the threshold was selected on this same (X_test, y_test) split, so the
# metrics below are optimistic rather than an unbiased estimate.
final_accuracy = accuracy(y_test, y_prediction)
final_f1_score = f1_score(y_test, y_prediction)

print("Best Threshold:", threshold)
print("Final Accuracy:", final_accuracy)
print("Final F1 Score:", final_f1_score)

# Ridge Regression

In [10]:
# Fit ridge regression with a fixed regularisation strength (lambda_ = 0.001).
# The second return value is discarded, as before.
w, _ = ridge_regression(y_train, X_train, lambda_=0.001)

# Evaluate the fit the same way the other model sections do, so ridge can be
# compared with them. Ridge-specific names are used so that no variable set by
# another cell (threshold, y_scores, final_accuracy, ...) is overwritten.
ridge_scores = X_test @ w
ridge_threshold, ridge_f1 = optimize_threshold(y_test, ridge_scores)

# Scores at or above the threshold map to +1, all others to -1.
ridge_prediction = np.where(ridge_scores >= ridge_threshold, 1, -1)

# NOTE: the threshold was selected on this same (X_test, y_test) split, so the
# metrics below are optimistic rather than an unbiased estimate.
ridge_accuracy = accuracy(y_test, ridge_prediction)
ridge_f1_score = f1_score(y_test, ridge_prediction)

print("Best Threshold:", ridge_threshold)
print("Final Accuracy:", ridge_accuracy)
print("Final F1 Score:", ridge_f1_score)

# Logistic Regression

In [None]:
# Learning rates (gamma) compared for logistic regression.
# A wider grid tried earlier: [0.001, 0.005, 0.01, 0.05, 0.1]
gamma_values = [0.05, 0.7]

# Best configuration seen so far. best_f1 starts at -inf rather than 0 so that a
# candidate is still recorded when every gamma reaches an F1 of exactly 0;
# with a start value of 0 and a strict ">" comparison, best_w would stay None.
best_gamma = None
best_threshold = None
best_f1 = -np.inf
best_w = None

for gamma in gamma_values:
    print(f"Testing gamma: {gamma}")
    # Every run starts from all-zero weights, so runs differ only in gamma.
    initial_w = np.zeros(X_train.shape[1])
    # NOTE(review): predictions below use the labels +1 / -1; confirm that
    # logistic_regression expects y_train in that same encoding.
    w, loss = logistic_regression(y_train, X_train, initial_w, max_iters=1000, gamma=gamma)

    # Linear scores X_test @ w on the held-out split (no sigmoid is applied, so
    # the threshold below lives on this linear scale, not on probabilities).
    y_scores = X_test @ w

    # Threshold (and the F1 reported for it) selected on the held-out split.
    threshold, f1 = optimize_threshold(y_test, y_scores)

    print(f"Gamma: {gamma}, Best Threshold: {threshold}, F1 Score: {f1}")

    # Keep the first gamma that strictly improves on the best F1 so far.
    if f1 > best_f1:
        best_f1 = f1
        best_gamma = gamma
        best_threshold = threshold
        best_w = w.copy()

# best_w can only still be None if no F1 compared greater than -inf (a NaN F1
# never does) or if gamma_values is empty. Fail with a clear message instead of
# an opaque TypeError from `X_test @ None`.
if best_w is None:
    raise RuntimeError(
        "No gamma produced a comparable F1 score; check the training output for NaN/inf values."
    )

# Re-score the held-out split with the selected weights and threshold.
y_scores = X_test @ best_w
# Scores at or above the threshold map to +1, all others to -1.
y_prediction = np.where(y_scores >= best_threshold, 1, -1)

# NOTE: gamma and the threshold were selected on this same (X_test, y_test)
# split, so the metrics below are optimistic rather than an unbiased estimate.
final_accuracy = accuracy(y_test, y_prediction)
final_f1_score = f1_score(y_test, y_prediction)

print("Best Gamma:", best_gamma)
print("Best Threshold:", best_threshold)
print("Final Accuracy:", final_accuracy)
print("Final F1 Score:", final_f1_score)
