# Logistic Regression Training

In [None]:
import pickle

import numpy as np
import os
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report

from data_processing.data import CLASS_NAMES, load_discretized_data

# Positions of the feature data and the target data inside the container
# returned by `load_discretized_data` (used below as train[FEATURES] and
# train[TARGETS]).
FEATURES = 0
TARGETS = 1

## Setup and load data

In [None]:
# Seed NumPy's global random number generator so reruns are repeatable.
seed = 71
np.random.seed(seed)

# Load the discretized data; only the first returned value is kept, the
# second one is not used in this notebook.
data_dir = "./data_processing/voxels/"
train, _ = load_discretized_data(data_dir, prefix='', binary=True)

# Number of leading examples to use for fitting and evaluation.
# The sentinel -1 means "use every example that was loaded".
examples_limit = -1
examples_limit = train[TARGETS].shape[0] if examples_limit == -1 else examples_limit

## Build model

Use standard `LogisticRegression` with a fixed L2 regularization strength (`C=1.0`):

In [None]:
# Logistic regression with a single, fixed L2 regularization strength.
model = LogisticRegression(
    solver='saga',
    n_jobs=-1,
    # No class re-weighting. This must be the `None` object: scikit-learn
    # rejects the string 'None' (valid values are a dict, 'balanced' or None).
    class_weight=None,
    penalty='l2',  # L2 regularization
    C=1.0,  # inverse of the regularization strength (smaller = stronger penalty)
    max_iter=4000,
    tol=1e-4,
)

Use `LogisticRegressionCV`, which cross-validates over a grid of regularization strengths and keeps the best one:

In [None]:
# Logistic regression whose L2 regularization strength is chosen by
# cross-validation over a grid of candidate C values.
model_cv = LogisticRegressionCV(
    solver='saga',
    n_jobs=-1,
    # No class re-weighting. This must be the `None` object: scikit-learn
    # rejects the string 'None' (valid values are a dict, 'balanced' or None).
    class_weight=None,
    penalty='l2',  # L2 regularization
    Cs=10,  # inverse regularization strength (if int, number of grid values on a scale)
    cv=5,  # number of cross-validation folds
    max_iter=10000,
    tol=1e-4,
)

## Train the model

In [None]:
%%time
# Fit both estimators on the same leading `examples_limit` examples.
model.fit(train[FEATURES][:examples_limit],
          train[TARGETS][:examples_limit])

model_cv.fit(train[FEATURES][:examples_limit],
             train[TARGETS][:examples_limit])

# Persist the fitted models so the evaluation cells can reload them from disk.
log_dir = "logistic/logs/"
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(log_dir, exist_ok=True)

# Use context managers so each file is flushed and closed right after the
# dump instead of leaving the handle open until garbage collection.
model_filename = os.path.join(log_dir, 'logistic_model.pkl')
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

model_filename = os.path.join(log_dir, 'logistic_cv_model.pkl')
with open(model_filename, 'wb') as model_file:
    pickle.dump(model_cv, model_file)

In [None]:
# Show the regularization value(s) C selected by the cross-validated search.
model_cv.C_

## Evaluate model

In [None]:
# Import the evaluation module and register it with IPython's autoreload
# extension so edits to it are picked up without restarting the kernel.
import logistic.eval
%load_ext autoreload
# Mode 1: reload only the modules registered through %aimport.
%autoreload 1
%aimport logistic.eval

In [None]:
# Evaluate the pickled fixed-C model.
# NOTE: `data=train` is the same data the model was fitted on.
logistic.eval.eval(
    model_file='logistic/logs/logistic_model.pkl',
    data=train,
    name="LogisticRegression",
    examples_limit=examples_limit,
)

In [None]:
# Evaluate the pickled cross-validated model.
# NOTE: `data=train` is the same data the model was fitted on.
logistic.eval.eval(
    model_file='logistic/logs/logistic_cv_model.pkl',
    data=train,
    name="LogisticCVRegression",
    examples_limit=examples_limit,
)