In [1]:
# Import libraries and needed classes

import sys
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

from Code.LogisticRegression.logisticReg.logisticReg import LogisticRegression
from Code.utils.dataset import Dataset
import Code.utils.store_model as store_model

In [2]:
# Fix NumPy's global random seed so that repeated runs draw the same numbers

SEED = 42
np.random.seed(SEED)

In [3]:
# Load the training, validation and test splits (one input/output CSV pair each)

dataset_paths = [
    '../../Dataset/DatasetsGerados/dataset_training_input.csv',
    '../../Dataset/DatasetsGerados/dataset_training_output.csv',
    '../../Dataset/DatasetsGerados/dataset_validation_input.csv',
    '../../Dataset/DatasetsGerados/dataset_validation_output.csv',
    '../../Dataset/DatasetsGerados/dataset_test_input.csv',
    '../../Dataset/DatasetsGerados/dataset_test_output.csv',
]
dataset = Dataset(*dataset_paths)

# NOTE(review): presumably 'Text' and 'Label' are the input and target column
# names and sep='\t' is the field delimiter — confirm against Dataset.get_datasets.
X_train, y_train, X_validation, y_validation, X_test, y_test, ids = dataset.get_datasets(
    'Text', 'Label', sep='\t', rem_punctuation=False
)

In [4]:
# Instantiate the logistic-regression model, sized to the second dimension of X_train

n_features = X_train.shape[1]
model = LogisticRegression(n_features)

In [5]:
# Fit the model on the training split with gradient descent.
# NOTE(review): the values printed below this cell are presumably the training
# cost reported by gradient_descent — confirm in the LogisticRegression class.

ALPHA = 0.01
ITERS = 50
model.gradient_descent(X_train, y_train, alpha=ALPHA, iters=ITERS)

0.6931471805599454
0.3428615279865149
0.2620090674546095
0.22083443675010897
0.19500488795252569


In [16]:
# Report the model cost on the test split; skipped when y_test is None

if y_test is not None:
    test_cost = model.cost_function(X_test, y_test)
    print("Final cost:", test_cost)

Final cost: 0.18036299181483534


In [7]:
# Predict on the test split and reshape the predictions into a single column

predictions = model.predict_many(X_test)
out = predictions.reshape(predictions.shape[0], 1)

In [8]:
# Write the predictions, merged with their ids, to a tab-separated file

store_results = './Results/log_regression_results.csv'

# Create the target directory first if it is missing
results_dir = os.path.dirname(store_results)
os.makedirs(results_dir, exist_ok=True)

results = dataset.merge_results(ids, out)
results.to_csv(store_results, sep='\t', index=False)

In [9]:
# Persist the trained model under the given path and key

model_filepath = './Model/log_regression'
model_key = 'log_regression'

# Make sure the parent directory exists before writing, as the results cell
# does for its own output; on a fresh checkout './Model' may not exist yet.
# NOTE(review): store_model may already create it — this call is harmless then.
os.makedirs(os.path.dirname(model_filepath), exist_ok=True)

store_model.store_model(model_filepath, model_key, model)

In [10]:
# Load the model back from the same path and key it was stored under;
# this rebinds `model`, replacing the instance trained above

model_filepath, model_key = './Model/log_regression', 'log_regression'

model = store_model.retrieve_model(model_filepath, model_key)