In [11]:
import numpy as np

#data_short = np.load('data/lorfs_100.npz')
#data_medium = np.load('data/lorfs_400.npz')
#data_long = np.load('data/lorfs_1000.npz')

# Open the three *_meta.npz archives (the 100 / 400 / 1000 variants) in one
# pass: np.load is applied to each path in order and the results are unpacked
# into the names the later cells use.
short_meta, medium_meta, long_meta = map(
    np.load,
    (
        'Data/lorfs_100_meta.npz',
        'Data/lorfs_400_meta.npz',
        'Data/lorfs_1000_meta.npz',
    ),
)

In [15]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import joblib  # For saving the model
import os

def run_and_save_logreg(length, target, description, save_dir="Models/Logreg"):
    """
    Fits a single-feature Logistic Regression model and saves it to disk.

    The data is split 80/20 into training and test sets (random_state=42),
    a default ``LogisticRegression`` is fitted on the training part, accuracy
    and F1 on the test part are printed, and the fitted model is written with
    joblib to ``<save_dir>/<formatted description>_model.joblib``.

    Parameters:
    - length: The feature data (pandas Series or array-like); it becomes the
      single 'length' column used as model input.
    - target: The target data (pandas Series or array-like).
      NOTE(review): f1_score is called with its default arguments, which
      presumably expects binary labels -- confirm against the data.
    - description: Human-readable dataset label (e.g. 'Short Dataset'). It is
      lower-cased and its spaces are replaced by underscores to build the
      saved model's file name.
    - save_dir: Directory to save the model file in; created if it does not
      exist. Defaults to "Models/Logreg".

    Returns:
    - None
    """
    # Create the dataframe
    df = pd.DataFrame({'length': length, 'target': target})

    # Splitting the dataset into training and testing sets.
    # The fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        df[['length']], df['target'], test_size=0.2, random_state=42
    )
    # Show a few held-out rows as a quick sanity check of the split
    print(X_test.head())

    # Initialize and fit the logistic regression model
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)

    # Predict on the test set
    y_pred = logreg.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f'Accuracy of logistic regression classifier on test set: {accuracy:.2f}')
    print(f'F1 of logistic regression classifier on test set: {f1:.2f}')

    # Derive the model name from the description, e.g. 'Short Dataset' -> 'short_dataset_model'
    description_formatted = description.replace(' ', '_').lower()
    model_name = f"{description_formatted}_model"

    # Save the model in the requested directory, creating it first if needed
    model_path = os.path.join(save_dir, f"{model_name}.joblib")
    os.makedirs(save_dir, exist_ok=True)
    joblib.dump(logreg, model_path)
    print(f"Model saved to {model_path}")


In [16]:

# Fit and save one model per metadata archive, printing a heading before each
# run so the three blocks of output can be told apart.
for heading, meta, description in (
    ('Short', short_meta, 'Short Dataset'),
    ('Medium', medium_meta, 'Medium Dataset'),
    ('Long', long_meta, 'Long Dataset'),
):
    print(heading)
    run_and_save_logreg(meta['length'], meta['target'], description)

Short
        length
240703      35
190305      39
94151       31
99108       89
100638      53
Accuracy of logistic regression classifier on test set: 0.59
F1 of logistic regression classifier on test set: 0.56
Model saved to Models/Logreg/short_dataset_model.joblib
Medium
        length
320242     257
369963     355
258623     100
257662     124
221553     192
Accuracy of logistic regression classifier on test set: 0.68
F1 of logistic regression classifier on test set: 0.65
Model saved to Models/Logreg/medium_dataset_model.joblib
Long
       length
5610      749
43436     592
46104     517
13529     594
46370     859
Accuracy of logistic regression classifier on test set: 0.53
F1 of logistic regression classifier on test set: 0.44
Model saved to Models/Logreg/long_dataset_model.joblib
