## Export Pickle. Logistic Regression Model

As decided in the modelling part, we selected the Logistic Regression model for the Streamlit UI.

In [2]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [4]:
# Define training function
def train_models(models, X_train, y_train):
    """
    Trains multiple classification models.

    Parameters
    ----------
    models : dict or iterable of (name, estimator) pairs
        The models to train. Either a ``{name: estimator}`` dict or any
        iterable yielding ``(name, estimator)`` pairs. Every estimator must
        provide a ``fit(X, y)`` method.
    X_train : training features, passed unchanged to each ``fit`` call.
    y_train : training targets, passed unchanged to each ``fit`` call.

    Returns
    -------
    dict
        Maps each model name to its estimator, which has been fitted in place.
    """
    # Iterating a dict directly yields only its keys, which cannot be unpacked
    # into (name, estimator); ask for the items explicitly in that case.
    named_models = models.items() if isinstance(models, dict) else models

    trained_models = {}

    for model_name, model_instance in named_models:
        model_instance.fit(X_train, y_train)
        trained_models[model_name] = model_instance

    return trained_models

# Define training data saving function
def save_trained_data(trained_models, scaler, X_train, y_train):
    """
    Pickles every trained model together with the scaler and the training data.

    For each ``name -> model`` entry of `trained_models`, a file at the path
    ``"<name>.pkl"`` is written. Each file contains a single dict with the
    keys 'model', 'scaler', 'X_train' and 'y_train'.
    """
    for name, fitted in trained_models.items():
        # The same scaler and training data are bundled with each model
        bundle = {
            'model': fitted,
            'scaler': scaler,
            'X_train': X_train,
            'y_train': y_train,
        }
        target_path = f"{name}.pkl"
        with open(target_path, "wb") as out_file:
            pickle.dump(bundle, out_file)


In [6]:

# Read the encoded dataset from disk
dataset_encoded = pd.read_csv('../data/cleaned/dataset_encoded.csv')

# Separate the feature columns from the 'stroke' target column
X = dataset_encoded.drop(columns=['stroke'])  # features
y = dataset_encoded['stroke']  # target

# Stratified 80/20 split; the test portion is discarded in this cell
X_train, _, y_train, _ = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

# Fit the scaler on the training features, then standardize them with it
std = StandardScaler().fit(X_train)
X_train_std = std.transform(X_train)

# Configure the classifier
model = LogisticRegression(
    random_state=0,
    max_iter=10000,
    class_weight='balanced',
)

# Fit the classifier on the standardized training features
model.fit(X_train_std, y_train)

# Pickle the fitted model along with the scaler and the standardized training data
save_trained_data(
    {'Logistic_Regression': model},
    std,
    X_train_std,
    y_train,
)
