In [1]:

import pickle
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the pickled train / test / validation feature matrices and targets.
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so
# these files must only ever come from a trusted source.
def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and closed again before returning.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Training split
features = load_pickle('features.pkl')
target = load_pickle('target.pkl')

# Test split
test_features = load_pickle('test_features.pkl')
test_target = load_pickle('test_target.pkl')

# Validation split
validation_features = load_pickle('validation_features.pkl')
validation_target = load_pickle('validation_target.pkl')
    

In [3]:
def calculate_rmse_and_predict(model, features, actual):
    """Predict with ``model`` and return the RMSE of those predictions.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(features)``.
    features : object
        Passed through unchanged to ``model.predict``.
    actual : array-like
        Ground-truth values, one per prediction. Must be convertible to float.

    Returns
    -------
    tuple
        ``(rmse, predicted)`` where ``rmse`` is
        ``sqrt(mean((actual - predicted) ** 2))`` and ``predicted`` is exactly
        the object returned by ``model.predict``.

    Raises
    ------
    ValueError
        If ``actual`` and the predictions hold a different number of values.
    """
    predicted = model.predict(features)

    # Compare flat float arrays rather than the raw inputs:
    #  * a column-shaped target (n, 1) minus (n,) predictions would otherwise
    #    broadcast to an (n, n) matrix and silently yield a wrong RMSE;
    #  * boolean label arrays do not support ``-`` in NumPy.
    actual_values = np.asarray(actual, dtype=float).ravel()
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    if actual_values.shape != predicted_values.shape:
        raise ValueError(
            f"actual has {actual_values.size} values but the model produced "
            f"{predicted_values.size} predictions"
        )

    residuals = actual_values - predicted_values
    rmse = np.sqrt(np.mean(residuals ** 2))
    return rmse, predicted

In [4]:
def train_and_evaluate_logistic_regression(features, target, test_features, test_target,
                                           random_state=42, max_iter=1000):
    """Fit a ``LogisticRegression`` and report its accuracy on the test split.

    Parameters
    ----------
    features, target
        Training inputs and labels, passed to ``LogisticRegression.fit``.
    test_features, test_target
        Inputs and labels used to compute the reported accuracy.
    random_state : int, optional
        Forwarded to ``LogisticRegression``; defaults to 42.
    max_iter : int, optional
        Forwarded to ``LogisticRegression``; defaults to 1000.

    Returns
    -------
    tuple
        ``(fitted_classifier, test_accuracy)``.

    The test accuracy is also printed.
    """
    log_reg_clf = LogisticRegression(random_state=random_state, max_iter=max_iter)
    log_reg_clf.fit(features, target)

    log_reg_test_predictions = log_reg_clf.predict(test_features)
    log_reg_test_accuracy = accuracy_score(test_target, log_reg_test_predictions)
    print(f"Logistic Regression Test Accuracy: {log_reg_test_accuracy}")
    return log_reg_clf, log_reg_test_accuracy

In [5]:
# Fit on the training split and score on the test split; keep both the fitted
# classifier and its accuracy for the cells below.
log_reg_clf, log_reg_test_accuracy = train_and_evaluate_logistic_regression(
    features=features,
    target=target,
    test_features=test_features,
    test_target=test_target,
)

Logistic Regression Test Accuracy: 0.7095674026207144


In [6]:
# RMSE and predictions of the fitted classifier on the test split
log_reg_rmse_test, log_reg_test_predictions = calculate_rmse_and_predict(
    model=log_reg_clf,
    features=test_features,
    actual=test_target,
)
print(f"Logistic Regression RMSE on test data: {log_reg_rmse_test}")

# Same measurement on the validation split
log_reg_rmse_validation, log_reg_validation_predictions = calculate_rmse_and_predict(
    model=log_reg_clf,
    features=validation_features,
    actual=validation_target,
)
print(f"Logistic Regression RMSE on validation data: {log_reg_rmse_validation}")

Logistic Regression RMSE on test data: 0.5389179876189749
Logistic Regression RMSE on validation data: 0.5342105920462471


In [7]:
# Collect the test accuracy and the test / validation RMSE under the model's
# name, then persist the whole mapping as a pickle file.
model_results = {
    'Logistic Regression': {
        'Accuracy': log_reg_test_accuracy,
        'RMSE (test set)': log_reg_rmse_test,
        'RMSE (Val set)': log_reg_rmse_validation,
    },
}

with open('model_results.pkl', 'wb') as results_file:
    pickle.dump(model_results, results_file)
