In [2]:
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

# Restore the persisted train/test partitions.
# NOTE(review): joblib.load unpickles the file, so only point it at trusted artifacts.
X_train, X_test, y_train, y_test = joblib.load(
    "../data/processed/train_test_split.pkl"
)

# Build the baseline classifier and fit it in one step (fit() returns the estimator itself).
model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Score the held-out partition.
y_pred = model.predict(X_test)

# Report every metric as "<label> <value>"; the labels carry their own newlines.
for label, metric in (
    ("Accuracy.", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, y_pred))

# Persist the fitted estimator; keeping dump() as the final expression makes the
# cell display the list of written file paths.
joblib.dump(model, "../models/logistic_regression_model.pkl")

Accuracy. 0.7663551401869159

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.94      0.86        80
           1       0.58      0.26      0.36        27

    accuracy                           0.77       107
   macro avg       0.69      0.60      0.61       107
weighted avg       0.74      0.77      0.73       107


Confusion Matrix:
 [[75  5]
 [20  7]]


['../models/logistic_regression_model.pkl']

In [4]:
#Trying out Random Forest and Support Vector Machine Models

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Candidate classifiers to compare on the same train/test split, keyed by display name.
# NOTE(review): LogisticRegression and SVC are sensitive to feature scale; confirm the
# features stored in the pickled split were standardized upstream.
models = {
    "Logistic Regression": LogisticRegression(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
}

# The loop variables are deliberately not named `model` / `y_pred`: those notebook-level
# names hold the fitted logistic regression and its predictions from the earlier cell,
# and rebinding them here would silently leave them pointing at the last candidate (SVM).
for name, candidate in models.items():
    candidate.fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)
    acc = accuracy_score(y_test, candidate_pred)
    print(f"{name} Accuracy: {acc:.4f}")

Logistic Regression Accuracy: 0.7664
Random Forest Accuracy: 0.6262
SVM Accuracy: 0.7196
