In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, roc_auc_score

In [29]:
mean1 = 55
std_dev1 = 10
num_samples = 500

column1_numbers = np.random.normal(mean1, std_dev1, num_samples)
column1_numbers = np.clip(column1_numbers, 30, 120)
column1_numbers = np.round(column1_numbers).astype(int)

mean2 = 18
std_dev2 = 3

column2_numbers = np.random.normal(mean2, std_dev2, num_samples)
column2_numbers = np.clip(column2_numbers, 12, 26)
column2_numbers = np.round(column2_numbers).astype(int)

column3_numbers = np.random.randint(2, size=num_samples)
column3_numbers[column1_numbers > mean1] = 1 

data = {
    'Miles_Per_week': column1_numbers, 
    'Farthest_run': column2_numbers,
    'Qualified_Boston_Marathon': column3_numbers
}

df = pd.DataFrame(data)

In [30]:
df

Unnamed: 0,Miles_Per_week,Farthest_run,Qualified_Boston_Marathon
0,81,16,1
1,45,17,0
2,44,22,1
3,53,19,0
4,53,15,1
...,...,...,...
495,60,13,1
496,61,13,1
497,53,21,1
498,33,20,1


In [31]:
X = df.iloc[:, 0:2]

In [32]:
y = df.iloc[:, 2]

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:
logreg_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression())
])

In [35]:
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf'))
])

In [36]:
models = {
    "Logistic Regression": logreg_pipeline,
    "SVM": svm_pipeline
}

In [41]:
for name, model in models.items():
    print(f"Model: {name}")

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("Accuracy: ", accuracy_score(y_test, y_pred))
    print("Precision: ", precision_score(y_test, y_pred))
    print("Recall: ", recall_score(y_test, y_pred))
    print("F1 Score: ", f1_score(y_test, y_pred))
    
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("Classification Report: ", classification_report(y_test, y_pred))

Model: Logistic Regression
Accuracy:  0.78
Precision:  0.8414634146341463
Recall:  0.8846153846153846
F1 Score:  0.8625
Confusion Matrix:  [[ 9 13]
 [ 9 69]]
Classification Report:                precision    recall  f1-score   support

           0       0.50      0.41      0.45        22
           1       0.84      0.88      0.86        78

    accuracy                           0.78       100
   macro avg       0.67      0.65      0.66       100
weighted avg       0.77      0.78      0.77       100

Model: SVM
Accuracy:  0.74
Precision:  0.8513513513513513
Recall:  0.8076923076923077
F1 Score:  0.8289473684210527
Confusion Matrix:  [[11 11]
 [15 63]]
Classification Report:                precision    recall  f1-score   support

           0       0.42      0.50      0.46        22
           1       0.85      0.81      0.83        78

    accuracy                           0.74       100
   macro avg       0.64      0.65      0.64       100
weighted avg       0.76      0.74      0.