In [10]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Read the already-preprocessed dataset from disk
df = pd.read_csv('dataset.csv')

# 'num' is the prediction target; every other column except 'dataset' is a feature
y = df['num']
X = df.drop(columns=['num', 'dataset'])

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training features only, then apply
# the same fitted scaler to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Logistic-regression classifier with a pinned random_state
model = LogisticRegression(random_state=30)

# Train the model on the standardized training features
model.fit(X_train_scaled, y_train)

# Predict class labels for the held-out test set.
# predict() already returns discrete labels taken from model.classes_, so the
# result is used as-is: rounding it was a no-op for numeric labels and would
# raise for non-numeric ones.
y_pred = model.predict(X_test_scaled)

In [15]:
# Evaluate the model on the held-out test set
# NOTE(review): mean squared error is computed on class labels here; if the
# labels are 0/1 it is simply the misclassification rate (1 - accuracy).
mse = mean_squared_error(y_test, y_pred)

acc = accuracy_score(y_test, y_pred)


print(f'Mean Squared Error: {mse: .2f}')
print(f'Accuracy: {acc:.2f}')


# Calculate confusion matrix.
# The label order is pinned to the classes the model was trained on, so the
# matrix keeps the same shape and ordering even when one class happens to be
# absent from both y_test and y_pred (without this, the matrix would shrink
# and the four-way unpacking below would fail).
conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)

# Extract values from the confusion matrix.
# Rows are true labels and columns are predicted labels, so for a two-class
# problem the flattened order is TN, FP, FN, TP (second class = positive).
true_negative, false_positive, false_negative, true_positive = conf_matrix.ravel()

# Calculate specificity (true negative rate)
specificity = true_negative / (true_negative + false_positive)
print(f'Specificity: {specificity:.2f}')

# Calculate precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision:.2f}')

# Calculate recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall:.2f}')

# Calculate false positive rate (the complement of specificity)
false_positive_rate = false_positive / (false_positive + true_negative)
print(f'False Positive Rate: {false_positive_rate:.2f}')

# Calculate F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 Score: {f1:.2f}')

Mean Squared Error:  0.14
Accuracy: 0.86
Specificity: 0.86
Precision: 0.86
Recall: 0.86
False Positive Rate: 0.14
F1 Score: 0.86


In [16]:
from joblib import dump
# Write the fitted classifier and the fitted scaler to disk with joblib.
# The scaler is saved alongside the model because the model was trained on
# scaler-transformed features (presumably so both can be reloaded together
# at prediction time).
dump(model, 'model.pkl')
# As the last expression in the cell, this call's return value is what the
# notebook echoes as the cell output.
dump(scaler, 'scaler.pkl')

['scaler.pkl']