In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix


# Location of the input CSV. NOTE(review): this looks like a mounted Google
# Drive path, so the drive presumably has to be mounted first - confirm.
file_path = '/content/drive/MyDrive/ml@2/logatta.csv'

# Load the dataset. Every later step reads from `data`, so it must be created
# here; without this line a fresh kernel fails with NameError on the next use.
data = pd.read_csv(file_path)

# Quick look at the first rows to confirm the file was parsed as expected.
print(data.head())

# Column groups handled by the preprocessing steps below.
categorical_cols = ['BusinessTravel', 'Education', 'MaritalStatus', 'OverTime', 'Gender']
numerical_cols = ['Age', 'EmployeeNumber', 'DailyRate']
# NOTE(review): 'EmployeeNumber' looks like an identifier rather than a
# predictive feature - confirm it should be part of the model inputs.
target_col = 'accepted for the interview'

# Replace each category with an integer code (written back into `data`).
encoder = OrdinalEncoder()
data[categorical_cols] = encoder.fit_transform(data[categorical_cols])

# Separate the features from the target column.
X = data.drop(target_col, axis=1)
y = data[target_col]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise the numeric columns AFTER splitting: the scaler learns its mean
# and variance from the training rows only and is then applied unchanged to the
# test rows, so no test-set statistics leak into the training features.
# Explicit copies keep the assignments from touching `X` / `data`, which
# therefore retain the unscaled numeric values.
scaler = StandardScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Classifiers to compare, each constructed with its default settings.
models = {
    'Logistic Regression': LogisticRegression(),
    'Naive Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier()
}

# Per-model evaluation results, keyed by the display name used in `models`.
results = {}

for model_name, model in models.items():
    # Train on the training split, then evaluate on the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = model.score(X_test, y_test)

    # Compute the text report and the confusion matrix once, then both store
    # and print the same objects.
    report_text = classification_report(y_test, y_pred)
    matrix = confusion_matrix(y_test, y_pred)
    results[model_name] = dict(
        accuracy=accuracy,
        classification_report=report_text,
        confusion_matrix=matrix,
    )

    print(
        f"\n{model_name} Accuracy: {accuracy}",
        f"Classification Report:\n{report_text}",
        f"Confusion Matrix:\n{matrix}",
        sep="\n",
    )


# Bar chart comparing the test accuracy of every model, in the order the
# models were declared.
model_names = list(models.keys())
accuracy_values = [results[name]['accuracy'] for name in model_names]

fig, ax = plt.subplots()
ax.bar(model_names, accuracy_values, color=['blue', 'green', 'red'])
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
ax.set_title('Model Comparison')
plt.show()


# Persist the evaluation summary of every model to a plain-text file in the
# current working directory (the file is overwritten on each run).
with open('model_comparison_results.txt', 'w') as f:
    for model_name, result in results.items():
        # One write per model; adjacent f-strings concatenate into the same
        # text the file would receive from four separate writes.
        f.write(
            f"{model_name} Results:\n"
            f"Accuracy: {result['accuracy']}\n"
            f"Classification Report:\n{result['classification_report']}\n"
            f"Confusion Matrix:\n{result['confusion_matrix']}\n\n"
        )


In [None]:

import seaborn as sns


# Accuracy comparison again, this time on an explicitly sized 8x5 figure.
model_names = list(models.keys())
accuracy_values = [results[name]['accuracy'] for name in model_names]

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(model_names, accuracy_values, color=['blue', 'green', 'red'])
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
ax.set_title('Model Comparison - Accuracy')
plt.show()


# One confusion-matrix heatmap per model, laid out side by side in a single row.
plt.figure(figsize=(15, 5))
n_panels = len(models)

for position, (model_name, result) in enumerate(results.items(), start=1):
    # Subplot positions are 1-based, hence start=1 above.
    ax = plt.subplot(1, n_panels, position)
    cm = result['confusion_matrix']
    # The tick labels are hard-coded for a two-class No/Yes target.
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=['Pred: No', 'Pred: Yes'],
        yticklabels=['True: No', 'True: Yes'],
        ax=ax,
    )
    ax.set_title(f'{model_name} Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

plt.tight_layout()
plt.show()


# Draw one heatmap per model showing the metrics from its classification report.
for model_name in results:
    print(f"\n{model_name} Classification Report Visualization")

    # Look the estimator up by name. The bare name `model` is only the variable
    # left behind by the training loop and always refers to the LAST model, so
    # using it here would plot the same report under every model's title.
    fitted_model = models[model_name]
    class_report = classification_report(y_test, fitted_model.predict(X_test), output_dict=True)
    # Transpose so that each report entry becomes a row and each metric a column.
    df_report = pd.DataFrame(class_report).transpose()

    plt.figure(figsize=(6, 4))
    # Leave out the last row and the last column of the report table (with
    # scikit-learn's report layout these are presumably 'weighted avg' and
    # 'support' - confirm), so raw counts do not share the 0-1 colour scale.
    sns.heatmap(df_report.iloc[:-1, :-1], annot=True, cmap='Blues', fmt='.2f', cbar=False)
    plt.title(f'{model_name} - Classification Report')
    plt.xlabel('Metrics')
    plt.ylabel('Classes')
    plt.show()
