In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer

# Load the dataset
df = pd.read_csv('/content/dataset1.csv')
target_column = 'PlacedOrNot'

# Convert text (object) columns to numbers. With errors='coerce', values that
# cannot be parsed become NaN instead of raising, so a column holding no
# numeric data at all comes back entirely NaN. Such feature columns have no
# usable values for the models below, so they are dropped here with a message
# rather than being left for the imputer to discard silently.
# NOTE(review): this also drops categorical text columns; encode them
# (e.g. one-hot) before this step if they should be used as features.
for column in list(df.columns):
    if df[column].dtype != object:
        continue
    converted = pd.to_numeric(df[column], errors='coerce')
    if column != target_column and converted.isna().all():
        print(f"Could not convert column {column} to numeric.")
        df = df.drop(column, axis=1)
    else:
        df[column] = converted

# A row without a label cannot be used for training or scoring, so drop it
# instead of filling the label with a column mean (which is not a valid class).
df = df.dropna(subset=[target_column])

# Preprocess the data: separate the features from the label
X = df.drop(target_column, axis=1)
y = df[target_column]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fill remaining gaps in the features with column means learned from the
# training rows only, then apply those same means to the test rows, so that
# no test-set statistics leak into training.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Candidate classifiers, keyed by the label used when reporting results.
# The randomized ensemble estimators are seeded so repeated runs produce the
# same metrics; 42 is the same seed used for the train/test split.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
    # NOTE(review): despite the 'XGBoost' label this is scikit-learn's
    # GradientBoostingClassifier, not the xgboost library. The key is kept
    # unchanged so the reported model names stay the same.
    'XGBoost': GradientBoostingClassifier(random_state=42),
    'Naive Bayes': GaussianNB()
}

# Report labels paired with the scoring function that produces each value,
# listed in the order the metrics are stored and later printed.
scorers = (
    ('Confusion Matrix', confusion_matrix),
    ('Accuracy', accuracy_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1 Score', f1_score),
)

# Fit each candidate on the training split, predict on the test split, and
# record every metric under the model's label.
model_results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    model_results[name] = {label: score(y_test, y_pred) for label, score in scorers}

# Print one report per model: its label, the confusion matrix on its own
# lines, each scalar metric, and a blank separator line.
for name, results in model_results.items():
    report_lines = [
        f'Model: {name}',
        f'Confusion Matrix:\n{results["Confusion Matrix"]}',
        f'Accuracy: {results["Accuracy"]}',
        f'Precision: {results["Precision"]}',
        f'Recall: {results["Recall"]}',
        f'F1 Score: {results["F1 Score"]}',
        '',  # trailing empty entry yields the blank line between reports
    ]
    print('\n'.join(report_lines))

Model: Logistic Regression
Confusion Matrix:
[[195  85]
 [ 81 233]]
Accuracy: 0.7205387205387206
Precision: 0.7327044025157232
Recall: 0.7420382165605095
F1 Score: 0.7373417721518987

Model: Random Forest
Confusion Matrix:
[[267  13]
 [ 55 259]]
Accuracy: 0.8855218855218855
Precision: 0.9522058823529411
Recall: 0.8248407643312102
F1 Score: 0.8839590443686006

Model: AdaBoost
Confusion Matrix:
[[262  18]
 [ 57 257]]
Accuracy: 0.8737373737373737
Precision: 0.9345454545454546
Recall: 0.8184713375796179
F1 Score: 0.8726655348047538

Model: XGBoost
Confusion Matrix:
[[269  11]
 [ 58 256]]
Accuracy: 0.8838383838383839
Precision: 0.9588014981273408
Recall: 0.8152866242038217
F1 Score: 0.8812392426850258

Model: Naive Bayes
Confusion Matrix:
[[232  48]
 [ 81 233]]
Accuracy: 0.7828282828282829
Precision: 0.8291814946619217
Recall: 0.7420382165605095
F1 Score: 0.7831932773109243

