In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:

# Load the training dataset
data = pd.read_csv('train.csv')

In [3]:
data.head(4)

Unnamed: 0,sl_no,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status,salary
0,1,0,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8,Placed,270000.0
1,2,0,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed,200000.0
2,3,0,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8,Placed,250000.0
3,4,0,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed,


In [5]:
data.shape

(215, 15)

In [6]:
# Drop irrelevant columns
data.drop(columns=['sl_no', 'salary'], inplace=True)

In [7]:
# Encode categorical variables
le = LabelEncoder()
categorical_cols = ['ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'specialisation', 'workex', 'status']
for col in categorical_cols:
    data[col] = le.fit_transform(data[col])


In [8]:
# Define features and target
X = data.drop(columns=['status'])
y = data['status']


In [9]:
# Scale numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [10]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)


In [11]:
# Initialize classifiers
log_reg = LogisticRegression()
decision_tree = DecisionTreeClassifier()
random_forest = RandomForestClassifier()
svm = SVC(probability=True)
k_nn = KNeighborsClassifier()

In [12]:
# Train models
log_reg.fit(X_train, y_train)
decision_tree.fit(X_train, y_train)
random_forest.fit(X_train, y_train)
svm.fit(X_train, y_train)
k_nn.fit(X_train, y_train)


In [13]:
# Evaluate models
def evaluate_model(model, name):
    y_pred = model.predict(X_test)
    print(f'\n{name} Performance:')
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

evaluate_model(log_reg, 'Logistic Regression')
evaluate_model(decision_tree, 'Decision Tree')
evaluate_model(random_forest, 'Random Forest')
evaluate_model(svm, 'SVM')
evaluate_model(k_nn, 'K-Nearest Neighbors')


Logistic Regression Performance:
Accuracy: 0.8307692307692308
              precision    recall  f1-score   support

           0       0.78      0.67      0.72        21
           1       0.85      0.91      0.88        44

    accuracy                           0.83        65
   macro avg       0.81      0.79      0.80        65
weighted avg       0.83      0.83      0.83        65

[[14  7]
 [ 4 40]]

Decision Tree Performance:
Accuracy: 0.8
              precision    recall  f1-score   support

           0       0.68      0.71      0.70        21
           1       0.86      0.84      0.85        44

    accuracy                           0.80        65
   macro avg       0.77      0.78      0.77        65
weighted avg       0.80      0.80      0.80        65

[[15  6]
 [ 7 37]]

Random Forest Performance:
Accuracy: 0.8
              precision    recall  f1-score   support

           0       0.83      0.48      0.61        21
           1       0.79      0.95      0.87        4

In [14]:
# Ensemble Model - Voting Classifier
voting_clf = VotingClassifier(estimators=[
    ('log_reg', log_reg),
    ('decision_tree', decision_tree),
    ('random_forest', random_forest),
    ('svm', svm),
    ('k_nn', k_nn)
], voting='hard')



voting_clf.fit(X_train, y_train)
evaluate_model(voting_clf, 'Voting Classifier')




Voting Classifier Performance:
Accuracy: 0.7846153846153846
              precision    recall  f1-score   support

           0       0.77      0.48      0.59        21
           1       0.79      0.93      0.85        44

    accuracy                           0.78        65
   macro avg       0.78      0.70      0.72        65
weighted avg       0.78      0.78      0.77        65

[[10 11]
 [ 3 41]]
