In [74]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
import plotly.express as px  
from plotly.offline import init_notebook_mode, iplot  
init_notebook_mode(connected=True)  

from sklearn.preprocessing import StandardScaler  
from sklearn.decomposition import PCA  
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV  
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report 
import pickle  

from sklearn.ensemble import VotingClassifier, StackingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import GridSearchCV

In [75]:
dataSet = pd.read_csv('college_place.csv')

In [76]:
dataSet.shape

(2966, 8)

In [77]:
dataSet.head()

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,Hostel,HistoryOfBacklogs,PlacedOrNot
0,22,Male,Electronics And Communication,1,8,1,1,1
1,21,Female,Computer Science,0,7,1,1,1
2,22,Female,Information Technology,1,6,0,0,1
3,21,Male,Information Technology,0,8,0,1,1
4,22,Male,Mechanical,0,8,1,0,1


In [78]:
dataSet.dtypes

Age                   int64
Gender               object
Stream               object
Internships           int64
CGPA                  int64
Hostel                int64
HistoryOfBacklogs     int64
PlacedOrNot           int64
dtype: object

In [None]:
dataSet.shape

(2966, 8)

In [80]:
dataSet['Gender']=dataSet['Gender'].map({'Male':1,'Female':0})



In [81]:
dataSet['Stream'] = dataSet['Stream'].map({'Electronics And Communication': 1,  
                                 'Computer Science': 2,  
                                'Information Technology': 3,  
                                'Mechanical':4,  
                                'Electrical':5,  
                                'Civil':6})  

In [82]:
X=dataSet.iloc[:,0:7]
Y=dataSet.iloc[:,-1]

In [92]:

X = np.array(X)  
Y = np.array(Y)  


data = np.column_stack((X, Y))  


np.random.shuffle(data)


split_ratio = 0.7
train_size = int(len(data) * split_ratio)


train_data = data[:train_size]
test_data = data[train_size:]

X_train = train_data[:, :-1]  
Y_train = train_data[:, -1]  

X_test = test_data[:, :-1]    
Y_test = test_data[:, -1]  

In [94]:
decisionTreeModel=DecisionTreeClassifier(random_state=0)
decisionTreeModel.fit(X_train,Y_train)


decisionTreeModel_prediction=decisionTreeModel.predict(X_test)

decisionTreeModel_accuracy=accuracy_score(Y_test,decisionTreeModel_prediction)

print(f"Decision Tree Accuracy: {decisionTreeModel_accuracy:.4f}")
print("Decision Tree Report:\n", classification_report(Y_test, decisionTreeModel_prediction))

Decision Tree Accuracy: 0.8708
Decision Tree Report:
               precision    recall  f1-score   support

           0       0.82      0.90      0.86       381
           1       0.92      0.85      0.88       509

    accuracy                           0.87       890
   macro avg       0.87      0.87      0.87       890
weighted avg       0.88      0.87      0.87       890



In [85]:
RandomForestModel=RandomForestClassifier(max_depth=10,random_state=0)
RandomForestModel.fit(X_train,Y_train)

RandomForestModel_prediction=RandomForestModel.predict(X_test)

RandomForestModel_accuracy=accuracy_score(Y_test,RandomForestModel_prediction)

print(f"Random Forest Accuracy: {RandomForestModel_accuracy:.4f}")
print("Random Forest Report:\n", classification_report(Y_test, RandomForestModel_prediction))

Random Forest Accuracy: 0.8685
Random Forest Report:
               precision    recall  f1-score   support

           0       0.78      0.96      0.86       379
           1       0.97      0.80      0.87       511

    accuracy                           0.87       890
   macro avg       0.87      0.88      0.87       890
weighted avg       0.89      0.87      0.87       890



In [95]:
ensemble_model=VotingClassifier(estimators=[
    ('decision_tree',decisionTreeModel),
    ('random_forest',RandomForestModel)],voting='hard')

ensemble_model.fit(X_train,Y_train)

ensemble_model_prediction=ensemble_model.predict(X_test)

ensemble_model_accuracy=accuracy_score(Y_test,ensemble_model_prediction)

print(f"Ensemble Model Accuracy: {ensemble_model_accuracy:.4f}")
print("Ensemble Model Report:\n", classification_report(Y_test, ensemble_model_prediction))

Ensemble Model Accuracy: 0.8820
Ensemble Model Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.88       381
           1       0.97      0.82      0.89       509

    accuracy                           0.88       890
   macro avg       0.89      0.89      0.88       890
weighted avg       0.90      0.88      0.88       890



In [96]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

dt_param_grid = {
    'max_depth': [5, 10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 20, None],
    'min_samples_split': [2, 5, 10]
}

dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)

dt_grid = GridSearchCV(dt, dt_param_grid, cv=5, n_jobs=-1, verbose=1)
rf_grid = GridSearchCV(rf, rf_param_grid, cv=5, n_jobs=-1, verbose=1)

dt_grid.fit(X_train, Y_train)
rf_grid.fit(X_train, Y_train)

best_dt = dt_grid.best_estimator_
best_rf = rf_grid.best_estimator_

voting_clf = VotingClassifier(estimators=[
    ('decision_tree', best_dt),
    ('random_forest', best_rf)
], voting='hard')

voting_clf.fit(X_train, Y_train)
y_pred_voting = voting_clf.predict(X_test)

accuracy_voting = accuracy_score(Y_test, y_pred_voting)
print(f"Voting Classifier (Tuned Decision Tree + Tuned RF) Accuracy: {accuracy_voting:.4f}")
print("Classification Report:\n", classification_report(Y_test, y_pred_voting))

print("Best Decision Tree Parameters:", dt_grid.best_params_)
print("Best Random Forest Parameters:", rf_grid.best_params_)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Voting Classifier (Tuned Decision Tree + Tuned RF) Accuracy: 0.8910
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.99      0.89       381
           1       0.99      0.82      0.90       509

    accuracy                           0.89       890
   macro avg       0.90      0.90      0.89       890
weighted avg       0.91      0.89      0.89       890

Best Decision Tree Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10}
Best Random Forest Parameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 300}
