In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
student=pd.read_csv('Cleaned_Dataset.csv')

In [6]:
student.head()

Unnamed: 0.1,Unnamed: 0,Age,Gender,Stream,Internships,CGPA,Hostel,HistoryOfBacklogs,PlacedOrNot
0,0,22,Male,Electronics And Communication,1,8.0,1,1,1
1,1,21,Female,Computer Science,0,7.0,1,1,1
2,2,22,Female,Information Technology,1,6.0,0,0,1
3,3,21,Male,Information Technology,0,8.0,0,1,1
4,4,22,Male,Mechanical,0,8.0,1,0,1


In [7]:
student.shape

(2499, 9)

In [8]:
student=student.drop(columns=['Unnamed: 0'])

In [9]:
student.shape


(2499, 8)

In [10]:
Y = student['PlacedOrNot']
X = student.drop(['PlacedOrNot'],axis=1)

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier  
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [13]:
ohe = OneHotEncoder()
ohe.fit(X[['Gender','Stream','Hostel','HistoryOfBacklogs']])

In [29]:
column_trans=make_column_transformer((OneHotEncoder(handle_unknown='ignore'),['Gender','Stream','Hostel','HistoryOfBacklogs']),
                                    remainder='passthrough')

In [30]:
classifier = RandomForestClassifier(n_estimators=500, max_depth=20, min_samples_split= 5, min_samples_leaf= 2, max_features='sqrt', class_weight='balanced', random_state=42) 


     

In [31]:
pipe=make_pipeline(column_trans,classifier)

In [32]:
pipe.fit(X_train,Y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [33]:
Y_pred=pipe.predict(X_test)

In [34]:
print("Accuracy: ",accuracy_score(Y_test,Y_pred))

Accuracy:  0.9


In [59]:
new_data = pd.DataFrame({
    'Age':21,
    'Gender': "Male",
    'Stream': "Civil",
    'Internships': 0,
    'CGPA': 6,
    'Hostel': "No",
    'HistoryOfBacklogs': "No",
},index=[0])


In [62]:
p= pipe.predict(new_data)
prob= pipe.predict_proba(new_data)[0][1]
if p==1:
    print('Placed')
    print(f"You are likely to be placed with a probability of {prob[0][1]:.2f}")
else:
    if prob>=0.5:
        print(f"There is still a fair chance of being placed (probability: {prob:.2f})")
    else:
        print(f"It is unlikely for you being placed (probability: {prob:.2f})")

It is unlikely for you being placed (probability: 0.05)


In [63]:
def suggest_adaptive_improvements(new_data, pipe, cgpa_col='CGPA', intern_col='Internships'):
    def get_prob(data):
        return pipe.predict_proba(data)[0][1]

    current_prob = round(get_prob(new_data), 2)
    current_cgpa = new_data[cgpa_col].values[0]
    current_intern = new_data[intern_col].values[0]

    print(f"Current placement probability: {current_prob:.2f}")

    # Define thresholds
    if current_prob < 0.5:
        target_prob = 0.5
        stage = "to get a fair chance"
    elif current_prob < 0.8:
        target_prob = 0.8
        stage = "to be surely placed"
    else:
        print("Already highly likely to be placed. Exploring how high it can go...")
        target_prob = min(current_prob + 0.1, 1.0)
        stage = "for further improvement"

    improved = False

    # Try CGPA increase only
    data_cgpa = new_data.copy()
    cgpa_val = current_cgpa
    while cgpa_val <= 10.0:
        data_cgpa[cgpa_col] = cgpa_val
        data_cgpa[intern_col] = current_intern
        prob = round(get_prob(data_cgpa), 2)
        if prob >= target_prob:
            print(f"- Increase CGPA to {cgpa_val:.2f} (Internship = {current_intern}) {stage} [prob: {prob:.2f}]")
            improved = True
            break
        cgpa_val = round(cgpa_val + 0.1, 2)

    # Try internship only (if not already done)
    if current_intern == 0:
        data_intern = new_data.copy()
        data_intern[intern_col] = 1
        data_intern[cgpa_col] = current_cgpa
        prob = round(get_prob(data_intern), 2)
        if prob >= target_prob:
            print(f"- Doing an internship alone {stage} (probability: {prob:.2f})")
            improved = True

    # Try both
    data_both = new_data.copy()
    data_both[intern_col] = 1
    cgpa_val = current_cgpa
    while cgpa_val <= 10.0:
        data_both[cgpa_col] = cgpa_val
        prob = round(get_prob(data_both), 2)
        if prob >= target_prob:
            print(f"- Internship + CGPA to {cgpa_val:.2f} {stage} [prob: {prob:.2f}]")
            improved = True
            break
        cgpa_val = round(cgpa_val + 0.1, 2)

    if not improved:
        print(f"Even with 10.0 CGPA and Internship, probability stays below {target_prob:.2f}.")

In [47]:
suggest_adaptive_improvements(new_data, pipe)

Current placement probability: 0.13
- Increase CGPA to 7.60 (Internship = 1) to get a fair chance [prob: 0.93]
- Internship + CGPA to 7.60 to get a fair chance [prob: 0.93]


In [56]:
X_temp=X.copy()
X_temp['PlacedOrNot']=Y
print(X_temp.groupby('Internships')['PlacedOrNot'].mean())

Internships
0    0.444144
1    0.513011
2    0.792332
Name: PlacedOrNot, dtype: float64


In [57]:
X_temp=X.copy()
X_temp['PlacedOrNot']=Y
print(X_temp.groupby('CGPA')['PlacedOrNot'].mean())

CGPA
5.0    0.037037
6.0    0.180556
7.0    0.271951
8.0    1.000000
9.0    1.000000
Name: PlacedOrNot, dtype: float64


In [58]:
X_temp=X.copy()
X_temp['PlacedOrNot']=Y
print(X_temp.groupby('Stream')['PlacedOrNot'].mean())

Stream
Civil                            0.428044
Computer Science                 0.535545
Electrical                       0.526132
Electronics And Communication    0.560773
Information Technology           0.557627
Mechanical                       0.435393
Name: PlacedOrNot, dtype: float64
