<a href="https://colab.research.google.com/github/SatyamKumarCS/ML_Models/blob/main/Placement_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [175]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [176]:
feature_df=pd.read_csv('/content/indian_engineering_student_placement.csv')
target_df=pd.read_csv('/content/placement_targets.csv')
df = pd.merge(feature_df, target_df, on='Student_ID')
df.head()

Unnamed: 0,Student_ID,gender,branch,cgpa,tenth_percentage,twelfth_percentage,backlogs,study_hours_per_day,attendance_percentage,projects_completed,...,certifications_count,sleep_hours,stress_level,part_time_job,family_income_level,city_tier,internet_access,extracurricular_involvement,placement_status,salary_lpa
0,1,Male,ECE,8.74,74.0,75.0,0,3.8,71.1,7,...,5,6.5,8,Yes,Medium,Tier 2,Yes,Medium,Placed,14.95
1,2,Female,ECE,7.8,75.3,69.7,0,6.3,69.5,5,...,1,7.1,8,Yes,Medium,Tier 3,Yes,Low,Placed,14.91
2,3,Female,IT,6.95,62.8,68.3,0,1.5,62.5,8,...,3,6.1,2,No,Low,Tier 2,Yes,High,Placed,17.73
3,4,Male,ECE,7.46,57.9,51.4,1,4.7,64.6,6,...,2,7.3,7,No,Medium,Tier 1,Yes,Low,Placed,14.52
4,5,Male,IT,6.86,61.3,73.5,2,5.2,75.9,3,...,1,6.0,7,No,Medium,Tier 1,Yes,Medium,Placed,15.91


In [177]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 25 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Student_ID                   5000 non-null   int64  
 1   gender                       5000 non-null   object 
 2   branch                       5000 non-null   object 
 3   cgpa                         5000 non-null   float64
 4   tenth_percentage             5000 non-null   float64
 5   twelfth_percentage           5000 non-null   float64
 6   backlogs                     5000 non-null   int64  
 7   study_hours_per_day          5000 non-null   float64
 8   attendance_percentage        5000 non-null   float64
 9   projects_completed           5000 non-null   int64  
 10  internships_completed        5000 non-null   int64  
 11  coding_skill_rating          5000 non-null   int64  
 12  communication_skill_rating   5000 non-null   int64  
 13  aptitude_skill_rat

In [178]:
df.drop('salary_lpa',axis=1,inplace=True)

In [179]:
df['extracurricular_involvement'].fillna(df['extracurricular_involvement'].mode()[0],inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['extracurricular_involvement'].fillna(df['extracurricular_involvement'].mode()[0],inplace=True)


In [180]:
df.drop('Student_ID',axis=1,inplace=True)

In [181]:
df.describe()


Unnamed: 0,cgpa,tenth_percentage,twelfth_percentage,backlogs,study_hours_per_day,attendance_percentage,projects_completed,internships_completed,coding_skill_rating,communication_skill_rating,aptitude_skill_rating,hackathons_participated,certifications_count,sleep_hours,stress_level
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,8.283798,74.5374,74.54306,0.3436,4.04112,72.04156,5.5294,2.1326,3.7286,3.0254,4.1164,3.7042,2.836,6.955,6.0202
std,1.008391,10.266752,10.244481,0.612547,1.967754,7.720029,2.053772,1.143713,1.278464,1.412358,0.716208,1.603252,1.781724,1.147881,2.851213
min,5.0,50.0,50.0,0.0,0.0,44.7,0.0,0.0,1.0,1.0,1.0,0.0,0.0,4.0,1.0
25%,7.61,67.4,67.6,0.0,2.7,66.6,4.0,1.0,3.0,2.0,4.0,3.0,2.0,6.2,4.0
50%,8.32,74.7,74.8,0.0,4.0,72.05,6.0,2.0,4.0,3.0,4.0,4.0,3.0,7.0,6.0
75%,9.02,82.0,81.7,1.0,5.4,77.2,7.0,3.0,5.0,4.0,5.0,5.0,4.0,7.8,9.0
max,10.0,100.0,100.0,5.0,10.0,99.2,8.0,4.0,5.0,5.0,5.0,6.0,9.0,9.0,10.0


In [182]:
cat_cols = df.select_dtypes(include="object").columns
df[cat_cols] = df[cat_cols].fillna("Unknown")
encoder = LabelEncoder()
for col in cat_cols:
    df[col] = encoder.fit_transform(df[col])

In [183]:
X = df.drop(["placement_status"], axis=1)
y = df["placement_status"]

In [184]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [185]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [186]:
model = Sequential()
model.add(Dense(128, activation="relu", input_shape=(X_train.shape[1],)))
model.add(Dense(64,activation="relu"))
model.add(Dense(32,activation="relu"))
model.add(Dense(1, activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [187]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [188]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
pred = model.fit(X_train, y_train, epochs=100, verbose=1, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7863 - loss: 0.4446 - val_accuracy: 0.8875 - val_loss: 0.2830
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8971 - loss: 0.2478 - val_accuracy: 0.8800 - val_loss: 0.2778
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9031 - loss: 0.2430 - val_accuracy: 0.8788 - val_loss: 0.2797
Epoch 4/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9080 - loss: 0.2181 - val_accuracy: 0.8838 - val_loss: 0.2747
Epoch 5/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9071 - loss: 0.2174 - val_accuracy: 0.8763 - val_loss: 0.2895
Epoch 6/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9130 - loss: 0.2028 - val_accuracy: 0.8838 - val_loss: 0.2900
Epoch 7/100
[1m100/10

In [189]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy on Test Set: {accuracy*100:.2f}%')

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Model Accuracy on Test Set: 88.00%
