In [2]:

import numpy as np
import pandas as pd

In [3]:
np.random.seed(42)

In [6]:
n=500
df = pd.DataFrame({
    "Age": np.random.randint(18, 30, size=n),   
    "CGPA": np.round(np.random.uniform(0, 100, size=n),2),
    "Internship_count": np.random.randint(0, 4, n),
    "Technical_skills": np.random.randint(50, 100, n),
    "Attendance": np.random.randint(60, 100, n),
})

In [8]:
df["Placed"] = np.where(
    (df["CGPA"] >= 7.5) & (df["Technical_skills"] >= 75), "Yes", "No"
)

In [9]:
df.shape

(500, 6)

In [13]:
#Bootstrap Resampling[+200}
bootstrap=df.sample(n=200, replace=True, random_state=42)
df_aug=pd.concat([df,bootstrap], ignore_index=True)

In [21]:
df_aug

Unnamed: 0,Age,CGPA,Internship_count,Technical_skills,Attendance,Placed
0,20,79.86,2,61,98,No
1,22,96.78,2,75,75,Yes
2,29,85.69,0,68,92,No
3,18,11.76,2,94,80,Yes
4,24,73.97,1,56,68,No
...,...,...,...,...,...,...
695,29,83.95,0,65,66,No
696,24,47.65,2,95,70,Yes
697,18,9.23,0,63,87,No
698,20,72.17,0,91,65,Yes


In [22]:
#Noise Injection
noise=df.sample(n=150, random_state=42).copy()
noise['CGPA'] += np.random.normal(0, 0.1,150)
noise['Technical_skills'] += np.random.normal(-3, 4,150)
noise['Attendance'] += np.random.normal(0, 2,150)

#Logical Constraints
noise['CGPA'] = noise['CGPA'].clip(6.0,9.5)
noise['Technical_skills'] = noise['Technical_skills'].clip(55,100)
noise['Attendance'] = noise['Attendance'].clip(65,100)
df_aug=pd.concat([df_aug,noise], ignore_index=True)
df_aug

Unnamed: 0,Age,CGPA,Internship_count,Technical_skills,Attendance,Placed
0,20,79.86,2,61.000000,98.000000,No
1,22,96.78,2,75.000000,75.000000,Yes
2,29,85.69,0,68.000000,92.000000,No
3,18,11.76,2,94.000000,80.000000,Yes
4,24,73.97,1,56.000000,68.000000,No
...,...,...,...,...,...,...
845,23,9.50,3,91.751660,80.875177,Yes
846,28,9.50,1,61.116991,90.868971,No
847,20,9.50,0,56.649790,67.655722,No
848,27,9.50,2,55.000000,69.819252,No


In [15]:
from imblearn.over_sampling import SMOTE

In [17]:
x=df_aug.drop("Placed", axis=1)
y=df_aug["Placed"]

In [18]:
x

Unnamed: 0,Age,CGPA,Internship_count,Technical_skills,Attendance
0,20,79.86,2,61,98
1,22,96.78,2,75,75
2,29,85.69,0,68,92
3,18,11.76,2,94,80
4,24,73.97,1,56,68
...,...,...,...,...,...
695,29,83.95,0,65,66
696,24,47.65,2,95,70
697,18,9.23,0,63,87
698,20,72.17,0,91,65


In [19]:
y

0       No
1      Yes
2       No
3      Yes
4       No
      ... 
695     No
696    Yes
697     No
698    Yes
699    Yes
Name: Placed, Length: 700, dtype: object

In [20]:
smote=SMOTE(sampling_strategy='0.8', random_state=42)