In [1]:
import pandas as pd
import random

random.seed(42)

data = []

branches = ["CS", "IT", "ENTC", "MECH", "CIVIL"]
skills = ["Low", "Medium", "High"]
internship = ["Yes", "No"]

for i in range(1000):
    age = random.randint(20, 26)
    cgpa = round(random.uniform(5.0, 9.8), 2)
    branch = random.choice(branches)
    skill = random.choice(skills)
    intern = random.choice(internship)
    projects = random.randint(0, 5)

    # logic for placement
    score = 0
    if cgpa > 7.5: score += 1
    if skill == "High": score += 1
    if intern == "Yes": score += 1
    if projects >= 2: score += 1
    if branch in ["CS", "IT"]: score += 1

    placed = 1 if score >= 3 else 0

    data.append([age, cgpa, branch, skill, intern, projects, placed])

df = pd.DataFrame(data, columns=[
    "age", "cgpa", "branch", "skill", "internship", "projects", "placed"
])

df.to_csv("placement.csv", index=False)
df.head()


Unnamed: 0,age,cgpa,branch,skill,internship,projects,placed
0,25,5.53,ENTC,Low,Yes,1,0
1,25,5.49,CIVIL,Low,No,0,0
2,20,5.45,IT,High,Yes,4,1
3,21,8.44,CIVIL,Medium,Yes,3,1
4,24,6.34,CS,Low,No,2,0


In [2]:
df

Unnamed: 0,age,cgpa,branch,skill,internship,projects,placed
0,25,5.53,ENTC,Low,Yes,1,0
1,25,5.49,CIVIL,Low,No,0,0
2,20,5.45,IT,High,Yes,4,1
3,21,8.44,CIVIL,Medium,Yes,3,1
4,24,6.34,CS,Low,No,2,0
...,...,...,...,...,...,...,...
995,24,9.79,CIVIL,Low,Yes,0,0
996,22,8.73,ENTC,High,Yes,0,1
997,25,6.99,ENTC,Medium,Yes,2,0
998,22,7.85,CS,Medium,Yes,2,1


In [3]:
df = pd.read_csv("placement.csv")
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   age         1000 non-null   int64  
 1   cgpa        1000 non-null   float64
 2   branch      1000 non-null   object 
 3   skill       1000 non-null   object 
 4   internship  1000 non-null   object 
 5   projects    1000 non-null   int64  
 6   placed      1000 non-null   int64  
dtypes: float64(1), int64(3), object(3)
memory usage: 54.8+ KB


In [4]:
X = df.drop("placed", axis=1)
y = df["placed"]


In [5]:
#CatvsNum

cat_cols = ["branch", "skill", "internship"]
num_cols = ["age", "cgpa", "projects"]


In [6]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(drop="first"), cat_cols),
    ("num", "passthrough", num_cols)
])


In [7]:
#splitting the dataset

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [8]:
#pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

pipe = Pipeline([
    ("preprocess", preprocessor),
    ("scaler", StandardScaler()),
    ("svm", SVC())
])


In [9]:
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)


0.915

# *Step 10 – Hyperparameter grid*

In [10]:
param_grid = {
    "svm__kernel": ["linear", "rbf"],
    "svm__C": [0.1, 1, 10, 50],
    "svm__gamma": [0.01, 0.1, 1]
}


In [12]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)

grid.fit(X_train, y_train)


In [13]:
grid.best_params_


{'svm__C': 50, 'svm__gamma': 0.1, 'svm__kernel': 'rbf'}

In [14]:
best_model = grid.best_estimator_
best_model.score(X_test, y_test)


0.925

In [15]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = best_model.predict(X_test)

confusion_matrix(y_test, y_pred)
classification_report(y_test, y_pred)


'              precision    recall  f1-score   support\n\n           0       0.94      0.91      0.92        98\n           1       0.91      0.94      0.93       102\n\n    accuracy                           0.93       200\n   macro avg       0.93      0.92      0.92       200\nweighted avg       0.93      0.93      0.92       200\n'

In [16]:
confusion_matrix(y_test, y_pred)
classification_report(y_test, y_pred)


'              precision    recall  f1-score   support\n\n           0       0.94      0.91      0.92        98\n           1       0.91      0.94      0.93       102\n\n    accuracy                           0.93       200\n   macro avg       0.93      0.92      0.92       200\nweighted avg       0.93      0.93      0.92       200\n'