In [1]:
pip install numpy pandas scikit-learn matplotlib seaborn

Collecting numpy
  Using cached numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp310-cp310-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Using cached scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached thr

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


In [5]:
# 1. Load the raw placement records; the path is relative to the notebook's
# working directory.
DATA_PATH = "placement_data_30.csv"
data = pd.read_csv(DATA_PATH)

In [7]:
# 2.1 Drop duplicates
# Rows are compared on every column and the first occurrence of each duplicate
# group is kept. The original index labels are preserved (no reset).
data = data.drop_duplicates(subset=None, keep="first")

In [9]:
# 2.2 Handle missing values — float columns are filled with the column mean,
# every other column with its most frequent value (first mode).
# With read_csv's default dtypes a numeric column that contains NaN is stored
# as float, so the float check covers the numeric columns that need imputing.
for col in data.columns:
    column = data[col]
    if not column.isna().any():
        # Nothing to impute; leave the column (and its dtype) untouched.
        continue
    if pd.api.types.is_float_dtype(column):
        # Dtype-family check instead of membership in [np.float64, np.int64],
        # so float32 / nullable Float64 columns are mean-filled as well.
        data[col] = column.fillna(column.mean())
    else:
        modes = column.mode()
        if modes.empty:
            # The column is entirely missing: there is no mode to fill with,
            # and indexing mode()[0] would raise. Leave it as-is.
            continue
        data[col] = column.fillna(modes.iloc[0])

In [10]:
# 3. Encode the target column as integers.
# LabelEncoder numbers the classes in sorted order, so for labels such as
# "No"/"Yes" the mapping is No=0, Yes=1. The fitted encoder is kept in `le`
# so the original labels can be recovered with le.inverse_transform.
le = LabelEncoder()
le.fit(data['Placed'])
data['Placed'] = le.transform(data['Placed'])

In [12]:
# 4. Columns used as model inputs; 'Placed' (already label-encoded) is the target.
feature_cols = [
    'Tenth_Percent',
    'Twelfth_Percent',
    'FE_Percent',
    'SE_Percent',
    'TE_Percent',
    'Certifications',
    'Projects_Completed',
    'Internships',
]
target_col = 'Placed'

X = data.loc[:, feature_cols]
y = data.loc[:, target_col]


In [13]:
# Outlier removal with per-column IQR fences: a row is dropped when ANY of its
# feature values lies below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.
# NOTE(review): a column whose IQR is 0 flags every value that differs from its
# quartile as an outlier — worth checking for the low-cardinality features.
# NOTE: this cell reassigns X and y, so re-running it filters the already
# filtered data again.
Q1, Q3 = X.quantile(0.25), X.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = X.lt(lower_fence) | X.gt(upper_fence)
filter_mask = ~is_outlier.any(axis=1)
X, y = X.loc[filter_mask], y.loc[filter_mask]

In [14]:
# 6.1 Normalization (Min–Max scaling example)
mm_scaler = MinMaxScaler()
X_norm = mm_scaler.fit_transform(X)


In [15]:
# 6.2 Standardization
std_scaler = StandardScaler()
X_std = std_scaler.fit_transform(X_norm)

In [16]:
# For model building use X_std
X_final = X_std

In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y, test_size=0.2, random_state=42, stratify=y
)

In [18]:
# 8. Fit a decision tree on the training split. Only the seed is set; every
# other hyper-parameter is left at the library default.
tree_params = {"random_state": 42}
model = DecisionTreeClassifier(**tree_params)
model.fit(X_train, y_train)


0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [19]:

# 9. Score the fitted tree on the held-out split.
# NOTE(review): the recorded run evaluates on only 7 rows, so the reported
# metrics are a very coarse estimate.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         5

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7



In [20]:
# 10. Predict for a new sample (2024-25 batch)
# One record, keyed by the same column names (in the same order) as the
# training features, wrapped into a single-row DataFrame.
sample_record = {
    'Tenth_Percent': 84,
    'Twelfth_Percent': 80,
    'FE_Percent': 75,
    'SE_Percent': 78,
    'TE_Percent': 77,
    'Certifications': 3,
    'Projects_Completed': 2,
    'Internships': 1,
}
sample = pd.DataFrame([sample_record])

In [21]:
# Push the new sample through the already-fitted scalers in training order:
# Min–Max first, then standardization. Only transform() is called, so neither
# scaler is re-fitted on the sample.
sample_norm = mm_scaler.transform(X=sample)
sample_std = std_scaler.transform(X=sample_norm)

In [22]:
# Classify the preprocessed sample; encoded class 1 is reported as "Placed",
# anything else as "Not Placed".
prediction = model.predict(sample_std)
outcome = "Placed" if prediction[0] == 1 else "Not Placed"
print("\nPrediction for sample case:", outcome)



Prediction for sample case: Placed
