In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so any later random draws are repeatable
np.random.seed(42)

# Number of students.
# NOTE(review): no visible cell reads `n`; the actual row count comes from the
# CSV loaded below. Kept in case a cell outside this view depends on it.
n = 100

# Load the student placement records from disk
data = pd.read_csv("placement_student_data.csv")

# Work on an independent copy of the loaded frame. `pd.DataFrame(data)` on an
# existing DataFrame does not copy by default, so the in-place edits made to
# `df` in later cells could otherwise leak back into the raw `data`.
df = data.copy()


In [3]:
from sklearn.impute import SimpleImputer
import numpy as np

# Blank out a few SGPA entries (label-based, inclusive row ranges) so the
# imputation step below has something to fill in
df.loc[5:10, 'TE SGPA'] = np.nan
df.loc[15:18, 'SE SGPA'] = np.nan

# Replace every missing SGPA value with the mean of its column
sgpa_columns = ['FE SGPA', 'SE SGPA', 'TE SGPA']
imputer = SimpleImputer(strategy='mean')
df[sgpa_columns] = imputer.fit_transform(df[sgpa_columns])


In [5]:
# Append a copy of the first row (renumbering the index), then discard any
# fully duplicated rows again; the first occurrence of each row is kept
df = pd.concat([df, df.iloc[[0]]], ignore_index=True).drop_duplicates()


In [7]:
# Drop SGPA outliers with the 1.5 * IQR rule, one column after another.
# Each column's quartiles are computed on the rows that survived the
# previous columns' filtering.
for col in ['FE SGPA', 'SE SGPA', 'TE SGPA']:
    Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_fence = Q1 - 1.5 * IQR
    upper_fence = Q3 + 1.5 * IQR
    # `between` is inclusive on both ends, matching >= / <=
    df = df[df[col].between(lower_fence, upper_fence)]


In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Feature columns used for scaling and modelling; 'Placed' is the target
features = ['10th %', '12th %', 'FE SGPA', 'SE SGPA', 'TE SGPA',
            'Certifications', 'Projects', 'Internships']

# Split the *unscaled* rows first (80/20, stratified on the target, seeded).
# Splitting before scaling keeps test-set statistics out of the scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[features], df['Placed'],
    test_size=0.2, stratify=df['Placed'], random_state=42
)

# Fit the standardiser (mean = 0, std = 1) on the training rows only;
# fitting on the full frame would leak information from the test rows.
scaler = StandardScaler()
scaler.fit(X_train_raw)

# Scaled copy of the whole frame, produced with the train-fitted scaler
df_scaled = df.copy()
df_scaled[features] = scaler.transform(df[features])

# Features and target
X = df_scaled[features]
y = df_scaled['Placed']

# Scaled train/test features, selected by the row labels of the split above
X_train = X.loc[X_train_raw.index]
X_test = X.loc[X_test_raw.index]


In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Fit a 100-tree random forest on the training split (seeded for repeatability)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out split.
# The status markers below were mis-encoded (UTF-8 read as cp1252); restored.
y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n✅ Classification Report:\n", classification_report(y_test, y_pred))

# Persist the fitted forest. The model was trained on scaled features, so any
# later use of the pickle needs inputs transformed by the same `scaler`.
joblib.dump(model, "placement_model.pkl")
print("✅ Model saved as placement_model.pkl")


✅ Accuracy: 0.5789473684210527

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.59      0.56        17
           1       0.63      0.57      0.60        21

    accuracy                           0.58        38
   macro avg       0.58      0.58      0.58        38
weighted avg       0.58      0.58      0.58        38

✅ Model saved as placement_model.pkl


In [15]:
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, classification_report

# Logistic regression with the liblinear solver and a raised iteration cap.
# Seeded like the split and the random forest so reruns are repeatable.
# Note: this rebinds `model`, replacing the random forest from the earlier cell.
model = LogisticRegression(solver='liblinear', max_iter=1000, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predictions on the held-out split
y_pred = model.predict(X_test)

# Evaluation.
# The status markers below were mis-encoded (UTF-8 read as cp1252); restored.
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n✅ Classification Report:\n", classification_report(y_test, y_pred))


✅ Accuracy: 0.5263157894736842

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.47      0.47        17
           1       0.57      0.57      0.57        21

    accuracy                           0.53        38
   macro avg       0.52      0.52      0.52        38
weighted avg       0.53      0.53      0.53        38



In [17]:
import joblib
# Persist whatever estimator `model` currently refers to. This is only the
# logistic regression if the logistic-regression cell was the last to assign it.
joblib.dump(model, "logistic_model.pkl")
# Status marker was mis-encoded (UTF-8 read as cp1252); restored.
print("✅ Logistic Regression model saved as logistic_model.pkl")


✅ Logistic Regression model saved as logistic_model.pkl


In [19]:
# Sample student (e.g., from 2024–25 batch); keys are in the same order as
# the feature columns the scaler and model were fitted on
sample = pd.DataFrame([{
    '10th %': 85,
    '12th %': 82,
    'FE SGPA': 8.1,
    'SE SGPA': 8.3,
    'TE SGPA': 8.0,
    'Certifications': 2,
    'Projects': 3,
    'Internships': 1
}])

# Scale the sample using the same scaler. `transform` returns a bare ndarray,
# so re-wrap it with the column names; otherwise predicting with a model that
# was fitted on a named DataFrame triggers scikit-learn's feature-name warning.
sample_scaled = pd.DataFrame(
    scaler.transform(sample), columns=sample.columns, index=sample.index
)

# Predict (class 1 is reported as "Placed", anything else as "Not Placed")
prediction = model.predict(sample_scaled)
print("🎓 Placement Prediction:", "Placed" if prediction[0] == 1 else "Not Placed")


🎓 Placement Prediction: Placed


