In [15]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [16]:
# Location of the raw placement dataset.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook only runs where the file exists at exactly this location.
DATA_PATH = "D:\\archive\\placementdata.csv"

df = pd.read_csv(DATA_PATH)

# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,StudentID,CGPA,Internships,Projects,Workshops/Certifications,AptitudeTestScore,SoftSkillsRating,ExtracurricularActivities,PlacementTraining,SSC_Marks,HSC_Marks,PlacementStatus
0,1,7.5,1,1,1,65,4.4,No,No,61,79,NotPlaced
1,2,8.9,0,3,2,90,4.0,Yes,Yes,78,82,Placed
2,3,7.3,1,2,2,82,4.8,Yes,No,79,80,NotPlaced
3,4,7.5,1,1,2,85,4.4,Yes,Yes,81,80,Placed
4,5,8.3,1,2,2,86,4.5,Yes,Yes,74,88,Placed


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) of the CGPA column.
df['CGPA'].describe()

count    10000.000000
mean         7.698010
std          0.640131
min          6.500000
25%          7.400000
50%          7.700000
75%          8.200000
max          9.100000
Name: CGPA, dtype: float64

In [18]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   StudentID                  10000 non-null  int64  
 1   CGPA                       10000 non-null  float64
 2   Internships                10000 non-null  int64  
 3   Projects                   10000 non-null  int64  
 4   Workshops/Certifications   10000 non-null  int64  
 5   AptitudeTestScore          10000 non-null  int64  
 6   SoftSkillsRating           10000 non-null  float64
 7   ExtracurricularActivities  10000 non-null  object 
 8   PlacementTraining          10000 non-null  object 
 9   SSC_Marks                  10000 non-null  int64  
 10  HSC_Marks                  10000 non-null  int64  
 11  PlacementStatus            10000 non-null  object 
dtypes: float64(2), int64(7), object(3)
memory usage: 937.6+ KB


In [19]:
# Number of missing values in each column (isna is the canonical name of isnull).
df.isna().sum()

StudentID                    0
CGPA                         0
Internships                  0
Projects                     0
Workshops/Certifications     0
AptitudeTestScore            0
SoftSkillsRating             0
ExtracurricularActivities    0
PlacementTraining            0
SSC_Marks                    0
HSC_Marks                    0
PlacementStatus              0
dtype: int64

In [20]:
# Remove the StudentID column so it is not used as a model feature.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this
# cell idempotent: re-running it after the column is already gone is a no-op
# rather than a KeyError.
df = df.drop(columns=["StudentID"], errors="ignore")

In [21]:
# Encode the categorical columns (two Yes/No features and the target) as 0/1.
binary_encodings = {
    "ExtracurricularActivities": {"Yes": 1, "No": 0},
    "PlacementTraining": {"Yes": 1, "No": 0},
    "PlacementStatus": {"Placed": 1, "NotPlaced": 0},
}

for column, encoding in binary_encodings.items():
    # Skip columns that are already encoded. Without this guard, re-running
    # the cell would map the existing 0/1 values to NaN, because Series.map
    # returns NaN for any value that is not a key of the mapping.
    if df[column].isin(list(encoding.values())).all():
        continue

    encoded = df[column].map(encoding)

    # Fail loudly on labels the mapping does not know about instead of
    # letting NaN flow silently into the features / target.
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = sorted(df.loc[unmapped, column].astype(str).unique())
        raise ValueError(
            f"Column {column!r} contains values that cannot be encoded: {unexpected}"
        )

    df[column] = encoded

In [9]:
# Sanity check on the encoded target: show the distinct labels and enforce
# that every one of them is 0 or 1 (a NaN here would mean a label failed to
# encode and must not reach the model).
print(df["PlacementStatus"].unique())  # must be [0 1]
assert df["PlacementStatus"].isin([0, 1]).all(), (
    "PlacementStatus must contain only 0/1 after encoding"
)

In [22]:
# Separate the target (y) from the feature matrix (X); df itself is not modified.
y = df["PlacementStatus"]
X = df.drop(columns=["PlacementStatus"])

In [23]:
# Split first, then inspect the shapes. The shape checks previously ran before
# the split and referenced lowercase x_test / x_train, names this notebook never
# defines, so they only worked with variables left over from an earlier kernel
# session and failed with NameError on Restart & Run All.
#
# 75% train / 25% test, stratified on y so both parts keep the same class
# balance; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y
)

print("Shape of x_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

print("Shape of x_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

In [27]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so no information
# from the test set influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
# Fit a logistic regression classifier (library defaults) on the scaled
# training data. fit() returns the estimator itself, so the trailing bare
# `model` displays the same fitted-estimator summary as before.
model = LogisticRegression().fit(X_train, y_train)
model

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [29]:
# Predicted class labels for the scaled test features.
y_pred = model.predict(X_test)

In [30]:
# Evaluate on the held-out test split: overall accuracy first, then the
# confusion matrix and the per-class precision / recall / F1 report.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

for report in (
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
):
    print(report)

Accuracy: 0.7996
[[1197  254]
 [ 247  802]]
              precision    recall  f1-score   support

           0       0.83      0.82      0.83      1451
           1       0.76      0.76      0.76      1049

    accuracy                           0.80      2500
   macro avg       0.79      0.79      0.79      2500
weighted avg       0.80      0.80      0.80      2500



In [39]:
# Unscaled CGPA of the student to score. Stored separately because it is used
# both as a model feature and for the hard CGPA cutoff check before prediction.
new_student_original_cgpa = 5.5   # change this value as needed

In [40]:
# Feature values for the student to score, keyed by training column name.
new_student_features = {
    "CGPA": new_student_original_cgpa,
    "Internships": 1,
    "Projects": 3,
    "Workshops/Certifications": 2,
    "AptitudeTestScore": 85,
    "SoftSkillsRating": 4,
    "ExtracurricularActivities": 1,
    "PlacementTraining": 1,
    "SSC_Marks": 78,
    "HSC_Marks": 82,
}

# Build the single-row array in the exact column order of X, which is the order
# the scaler and the model were fitted on. The previous hand-ordered list put
# SSC_Marks and HSC_Marks directly after CGPA, so every value after CGPA was
# scaled and weighted as the wrong feature. Looking values up by name makes the
# order impossible to get wrong and raises KeyError if a feature is missing.
new_student = np.array([[new_student_features[column] for column in X.columns]])

In [41]:
# Standardise the new student's features with the scaler fitted on the
# training data.
new_student_scaled = scaler.transform(new_student)

# Hard rule applied before the model: a raw CGPA under 6.0 is reported as
# NOT PLACED without consulting the classifier.
below_cutoff = new_student_original_cgpa < 6.0

if not below_cutoff:
    prediction = model.predict(new_student_scaled)
    outcome = (
        "Student is likely to be PLACED"
        if prediction[0] == 1
        else "Student is likely to be NOT PLACED"
    )
else:
    outcome = "Student is likely to be NOT PLACED (CGPA below cutoff)"

print(outcome)


Student is likely to be NOT PLACED (CGPA below cutoff)


