### Student Placement Prediction using Logistic Regression

#### Import Library

In [1]:
import pandas as pd
import numpy as np

#### Import DataFrame

In [3]:
df=pd.read_csv(r"https://github.com/YBI-Foundation/Dataset/raw/main/Placement.csv")

In [4]:
df.head()

Unnamed: 0,Student_ID,CGPA,IQ,Placement
0,1,6.8,123,1
1,2,5.9,106,0
2,3,5.3,121,0
3,4,7.4,132,1
4,5,5.8,142,0


#### Get Information of Dataframe

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Student_ID  100 non-null    int64  
 1   CGPA        100 non-null    float64
 2   IQ          100 non-null    int64  
 3   Placement   100 non-null    int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 KB


#### Get Summary Statistics

In [6]:
df.describe()

Unnamed: 0,Student_ID,CGPA,IQ,Placement
count,100.0,100.0,100.0,100.0
mean,50.5,5.991,123.58,0.5
std,29.011492,1.143634,39.944198,0.502519
min,1.0,3.3,37.0,0.0
25%,25.75,5.075,101.5,0.0
50%,50.5,6.0,127.5,0.5
75%,75.25,6.9,149.0,1.0
max,100.0,8.5,233.0,1.0


#### Get Column Names

In [8]:
df.columns

Index(['Student_ID', 'CGPA', 'IQ', 'Placement'], dtype='object')

#### Get Shape of DataFrame

In [9]:
df.shape

(100, 4)

#### Get Unique Values in y variable

In [10]:
df['Placement'].value_counts()

0    50
1    50
Name: Placement, dtype: int64

In [11]:
df.groupby('Placement').mean()

Unnamed: 0_level_0,Student_ID,CGPA,IQ
Placement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,46.32,5.056,126.88
1,54.68,6.926,120.28


#### Define y and X

In [12]:
y=df['Placement']

In [13]:
y.shape

(100,)

In [14]:
y

0     1
1     0
2     0
3     1
4     0
     ..
95    0
96    0
97    1
98    1
99    1
Name: Placement, Length: 100, dtype: int64

In [15]:
X=df[['CGPA','IQ']]

In [16]:
X.shape

(100, 2)

In [17]:
X

Unnamed: 0,CGPA,IQ
0,6.8,123
1,5.9,106
2,5.3,121
3,7.4,132
4,5.8,142
...,...,...
95,4.3,200
96,4.4,42
97,6.7,182
98,6.3,103


#### Get X Standardized

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
ss=StandardScaler()

In [20]:
X=ss.fit_transform(X)

In [21]:
X

array([[ 0.71095807, -0.01459341],
       [-0.0799718 , -0.44233119],
       [-0.60725838, -0.0649155 ],
       [ 1.23824465,  0.21185601],
       [-0.16785289,  0.46346647],
       [ 0.97460136, -1.90167186],
       [-0.25573399,  0.48862751],
       [-0.87090167, -1.52425617],
       [ 0.0957904 ,  0.81572111],
       [-0.78302057, -1.44877303],
       [ 0.0079093 , -1.977155  ],
       [ 0.79883917,  0.36282228],
       [-0.51937728,  0.38798333],
       [ 0.35943369, -0.19072073],
       [ 0.0957904 , -0.51781433],
       [-0.78302057,  1.31894203],
       [-0.69513948,  2.52667224],
       [-2.36488031,  1.49506935],
       [-1.74971264, -0.59329747],
       [-0.69513948,  0.21185601],
       [ 0.53519588, -0.09007654],
       [ 0.97460136,  0.68991588],
       [-0.95878277, -0.09007654],
       [-1.13454496, -0.92039106],
       [-1.13454496, -0.0649155 ],
       [-0.87090167, -0.81974688],
       [ 0.88672027,  1.89764609],
       [ 0.0079093 ,  0.01056764],
       [-0.69513948,

In [22]:
X.shape

(100, 2)

#### Get Train Test Split

In [23]:
from sklearn.model_selection import train_test_split

In [24]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,stratify=y,random_state=192529)

In [25]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((70, 2), (30, 2), (70,), (30,))

#### Get Model Train

In [26]:
from sklearn.linear_model import LogisticRegression

In [27]:
lr=LogisticRegression()

In [28]:
lr.fit(X_train,y_train)

#### Get Model Prediction

In [29]:
y_pred=lr.predict(X_test)

In [30]:
y_pred.shape

(30,)

In [31]:
y_pred

array([1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 0], dtype=int64)

#### Get Probability of each predicted class

In [32]:
lr.predict_proba(X_test)

array([[0.17008701, 0.82991299],
       [0.95699717, 0.04300283],
       [0.88729211, 0.11270789],
       [0.9571089 , 0.0428911 ],
       [0.88701997, 0.11298003],
       [0.03192741, 0.96807259],
       [0.96648409, 0.03351591],
       [0.98442328, 0.01557672],
       [0.17022249, 0.82977751],
       [0.01493481, 0.98506519],
       [0.99570654, 0.00429346],
       [0.05287139, 0.94712861],
       [0.08585522, 0.91414478],
       [0.94476676, 0.05523324],
       [0.05259713, 0.94740287],
       [0.00891871, 0.99108129],
       [0.94502764, 0.05497236],
       [0.96654275, 0.03345725],
       [0.00411043, 0.99588957],
       [0.25648129, 0.74351871],
       [0.82362505, 0.17637495],
       [0.30931814, 0.69068186],
       [0.94493064, 0.05506936],
       [0.01922481, 0.98077519],
       [0.06749825, 0.93250175],
       [0.01932255, 0.98067745],
       [0.94496946, 0.05503054],
       [0.43023867, 0.56976133],
       [0.36734765, 0.63265235],
       [0.82346238, 0.17653762]])

#### Get Model Evaluation

In [33]:
from sklearn.metrics import classification_report,confusion_matrix

In [34]:
print(confusion_matrix(y_test,y_pred))

[[14  1]
 [ 0 15]]


In [35]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.93      0.97        15
           1       0.94      1.00      0.97        15

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



#### Get Future Predictions

In [36]:
X_new=df.sample(1)

In [37]:
X_new

Unnamed: 0,Student_ID,CGPA,IQ,Placement
77,78,7.3,50,1


In [38]:
X_new.shape

(1, 4)

In [39]:
X_new=X_new.drop(['Student_ID','Placement'],axis=1)

In [40]:
X_new

Unnamed: 0,CGPA,IQ
77,7.3,50


In [41]:
X_new.shape

(1, 2)

In [42]:
X_new=ss.fit_transform(X_new)

In [43]:
y_pred_new=lr.predict(X_new)

In [44]:
y_pred_new

array([0], dtype=int64)

In [45]:
lr.predict_proba(X_new)

array([[0.50031929, 0.49968071]])