In [56]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [57]:
# Load dataset
df=pd.read_csv('/content/student_exam_scores.csv')
df['pass_fail']=(df['exam_score']>=40).astype(int)
df

Unnamed: 0,student_id,hours_studied,sleep_hours,attendance_percent,previous_scores,exam_score,pass_fail
0,S001,8.0,8.8,72.1,45,30.2,0
1,S002,1.3,8.6,60.7,55,25.0,0
2,S003,4.0,8.2,73.7,86,35.8,0
3,S004,3.5,4.8,95.1,66,34.0,0
4,S005,9.1,6.4,89.8,71,40.3,1
...,...,...,...,...,...,...,...
195,S196,10.5,5.4,94.0,87,42.7,1
196,S197,7.1,6.1,85.1,92,40.4,1
197,S198,1.6,6.9,63.8,76,28.2,0
198,S199,12.0,7.3,50.5,58,42.0,1


In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   student_id          200 non-null    object 
 1   hours_studied       200 non-null    float64
 2   sleep_hours         200 non-null    float64
 3   attendance_percent  200 non-null    float64
 4   previous_scores     200 non-null    int64  
 5   exam_score          200 non-null    float64
 6   pass_fail           200 non-null    int64  
dtypes: float64(4), int64(2), object(1)
memory usage: 11.1+ KB


In [59]:
# Drop identifier column
df=df.drop('student_id',axis=1)

In [60]:
df

Unnamed: 0,hours_studied,sleep_hours,attendance_percent,previous_scores,exam_score,pass_fail
0,8.0,8.8,72.1,45,30.2,0
1,1.3,8.6,60.7,55,25.0,0
2,4.0,8.2,73.7,86,35.8,0
3,3.5,4.8,95.1,66,34.0,0
4,9.1,6.4,89.8,71,40.3,1
...,...,...,...,...,...,...
195,10.5,5.4,94.0,87,42.7,1
196,7.1,6.1,85.1,92,40.4,1
197,1.6,6.9,63.8,76,28.2,0
198,12.0,7.3,50.5,58,42.0,1


In [61]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   hours_studied       200 non-null    float64
 1   sleep_hours         200 non-null    float64
 2   attendance_percent  200 non-null    float64
 3   previous_scores     200 non-null    int64  
 4   exam_score          200 non-null    float64
 5   pass_fail           200 non-null    int64  
dtypes: float64(4), int64(2)
memory usage: 9.5 KB


In [62]:
# Checking null values
df.isnull().sum()

Unnamed: 0,0
hours_studied,0
sleep_hours,0
attendance_percent,0
previous_scores,0
exam_score,0
pass_fail,0


In [63]:
# Checking duplicates
df.duplicated().sum()

np.int64(0)

In [64]:
df["pass_fail"].value_counts()

Unnamed: 0_level_0,count
pass_fail,Unnamed: 1_level_1
0,162
1,38


In [65]:
# Separate features and target
X=df[['hours_studied', 'sleep_hours', 'attendance_percent', 'previous_scores']]
y=df['pass_fail']

In [66]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
# Scale features
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

In [68]:
# Create and train Logistic Regression model
model=LogisticRegression()
model.fit(X_train_scaled, y_train)

In [69]:
# Predict on the test set
y_pred=model.predict(X_test_scaled)

In [70]:
# Evaluate on test set
accuracy_score(y_test,y_pred)

0.975

In [71]:
model.intercept_

array([-3.23034617])

In [72]:
model.coef_

array([[2.21907483, 0.94306699, 0.72303775, 1.35204898]])

In [73]:
# Predict for a new sample
new_data=pd.DataFrame({
    "hours_studied": [6],
    "sleep_hours": [7],
    "attendance_percent": [80],
    "previous_scores": [45]
})
new_data

Unnamed: 0,hours_studied,sleep_hours,attendance_percent,previous_scores
0,6,7,80,45


In [74]:
# scale new data using the same scaler
new_data_scaled=scaler.transform(new_data)
new_data_scaled

array([[-0.0743362 ,  0.26302583,  0.3866248 , -1.47867185]])

In [75]:
# Predict for the new sample
new_prediction=model.predict(new_data_scaled)
new_prediction

array([0])

In [76]:
# Interpret prediction
if new_prediction[0]==1:
    print("Student will PASS")
else:
    print("Student will FAIL")


Student will FAIL
