# **TASK 3: Student Performance Prediction (Classification Task)**

# 1. Import Libraries

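> We use pandas and NumPy for data handling, and scikit-learn for splitting, preprocessing, the KNN classifier, and the evaluation metrics.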



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# 2. Load and Prepare the Dataset
If the target column contains text labels (e.g. "Pass"/"Fail") instead of numbers, we encode it as integers before modelling.

In [None]:
# Load the dataset
DATA_PATH = 'pass_fail.csv'
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    # Report the file that was actually requested, then re-raise: every
    # later step needs `df`, so continuing would only fail with a NameError.
    print(f"Error: '{DATA_PATH}' not found.")
    raise

# Basic Cleaning: drop identifier columns if they exist. A row ID carries no
# information about the outcome and would distort KNN's distance computation.
id_columns = [col for col in ('Self_ID', 'student_id') if col in df.columns]
if id_columns:
    df = df.drop(columns=id_columns)

# Encode Target if it's text (e.g., 'Pass'/'Fail' to 1/0).
# LabelEncoder numbers the classes in sorted order, so 'Fail' -> 0, 'Pass' -> 1.
# Checking "not numeric" also covers pandas' dedicated string dtypes, which an
# `== 'object'` comparison would miss.
le = LabelEncoder()
if not pd.api.types.is_numeric_dtype(df['pass']):
    df['pass'] = le.fit_transform(df['pass'])

print("Dataset Preview:")
print(df.head())

Dataset Preview:
   student_id  attendance_pct  homework_pct  midterm_score  \
0           1              95            92             88   
1           2              88            85             79   
2           3              60            55             58   
3           4              72            70             65   
4           5              40            45             50   

   study_hours_per_week  pass  
0                    12     1  
1                    10     1  
2                     4     0  
3                     6     1  
4                     3     0  


# 3. Feature Selection and Scaling



In [None]:
# Features: every column except the target; the target is the 'pass' column
X = df.drop(columns=['pass'])
y = df['pass']

# Split first: 80% Training, 20% Testing. Splitting before scaling keeps the
# test rows out of the scaler's statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN is distance-based, so scaling features is crucial for accuracy.
# Fit the scaler on the training rows only and reuse those statistics for the
# test rows; fitting on the full dataset would leak test information.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training statistics, kept so any other
# cell that refers to X_scaled still works.
X_scaled = scaler.transform(X)

# 4. Train Model (KNN)

In [None]:
# Build a 5-nearest-neighbours classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the class label for every row of the held-out test split
y_pred = knn.predict(X_test)

# 5. Evaluate the Results
We use accuracy — the fraction of test-set students whose pass/fail outcome is predicted correctly — and compare a few actual and predicted labels side by side.

In [None]:
# Share of test rows whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Side-by-side look at the first five test rows: true label vs. prediction
preview_rows = 5
comparison_columns = {
    'Actual Status': y_test.values[:preview_rows],
    'Predicted Status': y_pred[:preview_rows],
}
sample_results = pd.DataFrame(comparison_columns)
print("\nSample Predictions (1=Pass, 0=Fail):")
print(sample_results)

Model Accuracy: 100.00%

Sample Predictions (1=Pass, 0=Fail):
   Actual Status  Predicted Status
0              1                 1
1              1                 1
2              0                 0
3              0                 0
4              1                 1
