### Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder

### Load Sample Dataset

In [2]:
# Small hand-written student dataset: three numeric features (each with exactly
# one missing value), two text features, and the final grade.
data = dict(
    Student_ID=range(1, 11),
    Study_Hours=[15, 5, 8, 12, np.nan, 10, 20, 7, 6, 14],
    Attendance=[90, 70, 60, 80, 75, np.nan, 95, 65, 55, 85],
    Previous_Grade=[85, 65, 72, 78, 70, 68, 90, np.nan, 60, 82],
    Parental_Education=[
        "Graduate", "High School", "Graduate", "Postgraduate", "Graduate",
        "High School", "Postgraduate", "Graduate", "High School", "Postgraduate",
    ],
    # The participation flag strictly alternates, starting with "Yes".
    Extracurricular=["Yes", "No"] * 5,
    Final_Grade=[88, 60, 68, 82, 74, 66, 92, 64, 58, 86],
)

# Keyword order above is the column order of the resulting frame.
df = pd.DataFrame(data)

print("Original Data:\n", df)

Original Data:
    Student_ID  Study_Hours  Attendance  Previous_Grade Parental_Education  \
0           1         15.0        90.0            85.0           Graduate   
1           2          5.0        70.0            65.0        High School   
2           3          8.0        60.0            72.0           Graduate   
3           4         12.0        80.0            78.0       Postgraduate   
4           5          NaN        75.0            70.0           Graduate   
5           6         10.0         NaN            68.0        High School   
6           7         20.0        95.0            90.0       Postgraduate   
7           8          7.0        65.0             NaN           Graduate   
8           9          6.0        55.0            60.0        High School   
9          10         14.0        85.0            82.0       Postgraduate   

  Extracurricular  Final_Grade  
0             Yes           88  
1              No           60  
2             Yes           68  
3   

### Handle Missing Values

In [6]:
# Replace missing values in the numeric feature columns with the column mean.
# NOTE(review): the imputer is fitted on the whole frame, i.e. before the
# train/test split performed later in the notebook, so the fill values are
# computed from rows that end up in the test set as well. Consider fitting
# on the training rows only.
numeric_cols = ['Study_Hours', 'Attendance', 'Previous_Grade']
imputer = SimpleImputer(strategy='mean')
# Assign the imputed array directly instead of wrapping it in a bare
# pd.DataFrame: such a wrapper gets a fresh 0..n-1 index, and pandas aligns on
# the index when assigning a DataFrame, so a df with any other index would
# silently receive NaN instead of the imputed values.
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])
print("\nDataset after Handling Missing Values:\n", df)


Dataset after Handling Missing Values:
    Student_ID  Study_Hours  Attendance  Previous_Grade Parental_Education  \
0           1    15.000000        90.0       85.000000           Graduate   
1           2     5.000000        70.0       65.000000        High School   
2           3     8.000000        60.0       72.000000           Graduate   
3           4    12.000000        80.0       78.000000       Postgraduate   
4           5    10.777778        75.0       70.000000           Graduate   
5           6    10.000000        75.0       68.000000        High School   
6           7    20.000000        95.0       90.000000       Postgraduate   
7           8     7.000000        65.0       74.444444           Graduate   
8           9     6.000000        55.0       60.000000        High School   
9          10    14.000000        85.0       82.000000       Postgraduate   

  Extracurricular  Final_Grade  
0             Yes           88  
1              No           60  
2           

### Encode Categorical Features

In [7]:
# Convert the two text columns to integer codes.
# Both steps overwrite df in place, so each is guarded by a dtype check to keep
# the cell safe to re-run: applying the Yes/No map to a column that already
# holds 1/0 finds no matching keys and would turn the whole column into NaN,
# and re-fitting the encoder on codes would discard the original class labels.
if not pd.api.types.is_numeric_dtype(df["Parental_Education"]):
    # LabelEncoder assigns codes in sorted label order (Graduate=0,
    # High School=1, Postgraduate=2), which is not the order of education level.
    # NOTE(review): use an explicit ordered mapping if the rank should matter.
    le = LabelEncoder()
    df["Parental_Education"] = le.fit_transform(df["Parental_Education"])

if not pd.api.types.is_numeric_dtype(df["Extracurricular"]):
    df["Extracurricular"] = df["Extracurricular"].map({"Yes": 1, "No": 0})

### Train-Test Split

In [9]:
# Separate the prediction target from the inputs; the student identifier is
# dropped from the inputs along with the target column.
y = df["Final_Grade"]
X = df.drop(["Student_ID", "Final_Grade"], axis=1)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preview the first five training rows of the features and of the target.
print("\nProcessed Features (X):\n", X_train.head(5))
print("\nTarget (y):\n", y_train.head(5))


Processed Features (X):
    Study_Hours  Attendance  Previous_Grade  Parental_Education  \
5         10.0        75.0       68.000000                   1   
0         15.0        90.0       85.000000                   0   
7          7.0        65.0       74.444444                   0   
2          8.0        60.0       72.000000                   0   
9         14.0        85.0       82.000000                   2   

   Extracurricular  
5                0  
0                1  
7                0  
2                1  
9                0  

Target (y):
 5    66
0    88
7    64
2    68
9    86
Name: Final_Grade, dtype: int64


### Feature Scaling

In [10]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused, unchanged, on the test split.
scaler_standard = StandardScaler()
scaler_standard.fit(X_train)
X_train_standardized = scaler_standard.transform(X_train)
X_test_standardized = scaler_standard.transform(X_test)

print(
    "\nStandardized Data (First 3 rows):\n",
    X_train_standardized[:3],
)


Standardized Data (First 3 rows):
 [[-0.53394906 -0.27994626 -1.30301816  0.13483997 -1.        ]
 [ 0.73904207  1.06379577  1.04586877 -0.94387981  1.        ]
 [-1.29774373 -1.17577427 -0.41259043 -0.94387981 -1.        ]]
