In [174]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler ,LabelEncoder

In [176]:
file_name = r"C:\Users\Administrator\Downloads\SLD2.csv"
df = pd.read_csv(file_name)

In [180]:
df.head()

Unnamed: 0,Study_Hours_Per_Day,Extracurricular_Hours_Per_Day,Sleep_Hours_Per_Day,Social_Hours_Per_Day,Physical_Activity_Hours_Per_Day,Stress_Level,GPA
0,6.9,3.8,8.7,2.8,1.8,Moderate,2.99
1,5.3,3.5,8.0,4.2,3.0,Low,2.75
2,5.1,3.9,9.2,1.2,4.6,Low,2.67
3,6.5,2.1,7.2,1.7,6.5,Moderate,2.88
4,8.1,0.6,6.5,2.2,6.6,High,3.51


In [182]:
df.shape

(2000, 7)

In [184]:
print(df.isnull().sum())

Study_Hours_Per_Day                 0
Extracurricular_Hours_Per_Day       0
Sleep_Hours_Per_Day                50
Social_Hours_Per_Day                0
Physical_Activity_Hours_Per_Day     0
Stress_Level                        0
GPA                                 0
dtype: int64


In [186]:
if 'Sleep_Hours_Per_Day' in df.columns:
    mean_sleep_hours = df['Sleep_Hours_Per_Day'].mean()
    df['Sleep_Hours_Per_Day'] = df['Sleep_Hours_Per_Day'].fillna(mean_sleep_hours)

In [188]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 7 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Study_Hours_Per_Day              2000 non-null   float64
 1   Extracurricular_Hours_Per_Day    2000 non-null   float64
 2   Sleep_Hours_Per_Day              2000 non-null   float64
 3   Social_Hours_Per_Day             2000 non-null   float64
 4   Physical_Activity_Hours_Per_Day  2000 non-null   float64
 5   Stress_Level                     2000 non-null   object 
 6   GPA                              2000 non-null   float64
dtypes: float64(6), object(1)
memory usage: 109.5+ KB


In [190]:
if 'Stress_Level' in df.columns: 
    label_encoder = LabelEncoder()
    df['Stress_Level'] = label_encoder.fit_transform(df['Stress_Level'])
    print(label_encoder.classes_)

['High' 'Low' 'Moderate']


In [192]:
df.head(10)

Unnamed: 0,Study_Hours_Per_Day,Extracurricular_Hours_Per_Day,Sleep_Hours_Per_Day,Social_Hours_Per_Day,Physical_Activity_Hours_Per_Day,Stress_Level,GPA
0,6.9,3.8,8.7,2.8,1.8,2,2.99
1,5.3,3.5,8.0,4.2,3.0,1,2.75
2,5.1,3.9,9.2,1.2,4.6,1,2.67
3,6.5,2.1,7.2,1.7,6.5,2,2.88
4,8.1,0.6,6.5,2.2,6.6,0,3.51
5,6.0,2.1,8.0,0.3,7.6,2,2.85
6,8.0,0.7,5.3,5.7,4.3,0,3.08
7,8.4,1.8,5.6,3.0,5.2,0,3.2
8,5.2,3.6,6.3,4.0,4.9,1,2.82
9,7.7,0.7,9.8,4.5,1.3,2,2.76


In [252]:
X = df.iloc[:, :-1].values 
y = df.iloc[:, -1].values

In [254]:
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

In [256]:

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [258]:

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (1600, 6)
X_test shape: (400, 6)


In [260]:
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, Y_train)


In [262]:
y_pred = knn_regressor.predict(X_test)

In [264]:
mae = mean_absolute_error(Y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)
r2 = r2_score(Y_test, y_pred)

In [266]:
print("Mean Absolute Error (MAE):", mae)
print(" mean squared error (MSE):", mse)
print("r2_score:", r2)

Mean Absolute Error (MAE): 0.18872500000000003
 mean squared error (MSE): 0.05506171
r2_score: 0.4102420468978103


In [268]:
trial_data = pd.DataFrame([
    [9, 1.7, 7.3, 3.1, 2.9, 0]  
])

In [270]:
predicted_gpa = knn_regressor.predict(trial_data)
print("Predicted GPA:", predicted_gpa[0])

Predicted GPA: 3.4539999999999997
