In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# --- Data loading and preparation -------------------------------------------
# Read the student records, encode the categorical columns, then build the
# train/test feature matrices used by the models further down.
df = pd.read_csv('Gpa.csv')


# Quick inspection of the first rows and of the column summary.
print(df.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would only append a stray "None" line to the output.
df.info()


# Discard every row that has a missing value in any column.
df = df.dropna()

# Replace each categorical text column with integer codes. The fitted encoder
# is kept per column so the codes can be mapped back to the original labels.
# NOTE(review): integer codes impose an arbitrary ordering on categories with
# more than two values; consider one-hot encoding if that matters here.
label_encoders = {}
for column in ['Field of Study', 'Employment Status', 'Preferred Study Time', 'Hometown']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le


# Model inputs and the regression target.
features = ['Hours Studied Weekly', 'Class Participation Rate', 'Secondary School GPA',
            'Hobby Activities', 'Employment Status', 'Online Research Hours', 'Age', 'Preferred Study Time']
target = 'College GPA'

X = df[features]
y = df[target]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/std applied to the training rows (data
# leakage) and make the reported test metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept so that any later code referring to X_scaled still works; it is now
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# --- K-nearest-neighbours baseline -------------------------------------------
# Fit a 5-neighbour regressor on the training split and score it on the
# held-out split.
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)

# Both metrics compare the same held-out targets against the same predictions.
knn_mse, knn_r2 = (
    mean_squared_error(y_test, knn_predictions),
    r2_score(y_test, knn_predictions),
)
# R-squared expressed as a percentage for the report below.
knn_r2_percentage = 100 * knn_r2

print(
    f"KNN Model - Mean Squared Error: {knn_mse:.3f}",
    f"KNN Model - R-squared: {knn_r2:.3f}",
    f"KNN Model - R-squared (percentage): {knn_r2_percentage:.2f}%",
    sep="\n",
)


# --- Small feed-forward neural network ----------------------------------------
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable, matching the fixed random_state used for the
# train/test split. Note this sets the global seeds for the whole session.
tf.keras.utils.set_random_seed(42)

# One hidden layer of 10 ReLU units feeding a single linear output (the
# predicted GPA). The input width is declared with an explicit Input layer:
# passing input_dim to the first Dense layer is deprecated in Keras 3 and
# emits a UserWarning.
ann_model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(10, activation='relu'),
    Dense(1),
])
ann_model.compile(optimizer='adam', loss='mean_squared_error')
ann_model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

# predict() returns one column per output unit; flatten to a 1-D vector so it
# lines up with y_test for the metric functions.
ann_predictions = ann_model.predict(X_test).flatten()
ann_mse = mean_squared_error(y_test, ann_predictions)
ann_r2 = r2_score(y_test, ann_predictions)
ann_r2_percentage = ann_r2 * 100

print(f"ANN Model - Mean Squared Error: {ann_mse:.3f}")
print(f"ANN Model - R-squared: {ann_r2:.3f}")
print(f"ANN Model - R-squared (percentage): {ann_r2_percentage:.2f}%")


# --- Side-by-side summary -----------------------------------------------------
# The "accuracy" reported here is each model's R-squared expressed as a
# percentage; it is not a classification accuracy.
knn_accuracy, ann_accuracy = knn_r2 * 100, ann_r2 * 100

print(
    f"KNN Model Accuracy: {knn_accuracy:.2f}%",
    f"ANN Model Accuracy: {ann_accuracy:.2f}%",
    sep="\n",
)


   Hours Studied Weekly  Class Participation Rate Field of Study  \
0                  6.77                     85.24        Science   
1                 11.86                     62.25    Engineering   
2                  0.58                     94.29       Business   
3                 19.35                     53.94       Business   
4                 38.79                     73.80        Science   

   Secondary School GPA  Hobby Activities Employment Status  \
0                  3.32                 3        Unemployed   
1                  3.34                 3          Employed   
2                  3.61                 4        Unemployed   
3                  3.55                 2          Employed   
4                  2.95                 2          Employed   

   Online Research Hours  Student ID   Age       Hometown  \
0                   2.68    256811.0  22.0      Santa Ana   
1                  16.84    878869.0  19.0       San Jose   
2                  10.06    5

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
ANN Model - Mean Squared Error: 0.003
ANN Model - R-squared: 0.990
ANN Model - R-squared (percentage): 99.01%
KNN Model Accuracy: 87.28%
ANN Model Accuracy: 99.01%
