# Part 1 Polynomial regression Full dataset


In [49]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from itertools import combinations_with_replacement

In [50]:
df = pd.read_csv('zuucrew.csv')
df.head()



Unnamed: 0,MemberName,EducationLevel,Attendance,TotalHours,AssignmentsCompleted,HackathonParticipation,GitHubScore,PeerReviewScore,CourseName,CapstoneScore
0,Theekshana Rathnayake,3,79.9,43.7,2,0,62.8,5.0,Foundations of ML,45.3
1,Mayura Sandakalum Sellapperuma,2,76.8,95.6,6,0,87.4,2.7,Foundations of ML,78.8
2,Amila Narangoda,3,96.6,75.9,8,0,98.4,2.8,Foundations of ML,65.4
3,Nisal Gamage,1,74.5,63.9,7,0,76.8,1.7,Production-Ready ML Systems,100.0
4,Tharusha Vihanga,2,83.2,24.0,6,0,41.8,4.2,Foundations of ML,40.1


In [51]:
df = df[df['CourseName']=='Foundations of ML']
df.head()

Unnamed: 0,MemberName,EducationLevel,Attendance,TotalHours,AssignmentsCompleted,HackathonParticipation,GitHubScore,PeerReviewScore,CourseName,CapstoneScore
0,Theekshana Rathnayake,3,79.9,43.7,2,0,62.8,5.0,Foundations of ML,45.3
1,Mayura Sandakalum Sellapperuma,2,76.8,95.6,6,0,87.4,2.7,Foundations of ML,78.8
2,Amila Narangoda,3,96.6,75.9,8,0,98.4,2.8,Foundations of ML,65.4
4,Tharusha Vihanga,2,83.2,24.0,6,0,41.8,4.2,Foundations of ML,40.1
7,Chamath Perera,3,86.5,88.0,5,0,23.9,1.3,Foundations of ML,68.2


In [52]:
X = df[['EducationLevel', 'Attendance', 'TotalHours', 'AssignmentsCompleted', 
        'HackathonParticipation', 'GitHubScore', 'PeerReviewScore']].values   
y = df["CapstoneScore"].values

y = y.reshape(-1, 1)  


X_scaled = (X - X.mean(axis=0)) / X.std(axis=0)


X_with_bias = np.hstack((np.ones((X_scaled.shape[0], 1)), X_scaled))




In [53]:
def create_polynomial_features(X, degree):
    
    n_samples, n_features = X.shape
    X_poly = [np.ones(n_samples)] 
    
    for deg in range(1, degree+1):
        for items in combinations_with_replacement(range(n_features), deg):
            new_feature = np.prod(X[:, items], axis=1)
            X_poly.append(new_feature)
    
    return np.vstack(X_poly).T

In [54]:
def fit_polynomial(X, y, degree, learning_rate=0.01, iterations=1000):
    X_poly = create_polynomial_features(X, degree)
    
    
    beta = np.zeros((X_poly.shape[1], 1))
    m = len(y)
    cost_history = []
    
    for i in range(iterations):
        y_pred = X_poly.dot(beta)
        error = y_pred - y
        gradients = (1/m) * X_poly.T.dot(error)
        beta -= learning_rate * gradients
        cost = (1/(2*m)) * np.sum(error**2)
        cost_history.append(cost)
    
    return beta, cost_history

In [55]:
def predict_polynomial(X, beta, degree):
    X_poly = create_polynomial_features(X, degree)
    return X_poly.dot(beta)

In [57]:
beta, history = fit_polynomial(X_with_bias, y, degree=2, learning_rate=0.01, iterations=10000)

print("Final cost:", history[-1])
print("Coefficients:", beta.flatten())

y_pred = predict_polynomial(X_with_bias, beta, degree=2)

Final cost: 7.5167035690340605
Coefficients: [14.24369116 14.24369116 -0.12195188  0.39705214  8.07584883 -0.20522591
 -1.21240133  0.56271143  0.90934422 14.24369116 -0.12195188  0.39705214
  8.07584883 -0.20522591 -1.21240133  0.56271143  0.90934422  0.65569261
  0.2820101   0.35877172  0.06090535  0.67343978  0.35795095 -1.19863595
 -1.75618599 -0.33947321  1.00484109 -0.25634912 -0.07265974  0.4879941
 -0.10307571  1.09609939  0.35213389 -0.25984729 -0.27374729 -0.31046474
 -0.34200895 -0.12586888  0.31530933 13.97258998  0.05633556  0.20287099
 -0.734255   -0.47814134  0.58913393]


## Evalution Metrices

In [None]:
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [None]:
def mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

In [None]:
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [None]:
def r2_score(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred) ** 2)  
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2) 
    return 1 - (ss_res / ss_tot)

In [None]:
print("MSE:", mean_squared_error(y, y_pred))
print("MAE:", mean_absolute_error(y, y_pred))
print("RMSE:", root_mean_squared_error(y, y_pred))
print("R²:", r2_score(y, y_pred))

MSE: 237256117.02409846
MAE: 14808.672540909123
RMSE: 15403.1203664744
R²: -830353.1710822392
