In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the three CSV inputs in one pass: the training table, the test
# features, and the table holding the test-set answers.
train_data, test_data, test_answer_data = (
    pd.read_csv(csv_name)
    for csv_name in ('train_set.csv', 'test_set.csv', 'test_set_answer.csv')
)

class PolynomialRegression:
    """Ordinary least-squares polynomial regression without cross terms.

    Every input feature is raised independently to the powers
    ``1..degree`` and a bias column of ones is prepended; no interaction
    terms between different features are generated.

    Attributes:
        degree: Highest power applied to each feature.
        coefficients: Column vector of fitted weights with shape
            ``(1 + n_features * degree, 1)``, or ``None`` before ``fit``.
    """

    def __init__(self, degree):
        """Remember the polynomial degree; weights are set later by ``fit``."""
        self.degree = degree
        self.coefficients = None

    @staticmethod
    def _as_feature_matrix(X):
        """Return ``X`` as a 2-D float array of shape (n_samples, n_features).

        A 1-D input is interpreted as a single feature column. Converting to
        float up front keeps integer inputs from overflowing when they are
        raised to higher powers.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 1-D or 2-D, got an array with {X.ndim} dimensions"
            )
        return X

    def _generate_polynomial_features(self, X):
        """Build the design matrix for ``X``.

        Column layout: the bias column first, then all features to the
        power 1, then all features to the power 2, and so on up to
        ``self.degree``.
        """
        X = self._as_feature_matrix(X)
        n_samples = X.shape[0]
        blocks = [np.ones((n_samples, 1))]  # x^0 (bias)
        # X ** d raises every feature column at once, preserving column order.
        blocks.extend(X ** d for d in range(1, self.degree + 1))
        return np.hstack(blocks)

    def fit(self, X, y):
        """Fit the weights to ``X`` / ``y`` by least squares.

        Args:
            X: Array-like of shape (n_samples, n_features) or (n_samples,).
            y: Array-like with one target value per sample.

        Raises:
            ValueError: If ``X`` and ``y`` disagree on the number of samples.
        """
        X_poly = self._generate_polynomial_features(X)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X_poly.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X_poly.shape[0]} samples but y has {y.shape[0]}"
            )
        # lstsq works on X_poly directly (SVD based) and returns the
        # minimum-norm solution. Forming X_poly.T @ X_poly first would square
        # the condition number, which hurts for high-degree power features.
        self.coefficients, *_ = np.linalg.lstsq(X_poly, y, rcond=None)

    def predict(self, X):
        """Return predictions for ``X`` as a column vector (n_samples, 1).

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.coefficients is None:
            raise RuntimeError("PolynomialRegression.predict called before fit")
        X_poly = self._generate_polynomial_features(X)
        return X_poly @ self.coefficients

    def score(self, X, y):
        """Return the coefficient of determination (R^2) on ``X`` / ``y``.

        When ``y`` is constant the total sum of squares is zero and R^2 is
        undefined; in that case 1.0 is returned for an exact prediction and
        0.0 otherwise, instead of NaN or -inf.
        """
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1 - ss_res / ss_tot

    def mean_squared_error(self, y_true, y_pred):
        """Return the mean of squared differences between the two inputs.

        Both inputs are flattened first, so column vectors and flat
        sequences can be mixed.
        """
        y_true = np.array(y_true).reshape(-1)
        y_pred = np.array(y_pred).reshape(-1)
        return np.mean((y_true - y_pred) ** 2)

    def mean_absolute_error(self, y_true, y_pred):
        """Return the mean of absolute differences between the two inputs.

        Both inputs are flattened first, so column vectors and flat
        sequences can be mixed.
        """
        y_true = np.array(y_true).reshape(-1)
        y_pred = np.array(y_pred).reshape(-1)
        return np.mean(np.abs(y_true - y_pred))


# Predictor columns, in the order they appear in the design matrix.
# 'Glucose' is the regression target.
FEATURE_COLUMNS = [
    'Pregnancies',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

# The individual column variables are kept so anything that still refers to
# them by name keeps working.
x1, x2, x3, x4, x5, x6, x7 = (train_data[col] for col in FEATURE_COLUMNS)
Y = train_data['Glucose']

# Standardisation statistics come from the training split only.
train_features = train_data[FEATURE_COLUMNS]
feature_mean = train_features.mean()
feature_std = train_features.std()
X = (train_features - feature_mean) / feature_std

(x1_test, x2_test, x3_test, x4_test,
 x5_test, x6_test, x7_test) = (test_data[col] for col in FEATURE_COLUMNS)
Y_test = test_answer_data['Glucose']

# Scale the test features with the *training* mean/std. Standardising the
# test split with its own statistics would put it in a different feature
# space from the one the model coefficients were fitted in.
X_test = (test_data[FEATURE_COLUMNS] - feature_mean) / feature_std



# Fit one model per polynomial degree and report test-set error metrics plus
# a coarse under/over-fit label derived from the train and test R^2 values.
degrees = [2, 3, 4]
fit_status = ''

for degree in degrees:

    model = PolynomialRegression(degree)
    model.fit(X, Y)

    # Y_train_pred is not used inside this loop; it is kept so the
    # training-set predictions of the last model stay available afterwards.
    Y_train_pred = model.predict(X)
    Y_pred = model.predict(X_test)

    # R^2 on each split, computed once and reused for both the report and
    # the fit classification below.
    train_r2 = model.score(X, Y)
    test_r2 = model.score(X_test, Y_test)

    print('======================')
    print(f'deg={degree}')

    print(f' Mean Squared Error (MSE): {model.mean_squared_error(Y_test, Y_pred)}')
    print(f' Mean Absolute Error (MAE) {model.mean_absolute_error(Y_test, Y_pred)}')
    print(f' R^2 score: {test_r2}')

    # Heuristic labels: both scores low -> underfit; high train score with a
    # large train/test gap -> overfit; anything else is reported as 'fit'.
    if train_r2 < 0.6 and test_r2 < 0.6:
        fit_status = 'underfit'
    elif train_r2 > 0.9 and (train_r2 - test_r2) > 0.2:
        fit_status = 'overfit'
    else:
        fit_status = 'fit'

    print(f'Fit status: {fit_status}')

    print('predict: \n')
    print(Y_pred.tolist())






    

ModuleNotFoundError: No module named 'tensorflow'