In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("/content/polynomial_regression_train.csv")
df

Unnamed: 0,ID,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Target
0,1,-0.299150,0.980381,-1.244780,0.420551,-0.352235,8.168743e-10
1,2,-0.374319,-1.179867,0.615588,0.054701,0.687182,-5.081225e-10
2,3,-1.240429,-0.139086,-0.001653,0.487685,-0.242808,4.560170e-10
3,4,-0.429881,-1.724246,-0.098046,0.896504,0.318585,-2.647546e-09
4,5,-1.726345,1.945823,1.275979,-0.474670,1.050494,-1.704606e-08
...,...,...,...,...,...,...,...
47995,47996,1.125337,0.229907,-1.789732,-0.571275,-1.900555,7.752513e-09
47996,47997,1.733309,-0.721866,0.958440,0.317420,0.108628,7.770135e-09
47997,47998,-1.677538,1.758274,1.020534,0.346496,1.313755,-1.096204e-08
47998,47999,1.313345,0.315358,-0.060629,0.703251,-0.618520,1.962097e-09


In [8]:
def polynomial_features(X, degree):
  n_samples, n_features = X.shape
  features = [np.zeros(n_samples)]

  def generate_combinations(variables,degree):
    if degree == 0:
      return [[]]
    if len(variables) == 0:
      return []

    without_current = generate_combinations(variables[1:], degree)

    with_current = [[variables[0]] + comb for comb in generate_combinations (variables, (degree - 1))]

    return with_current + without_current

  # features.append(np.ones((X.shape[0], 1)))

  for d in range (1, degree + 1):
    combinations = generate_combinations(list(range(n_features)), d)
    for comb in combinations:
      if comb:
        feature = np.prod(X[:, comb], axis = 1)
        # feature = feature.reshape(-1, 1)
        features.append(feature)

  return np.vstack(features).T

def fit_polynomial_regression (X, Y, degree, learning_rate = 0.00001, epochs = 10000):

  X_poly = polynomial_features(X, degree)
  n_samples, n_features = X_poly.shape
  theta = np.zeros(n_features)

  for epoch in range (epochs):
    Y_pred = np.dot(X_poly, theta)
    # Y_reshaped = Y.values.ravel() if Y.ndim > 1 else Y
    loss = np.mean((Y_pred-Y)**2)

    gradient = (2 / n_samples)*np.dot(X_poly.T, ((Y_pred - Y)))
    theta = theta - learning_rate * gradient

  return theta

def predict (X, theta, degree):
  X_poly = polynomial_features(X, degree)
  Y_pred = np.dot(X_poly, theta)
  return Y_pred

In [10]:
X = np.zeros((df.shape[0], (df.shape[1]-2)))

for j in range((df.shape[1]-2)):
  X[:,j] = df.iloc[:,j+1]

Y = df.iloc[:,-1]

# degree = int(input("Enter the degree: "))
# learning_rate = float(input("Enter the learning rate: "))
# epochs = int(input("Enter the number of epochs: "))

degree = 2
learning_rate = 0.00001
epochs = 10000

theta = fit_polynomial_regression(X, Y, degree, learning_rate, epochs)

print("coefficients are: ", theta)

coefficients are:  [ 0.00000000e+00  1.03865088e-09  1.21933878e-10  1.18909782e-09
  7.02366584e-10  9.13141752e-11  3.00465655e-09 -7.80937116e-10
  2.82742147e-09 -1.28093370e-09  1.16594678e-09 -1.61357627e-09
 -1.63622424e-09 -2.41454541e-11 -1.63287015e-10  6.33709150e-10
  2.66647164e-11  4.28377091e-10 -3.15426906e-10 -9.31759365e-10
 -1.97433424e-09]


In [11]:
df_test = pd.read_csv("/content/polynomial_regression_test.csv")

In [16]:
X_test = np.zeros((df_test.shape[0], (df_test.shape[1]-1)))

for j in range (df_test.shape[1]-1):
  X_test[:,j] = df_test.iloc[:,j+1]

Y_pred = predict(X_test, theta, degree)

print (Y_pred)

[ 1.60531458e-08 -6.57665492e-09 -6.56881671e-09 ...  2.28245026e-09
  3.48610345e-09 -6.82505870e-09]
