In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets


from math import comb
from scipy.interpolate import lagrange

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression, Ridge, SGDRegressor, Lasso
from sklearn.preprocessing import PolynomialFeatures

In [2]:
#-------------- DEFINE THE FUNCTIONS --------------#

def generate_sequence(f, n):
    sequence = []
    for i in range(n):
        sequence.append(f(i))
    return sequence

#-------------- POLYNOMIALS --------------#

def f1(x):
    return x**10 - x**9

def f2(x):
    return 6 * x * (x - 1) + 1 # Star numbers

def f3(x):
    return int(x * (x + 1) / 2) # Sum of the series

def f4(x):
    return x**5 - x**2

def f5(x):
    return 2*x**3 * (x**2 - 9)

def f6(x):
    return (x - 1) * (x - 2) * (x - 3) * (x - 4) * (x - 5)

def f7(x): # Lagrangian interpolation of f1 sequence [0, 4]
    return 6305 * x**3 - 18659 * x**2 + 12354 * x

#-------------- EXPONENTIALS --------------#

def f8(x):
    return 2**x + 3 * x

def f9(x):
    return int(1 / (x + 1) * comb(2 * x, x)) # Catalan numbers

def f10(x):
    return x * 2**x + 1 # Cullen numbers

def f11(x):
    return (2**x - 1)**2 - 2 # Carol numbers



#functions = []
#functions.append(f1)  
#functions.append(f2) 
#functions.append(f3) 
#functions.append(f4)
#functions.append(f5)
#functions.append(f6)



In [3]:
#-------------- CREATE .CSV-FILE --------------#

x_column = {'x': range(100)}
df   = pd.DataFrame(x_column)
df1  = pd.DataFrame({'y1': generate_sequence(f1, 100)})
df2  = pd.DataFrame({'y2': generate_sequence(f2, 100)})
df3  = pd.DataFrame({'y3': generate_sequence(f3, 100)})
df4  = pd.DataFrame({'y4': generate_sequence(f4, 100)})
df5  = pd.DataFrame({'y5': generate_sequence(f5, 100)})
df6  = pd.DataFrame({'y6': generate_sequence(f6, 100)})
df7  = pd.DataFrame({'y7': generate_sequence(f7, 100)})
df8  = pd.DataFrame({'y8': generate_sequence(f8, 100)})
df9  = pd.DataFrame({'y9': generate_sequence(f9, 100)})
df10 = pd.DataFrame({'y10': generate_sequence(f10, 100)})
df11 = pd.DataFrame({'y11': generate_sequence(f11, 100)})
#df12 = pd.DataFrame({'y11': generate_sequence(f12, 100)})

csv = pd.concat([df, df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11], axis=1)
csv.to_csv('polynomials.csv', index=False)

dataset = pd.read_csv('polynomials.csv')

X   = dataset.iloc[:, 0]
y1  = dataset.iloc[:, 1]
y2  = dataset.iloc[:, 2]
y3  = dataset.iloc[:, 3]
y4  = dataset.iloc[:, 4]
y5  = dataset.iloc[:, 5]
y6  = dataset.iloc[:, 6]
y7  = dataset.iloc[:, 7]
y8  = dataset.iloc[:, 8]
y9  = dataset.iloc[:, 9]
y10 = dataset.iloc[:, 10]
y11 = dataset.iloc[:, 11]
#y12 = dataset.iloc[:, 12]

In [75]:
#-------------- TRAIN_TEST_SPLIT --------------#

dataset = pd.read_csv('polynomials.csv')

X_train, X_test, y1_train, y1_test = train_test_split(X, y1, test_size=95, random_state=42) # Change second argument und choose different functions
X_train = X_train.to_numpy().reshape(-1, 1)
X_test = X_test.to_numpy().reshape(-1, 1)

#-------------- PREPROCESSING --------------#
MSE = []
Coefs = []
for i in range(0, 21):

    poly = PolynomialFeatures(degree=i)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.fit_transform(X_test)

    model = LinearRegression()

    model.fit(X_train_poly, y1_train)
    Coefs.append(model.coef_)

    y1_pred = model.predict(X_test_poly)

    error1 = MSE.append(mean_squared_error(y1_test, y1_pred))

    print("Degree " + str(i) +  "\t" + str(MSE[i]))
    
#-------------- PRINT ERROR AND POLYNOMIAL --------------#

#print("\n Function: " + "6305 * x**3 - 18659 * x**2 + 12354 * x")

print("\n Best error with degree: " + str(MSE.index(min(MSE))))
poly = ""
min_index = MSE.index(min(MSE))
for i in range(len(Coefs[min_index]) - 1, 0, -1):
    if (np.abs(Coefs[min_index][i]) != 0):
        poly += str(Coefs[min_index][i]) + 'x^' + str(i) + " + \n"
print("\n" + str(poly) + 'C')

#-------------- WRITE RESULTS TO FILE --------------#

#f = open('results1.csv', 'w')
#f.write("\n Best error with degree: " + str(MSE.index(min(MSE))))
#f.write("\n" +  str(min(MSE)))

#f.close()

#plt.scatter(range(100), generate_sequence(f3, 100))

#plt.plot(X_test, y1_pred, color='red', linewidth=3)

Degree 0	3.506768027918186e+38
Degree 1	2.078221338098398e+38
Degree 2	8.032073796307765e+37
Degree 3	4.67607766465524e+37
Degree 4	4.0818894345099633e+37
Degree 5	2.0439287925526092e+36
Degree 6	6.666287800944698e+35
Degree 7	2.0154889288718444e+35
Degree 8	3.815532519697341e+34
Degree 9	2.656956978942017e+33
Degree 10	4640021112.222701
Degree 11	1.2368198585509328e+17
Degree 12	2.3799130165217126e+24
Degree 13	6.526940840069649e+30
Degree 14	4.79711683459988e+34
Degree 15	2.507324287730886e+36
Degree 16	4.616308233661648e+37
Degree 17	5.194952704256925e+38
Degree 18	4.365604757737448e+39
Degree 19	3.0253447576313085e+40
Degree 20	1.8306793714416975e+41

 Best error with degree: 10

1.0000000000000682x^10 + 
-0.9999999999737839x^9 + 
-7.72628099504982e-09x^8 + 
5.847894811339173e-07x^7 + 
-1.3681669903230925e-05x^6 + 
-7.855731499295988e-07x^5 + 
-2.8932932101170194e-08x^4 + 
-8.561395871603847e-10x^3 + 
-2.1367365599747972e-11x^2 + 
-3.9124286122772134e-13x^1 + 
C


In [67]:
X_train, X_test, y1_train, y1_test = train_test_split(X, y1, test_size=0.8, random_state=None) # Change second argument und choose different functions
#X_train = X_train.to_numpy().reshape(-1, 1)
#X_test = X_test.to_numpy().reshape(-1, 1)



X_train = X_train.to_numpy()
y1_train = y1_train.to_numpy()
#poly = lagrange(X_train, f1)



x = np.array(range(50))
y = x**10 - x**9
poly = lagrange(x, y)
poly




poly1d([ 2.14086292e-50, -1.45149132e-47, -6.92913852e-45,  1.66502686e-42,
       -3.67419353e-40, -3.07397400e-38,  1.59967311e-35, -3.95689916e-33,
        4.33204117e-31, -1.43743089e-29,  2.22340497e-27, -2.02489699e-25,
       -5.22368657e-24, -6.72883750e-22,  1.20402197e-20, -7.20967258e-19,
        1.56983448e-16, -4.07901875e-15,  2.36627716e-13, -1.30622630e-11,
        8.18735641e-11, -4.05317907e-09, -2.93961088e-08, -2.29988721e-06,
        1.31403600e-04, -1.59611526e-03,  3.23220747e-02, -3.56887342e-01,
        4.25230017e+00, -4.41659873e+01,  1.35239346e+02, -1.11469117e+04,
        5.36701196e+04, -1.74868540e+06,  2.11153564e+07, -1.19055108e+08,
        9.75370355e+08, -2.80247791e+09,  1.27264506e+10, -7.40269191e+10,
        1.98115225e+11, -3.51480006e+11,  2.80455228e+11, -3.55373327e+12,
       -1.77455471e+12, -6.01433464e+11, -9.40358976e+10,  2.59826205e+12,
        5.50085805e+11,  0.00000000e+00])