# Polynomial Regression using Plotly and Scikit-Learn

### Import of the necessary modules

In [5]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

### Definition of the function that formats polynomial equations

In [6]:
def format_coefs(coefs):
    """Format polynomial coefficients as a LaTeX equation string for a graph label.

    Parameters
    ----------
    coefs : iterable
        Coefficients ordered by increasing power: ``coefs[i]`` multiplies
        ``x^i``. Each coefficient is rendered with its default string
        formatting, so round the values beforehand if needed.

    Returns
    -------
    str
        The equation wrapped in ``$...$``, e.g. ``"$1.0 + 2.0x - 3.0x^2$"``.
        An empty ``coefs`` yields ``"$$"``.
    """
    def _term(power, coef):
        # Build each term from its exponent instead of patching the joined
        # string afterwards: a blanket replace of "x^1" also corrupts
        # "x^10", "x^11", ... (it would turn "x^10" into "x0").
        if power == 0:
            return f"{coef}"
        if power == 1:
            return f"{coef}x"
        if power < 10:
            return f"{coef}x^{power}"
        # LaTeX only superscripts the next single token, so exponents with
        # several digits must be wrapped in braces.
        return f"{coef}x^{{{power}}}"

    equation = "$" + " + ".join(_term(i, coef) for i, coef in enumerate(coefs)) + "$"

    # A negative coefficient after the first term is shown as a subtraction.
    return equation.replace("+ -", "- ")

### Data retrieval and formatting

In [7]:
# Load plotly's built-in "tips" demonstration dataset; each row represents a restaurant bill.
df = px.data.tips()

# Example asymmetric error bars for the samples, derived from the tip amount.
df["e_plus"] = df["tip"] / 40   # upper error
df["e_minus"] = df["tip"] / 50  # lower error

# Bill totals and tips, each reshaped into a single-column array (one sample per row).
X = df["total_bill"].values.reshape(-1, 1)
Y = df["tip"].values.reshape(-1, 1)

# 100 evenly spaced bill values covering the observed range, also as a single column;
# the fitted curves are evaluated on this grid.
x_range = np.linspace(X.min(), X.max(), num=100).reshape(-1, 1)

# Scatter plot of the sample points.
# To display the asymmetric error bars on the samples, additionally pass
# error_y="e_plus" and error_y_minus="e_minus" to px.scatter.
fig = px.scatter(
    df,
    x="total_bill",
    y="tip",
    opacity=0.65,
)

### Execution of a loop to draw the curves associated with polynomials of different degrees

In [8]:
# Fit one polynomial model per degree (1, 2, 3 and 4) and add its curve to the figure.
for degree in range(1, 5):
    # Expand the single bill feature into its powers x^0 ... x^degree,
    # for both the samples and the evaluation grid.
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)
    x_range_poly = poly.transform(x_range)

    # Ordinary least squares on the expanded features. No separate intercept is
    # fitted: the x^0 column produced by PolynomialFeatures (include_bias
    # defaults to True) already plays that role.
    # To weight the samples, pass an array of per-sample weights as sample_weight.
    model = LinearRegression(fit_intercept=False)
    model.fit(X_poly, df["tip"], sample_weight=None)

    # Evaluate the fitted polynomial on the grid.
    y_poly = model.predict(x_range_poly)

    # Use the equation (coefficients rounded to 2 decimals) as the legend label.
    equation = format_coefs(np.round(model.coef_, 2))

    # Plot the curve associated with this polynomial.
    curve = go.Scatter(x=x_range[:, 0], y=y_poly, name=equation)
    fig.add_traces(curve)

# Display the resulting graph.
fig.show()
# To save the graph as a PNG instead: fig.write_image("fig.png")