In [None]:
import numpy as np
import plotly.express as px
import polars as pl
import seaborn as sns
import sklearn
import sklearn.linear_model

import gradient_descent

In [None]:
# Load the "mpg" example dataset through seaborn and keep it as `mpdt`.
# NOTE(review): seaborn hands back a pandas DataFrame here, not a polars one — confirm
# before passing `mpdt` to code that expects polars.
mpdt = sns.load_dataset("mpg")
# Bare last expression so the notebook renders the frame.
mpdt

In [None]:
def get_numerical(df: "pandas.DataFrame | polars.DataFrame", col_name: str) -> dict:
    """Summarise the spread of one numeric column.

    Only ``df[col_name]`` plus the ``.var()`` and ``.quantile(q)`` methods of the
    resulting column are used, so the function accepts any frame whose columns
    provide those two methods.

    Args:
        df: Frame holding the data.
        col_name: Name of the numeric column to summarise.

    Returns:
        A dict with the keys ``"dispersion"`` (variance of the column),
        ``"qwantile 0.1"``, ``"qwantile 0.9"``, ``"quartile 1"`` and
        ``"quartile 3"``. The quantile values use whatever interpolation the
        column's own ``quantile`` method defaults to.
    """
    column = df[col_name]
    return {
        # Ask for the variance directly instead of squaring the standard
        # deviation, which avoids a sqrt/square round-trip in floating point.
        "dispersion": column.var(),
        # Key spellings are kept as-is: they are part of the returned structure.
        "qwantile 0.1": column.quantile(0.1),
        "qwantile 0.9": column.quantile(0.9),
        "quartile 1": column.quantile(0.25),
        "quartile 3": column.quantile(0.75),
    }

In [None]:
# Spread statistics for every numeric column, keyed by column name.
# One cell driven by a column list replaces seven copy-pasted calls, so adding
# or removing a column is a one-line change.
numeric_columns = [
    "mpg",
    "cylinders",
    "displacement",
    "horsepower",
    "weight",
    "acceleration",
    "model_year",
]
{col: get_numerical(mpdt, col) for col in numeric_columns}

In [None]:
# Horsepower against model year, with an ordinary-least-squares trend line.
px.scatter(
    data_frame=mpdt,
    x="model_year",
    y="horsepower",
    trendline="ols",
)

In [None]:
# Horsepower against cylinder count, with an ordinary-least-squares trend line.
px.scatter(
    data_frame=mpdt,
    x="cylinders",
    y="horsepower",
    trendline="ols",
)

In [None]:
# Convert to polars, one-hot encode `origin` into 0/1 UInt8 columns (one per
# region, in the order listed), drop the original column, then drop every row
# that still contains a null.
mpdl = (
    pl.DataFrame(mpdt)
    .with_columns(
        [
            pl.col("origin").eq(region).cast(pl.UInt8).alias(region)
            for region in ("europe", "usa", "japan")
        ]
    )
    .drop("origin")
    .drop_nulls()
)
mpdl

In [None]:
# Correlate every column except the free-text `name` column and show the result
# as an annotated heatmap.
numeric_frame = mpdl.select(pl.exclude("name"))
axis_labels = numeric_frame.columns
corr_matrix = numeric_frame.corr()
fig = px.imshow(corr_matrix, text_auto=True, x=axis_labels, y=axis_labels)
fig.update_layout(title="Correlation Matrix", width=800, height=800)
fig.show()

In [None]:
# Fit mpg as a linear function of horsepower with scikit-learn's SGD regressor.
# random_state pins the estimator's internal shuffling so that Restart & Run All
# reproduces the same fitted line instead of a slightly different one per run.
# NOTE(review): horsepower is fed in unscaled; scikit-learn advises standardising
# features for SGD — consider a StandardScaler if the fit turns out unstable.
SGDLearner = sklearn.linear_model.SGDRegressor(
    max_iter=1_000_000,
    eta0=0.001,
    n_iter_no_change=100_000,
    random_state=42,
)
# One single-element row per observation: the estimator expects a 2-D feature table.
x_vals = [[hs] for hs in mpdl["horsepower"]]
model = SGDLearner.fit(x_vals, mpdl["mpg"])

In [None]:
# Predictions of the fitted SGD model on the same horsepower rows it was trained on.
y_vals = model.predict(X=x_vals)

In [None]:
# Observed mpg against horsepower, with the SGD predictions overlaid as a second trace.
(
    px.scatter(mpdl, x="horsepower", y="mpg")
    .add_scatter(
        x=np.ravel(x_vals),  # flatten the one-column feature rows back to 1-D
        y=y_vals,
        name="SGD Regression",
    )
)

In [None]:
# Same regression problem, solved with the local `gradient_descent` module.
x, y = mpdl["horsepower"], mpdl["mpg"]
# NOTE(review): 500 and 0.01 are presumably the iteration count and the learning
# rate — confirm against the signature of gradient_descent.descent.
GDLearner = gradient_descent.descent(
    x,
    y,
    500,
    0.01,
)

In [None]:
# Evaluate the hand-rolled fit on 1000 evenly spaced horsepower values spanning
# the observed range, then overlay that curve on the raw scatter.
horsepower_col = mpdl["horsepower"]
space = np.linspace(horsepower_col.min(), horsepower_col.max(), 1000)
# The first two entries of GDLearner are passed on to approx_fn unchanged.
gd_curve = gradient_descent.approx_fn(space, GDLearner[0], GDLearner[1])
(
    px.scatter(mpdl, x="horsepower", y="mpg")
    .add_scatter(x=space, y=gd_curve, name="GD Regression")
)