In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import polars as pl
import seaborn as sns
import sklearn
import sklearn.linear_model

import gradient_descent

In [None]:
def get_numerical(df: "pl.DataFrame | pd.DataFrame", col_name: str) -> dict:
    """Compute spread statistics for a single numeric column.

    The function only needs ``df[col_name]`` to expose ``var()`` and
    ``quantile(q)``, so it accepts both polars and pandas frames (the
    notebook calls it with a pandas frame).

    Args:
        df: Frame that contains the column.
        col_name: Name of the column to summarise.

    Returns:
        A dict with the variance ("dispersion"), the 0.1 and 0.9 quantiles,
        and the first and third quartiles. Key spellings are kept exactly as
        they were so previously produced outputs remain comparable.

    Note:
        Quantiles are computed with each library's default interpolation.
        NOTE(review): pandas and polars defaults may differ, so the same data
        can yield slightly different quantiles depending on the frame type.
    """
    column = df[col_name]
    return {
        # var() yields the same quantity as std() ** 2 without the
        # sqrt-then-square round-trip.
        "dispersion": column.var(),
        "qwantile 0.1": column.quantile(0.1),
        "qwantile 0.9": column.quantile(0.9),
        "quartile 1": column.quantile(0.25),
        "quartile 3": column.quantile(0.75),
    }

In [None]:
# Read the training set and discard the "id" column in one expression,
# so re-running this cell always starts again from the file on disk.
pudt = pd.read_csv("train.csv").drop(columns=["id"])
pudt

In [None]:
# One summary table for all columns instead of a separate copy-pasted cell
# per column.
STAT_COLUMNS = [
    "Mean_Integrated",
    "SD",
    "EK",
    "Skewness",
    "Mean_DMSNR_Curve",
    "SD_DMSNR_Curve",
    "EK_DMSNR_Curve",
    "Skewness_DMSNR_Curve",
    "Class",
]
# orient="index": each dataset column becomes one row, and the statistics
# returned by get_numerical become the table's columns.
pd.DataFrame.from_dict(
    {col: get_numerical(pudt, col) for col in STAT_COLUMNS},
    orient="index",
)

In [None]:
# Skewness plotted against Mean_Integrated, with an OLS trendline overlaid.
px.scatter(
    data_frame=pudt,
    x="Mean_Integrated",
    y="Skewness",
    trendline="ols",
)

In [None]:
# Skewness plotted against SD, with an OLS trendline overlaid.
px.scatter(
    data_frame=pudt,
    x="SD",
    y="Skewness",
    trendline="ols",
)

In [None]:
# Build a polars copy of the pandas frame and drop every row that contains
# a null; the cells below work on this cleaned frame.
pudl = (
    pl.DataFrame(data=pudt)
    .drop_nulls()
)
pudl

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
corr_matrix = pudl.corr()
# Axis labels are passed explicitly from the frame's column names.
fig = px.imshow(
    corr_matrix,
    x=pudl.columns,
    y=pudl.columns,
    text_auto=True,
).update_layout(title="Correlation Matrix", width=800, height=800)
fig.show()

In [None]:
# Fit Skewness as a linear function of SD with stochastic gradient descent.
# random_state pins the estimator's internal randomness so a fresh
# Restart & Run All reproduces the same fit (42 matches the sampling seed
# used later in the notebook).
SGDLearner = sklearn.linear_model.SGDRegressor(
    max_iter=1_000_000,
    eta0=0.001,
    n_iter_no_change=10,
    random_state=42,
)
# scikit-learn expects a 2-D feature matrix (one row per sample, one column
# for SD); build it vectorised instead of looping over the Series in Python.
x_vals = pudl.select("SD").to_numpy()
# fit() returns the estimator itself, so `model` and `SGDLearner` are the
# same object.
model = SGDLearner.fit(x_vals, pudl["Skewness"].to_numpy())

In [None]:
# Predict on the same inputs the model was fitted on, then draw the fitted
# values on top of the raw SD / Skewness points.
y_vals = model.predict(x_vals)
px.scatter(data_frame=pudl, x="SD", y="Skewness").add_scatter(
    # Flatten the (n, 1) feature matrix back into a 1-D x axis.
    x=np.asarray(x_vals).ravel(),
    y=y_vals,
    name="SGD Regression",
)

In [None]:
# Run the hand-written gradient descent on a fixed-seed sample of 1000 rows.
tdl = pudl.sample(n=1000, seed=42)
x, y = tdl.get_column("SD"), tdl.get_column("Skewness")
# NOTE(review): 500 and 0.01 are passed positionally; presumably the
# iteration count and the learning rate — confirm against
# gradient_descent.descent.
GDLearner = gradient_descent.descent(x, y, 500, 0.01)

In [None]:
# 1000 evenly spaced SD values spanning the observed range. Series.min() /
# Series.max() replace the Python builtins, which would iterate the polars
# Series element by element.
space = np.linspace(pudl["SD"].min(), pudl["SD"].max(), 1000)
px.scatter(pudl, x="SD", y="Skewness").add_scatter(
    x=space,
    # NOTE(review): GDLearner[0] and GDLearner[1] are presumably the fitted
    # parameters returned by gradient_descent.descent — confirm their order
    # against gradient_descent.approx_fn.
    y=gradient_descent.approx_fn(space, GDLearner[0], GDLearner[1]),
    name="GD Regression",
)