In [85]:
import polars as pl

In [86]:
# Load the worksheet with the descriptors and the observed toxicity values.
input_matrix = pl.read_excel('zad1.xlsx')

In [87]:
# Preview the first rows to check the column names and dtypes that were read.
input_matrix.head()

Metal oxide,Hfc,Xc,Obs. log(EC50)-1
str,f64,f64,f64
"""TiO2""",-1492.0,4.9,1.76
"""ZnO2""",-638.1,4.95,2.02
"""SiO2""",-618.3,3.81,2.12
"""V2O3""",-139.5,3.24,2.24
"""Sb2O3""",-206.7,4.46,2.31


In [88]:
# Keep only the two descriptor columns used by the model.
descriptors = input_matrix.select("Hfc", "Xc")

In [89]:
# Autoscale every descriptor column: subtract its mean and divide by its
# standard deviation (polars' default std, i.e. the sample estimate).
every_column = pl.all()
standarized_descriptors = descriptors.select(
    (every_column - every_column.mean()) / every_column.std()
)

In [90]:
# Linear model evaluated on the standardized descriptors; the result is
# stored next to the observed values so both can be compared row by row.
model_expr = 2.466 + 0.244*pl.col("Hfc") + 0.394*pl.col("Xc")

Y_pred = standarized_descriptors.select(model_expr.alias("log(EC50)-1"))
Y_pred = Y_pred.with_columns(input_matrix["Obs. log(EC50)-1"])

In [91]:
# Show predicted and observed values side by side.
Y_pred

log(EC50)-1,Obs. log(EC50)-1
f64,f64
1.774622,1.76
2.253886,2.02
1.985789,2.12
2.108199,2.24
2.369945,2.31
2.617089,2.5
2.56245,2.64
2.781791,2.83
3.022142,2.92
3.184086,3.32


# Współczynnik determinacji

$R^2 = 1 - \frac{\sum(y_{pred} - y_{obs})^2}{\sum(y_{obs} - \bar{y}_{obs})^2}$

In [92]:
# Coefficient of determination: 1 - (residual sum of squares / total sum of squares).
pred_series = Y_pred['log(EC50)-1']
obs_series = Y_pred['Obs. log(EC50)-1']

ss_residual = ((pred_series - obs_series)**2).sum()
ss_total = ((obs_series - obs_series.mean())**2).sum()

R2 = 1 - ss_residual / ss_total

In [93]:
# Display the coefficient of determination.
R2

0.9272795443275705

# Pierwiastek błędu średniokwadratowego kalibracji

$RMSE_C = \sqrt{\frac{\sum(y_{pred} - y_{obs})^2}{n_c}}$

In [94]:
# Root mean squared error of calibration: sqrt(sum of squared residuals / n_c),
# where n_c is the number of rows in Y_pred.
squared_residual = (pl.col("log(EC50)-1") - pl.col("Obs. log(EC50)-1"))**2

RMSEc = Y_pred.select(
    (squared_residual.sum() / Y_pred.height).sqrt()
).item()

In [95]:
# Display the calibration RMSE.
RMSEc

0.1203526121139761

# Istotność statystyczna modelu

$F = \frac{S^2_M}{S^2_E}$

$S^2_M = \frac{\sum(y_{pred} - \bar{y}_{pred})^2}{n-1}$

$S^2_E = \frac{\sum(y_{obs} - y_{pred})^2}{n-p-1}$

In [96]:
# Attach the compound names as the first column so each prediction can be
# read against its oxide. insert_column modifies Y_pred in place, so the
# guard keeps this cell safe to re-run on a live kernel (a second insert of
# the same column name would otherwise fail).
if "Metal oxide" not in Y_pred.columns:
    Y_pred.insert_column(
        0,
        input_matrix["Metal oxide"]
    )

Y_pred

Metal oxide,log(EC50)-1,Obs. log(EC50)-1
str,f64,f64
"""TiO2""",1.774622,1.76
"""ZnO2""",2.253886,2.02
"""SiO2""",1.985789,2.12
"""V2O3""",2.108199,2.24
"""Sb2O3""",2.369945,2.31
"""Bi2O3""",2.617089,2.5
"""Mn2O3""",2.56245,2.64
"""CoO""",2.781791,2.83
"""In2O3""",3.022142,2.92
"""ZnO""",3.184086,3.32


In [97]:
# n: number of observations; p: number of descriptors in the model.
# p is taken from the descriptor table itself rather than from Y_pred's
# column count, which only equals p + 1 because a label column was inserted
# into Y_pred earlier.
n = Y_pred.height
p = descriptors.width

# Kept with their original values (number of rows minus one).
# NOTE(review): df2 = n - 1 does not match the n - p - 1 used in the
# denominator of F below; confirm before using df1/df2 for a p-value.
df1 = n - 1
df2 = n - 1

pred_expr = pl.col("log(EC50)-1")
obs_expr = pl.col("Obs. log(EC50)-1")

# F = S^2_M / S^2_E, following the definitions in the markdown above:
#   S^2_M = sum((y_pred - mean(y_pred))^2) / (n - 1)
#   S^2_E = sum((y_obs - y_pred)^2) / (n - p - 1)
model_variance = ((pred_expr - pred_expr.mean())**2).sum() / (n - 1)
error_variance = ((obs_expr - pred_expr)**2).sum() / (n - p - 1)

f_value = Y_pred.select(
    model_variance / error_variance
).item()

In [98]:
# Report the F statistic computed in the previous cell.
print(f'F-stat: {f_value}')

F-stat: 9.921161358208717
