# Labb Statistiska Metoder - Small Diameter Flow

In [578]:
import LinearRegressionFunctions as lrf
import pandas as pd
import scipy.stats as stats

flow_data = pd.read_csv("../Data/Small-diameter-flow.csv", index_col="Unnamed: 0")

# Z-score every column of the raw data and collect the results in a new frame.
flow_data_std = pd.DataFrame(
    {column: stats.zscore(flow_data[column]) for column in flow_data.columns}
)

# Regression on the standardized data. "Flow" is assigned to `sample_size`
# (presumably the response column -- confirm in LinearRegressionFunctions) and
# the remaining columns are passed as `params`.
# NOTE(review): the saved outputs of the following cells list only
# Beta-0..Beta-3, which looks like a three-predictor fit; re-run from a fresh
# kernel to confirm the outputs correspond to this parameter list.
mlr = lrf.LinearRegression(flow_data_std)
mlr.sample_size, mlr.params = (
    ["Flow"],
    ["Kinematic", "Geometric", "Inertial", "Observer"],
)
mlr.fit()




In [579]:
# Per-coefficient significance results for the standardized model, printed as
# one "name: value" line per entry.
dict_sig = mlr.Significance_individual_variables()

print("Significance tests on individual variables\n")

for key, result in dict_sig.items():
    print(f"{key}: {result}")

Significance tests on individual variables

Beta-0: [1.]
Beta-1: [1.78792057e-268]
Beta-2: [0.]
Beta-3: [1.46558092e-274]


In [580]:
# Display the coefficient array stored on the model fitted to the standardized data.
mlr.coefficients

array([[-8.88178420e-16],
       [ 2.87218576e-01],
       [ 1.10963846e+00],
       [-3.93924385e-01]])

In [581]:
# Summary statistics of the fitted model `mlr`: variance, standard deviation,
# significance of the regression, and R2.
# NOTE(review): the last line prints R2 * 100 under the label
# "Confidence level, %". R2 is the coefficient of determination (share of
# variance explained), not a confidence level -- consider relabeling.

print(f"Variance:                     {mlr.variance()}")
print(f"Standard deviation:           {mlr.sigma()}")
print(f"Significance of regression:   {mlr.significans()}")
print(f"Relevance of regression (R2): {mlr.R2()}")
print(f"Confidence level, %:          {mlr.R2()* 100:.4f}")


Variance:                     0.0029381084042615223
Standard deviation:           0.05420432090028914
Significance of regression:   2.0572503573131086e-125
Relevance of regression (R2): 0.9971212473210771
Confidence level, %:          99.7121


In [582]:
# Pairwise Pearson values between all pairs of columns, rendered as a table.
# NOTE(review): despite "p_value" in the method and variable names, the values
# shown (1.0 on the diagonal, symmetric) look like correlation coefficients
# rather than p-values -- confirm against LinearRegressionFunctions.

p_dict = mlr.p_value_pairs_param()
p_value_pairs_param_df = pd.DataFrame(p_dict) 
p_value_pairs_param_df

Unnamed: 0,Params,Flow,Kinematic,Geometric,Inertial,Observer
0,Flow,1.0,0.863403,0.995794,0.90331,0.18149
1,Kinematic,0.863403,1.0,0.863135,0.968671,0.103227
2,Geometric,0.995794,0.863135,1.0,0.91833,0.175199
3,Inertial,0.90331,0.968671,0.91833,1.0,0.121981
4,Observer,0.18149,0.103227,0.175199,0.121981,1.0


In [583]:
# Confidence intervals on individual parameters, rendered as a table.

confi_lvl_dict = mlr.confidence_interval_parameters()
# NOTE(review): `index` is not passed to the DataFrame below and is not used
# anywhere else in this cell; its three labels also differ from the four rows
# shown in the output -- probably a leftover, confirm before removing.
index = ["Predictor value", "Upper", "Lower"]
df_1 = pd.DataFrame(confi_lvl_dict)
df_1



Unnamed: 0,Intervals,Beta-0,Beta-1,Beta-2,Beta-3
0,Parameters value,-8.881784e-16,0.2872186,1.109638,-0.3939244
1,Error Margen,1.3878149999999998e-20,5.801721e-20,3.640202e-20,7.401713e-20
2,Upper,-8.881645e-16,0.2872186,1.109638,-0.3939244
3,Lower,-8.881923e-16,0.2872186,1.109638,-0.3939244


# Question
**"Is there an observer bias in the data collected for the small-diameter flow measurements?"**



In [587]:
# Two regressions on the raw (unstandardized) data, used to compare a model
# with and without the "Observer" column:
#   mlr_0 -- reduced model, "Observer" left out of the predictors
#   mlr_1 -- full model, "Observer" included
# Both models receive the complete data set; the rows are not split by observer.
mlr_0, mlr_1 = lrf.LinearRegression(flow_data), lrf.LinearRegression(flow_data)

mlr_1.sample_size, mlr_1.params = (
    ["Flow"],
    ["Kinematic", "Geometric", "Inertial", "Observer"],
)
mlr_0.sample_size, mlr_0.params = (
    ["Flow"],
    ["Kinematic", "Geometric", "Inertial"],
)

# Fit the full model first, then the reduced one.
mlr_1.fit()
mlr_0.fit()




In [588]:
# Side-by-side summary statistics for the two models fitted on the raw data.
# The label "Observer 0" refers to `mlr_0`, the model whose predictor list
# leaves out "Observer" (it is not a subset of rows with Observer == 0);
# "All varibales" refers to `mlr_1`, which includes "Observer".
# NOTE(review): the last pair prints R2 * 100 under "Confidence level, %";
# R2 is the coefficient of determination, not a confidence level.

print(f"Variance:                     Observer 0:    {mlr_0.variance()}")
print(f"Variance:                     All varibales: {mlr_1.variance()}\n")

print(f"Standard deviation:           Observer 0:    {mlr_0.sigma()}")
print(f"Standard deviation:           All varibales: {mlr_1.sigma()}\n")

print(f"Significance of regression:   Observer 0:    {mlr_0.significans()}")
print(f"Significance of regression:   All varibales: {mlr_1.significans()}\n")

print(f"Relevance of regression (R2): Observer 0:    {mlr_0.R2()}")
print(f"Relevance of regression (R2): All varibales: {mlr_1.R2()}\n")

print(f"Confidence level, %:          Observer 0:    {mlr_0.R2()* 100:.4f}")
print(f"Confidence level, %:          All varibales: {mlr_1.R2()* 100:.4f}\n")

Variance:                     Observer 0:    0.0063086854875835045
Variance:                     All varibales: 0.006272292538356665

Standard deviation:           Observer 0:    0.07942723391622993
Standard deviation:           All varibales: 0.07919780639864128

Significance of regression:   Observer 0:    7.998510998158917e-141
Significance of regression:   All varibales: 1.7265182348561856e-139

Relevance of regression (R2): Observer 0:    0.997121247321077
Relevance of regression (R2): All varibales: 0.9971526073276518

Confidence level, %:          Observer 0:    99.7121
Confidence level, %:          All varibales: 99.7153



In [589]:
# Per-coefficient significance results for both models: first the model that
# includes "Observer" (mlr_1), then the one without it (mlr_0).
dict_sig_1 = mlr_1.Significance_individual_variables()
dict_sig_0 = mlr_0.Significance_individual_variables()

print("Significance tests on individual All varibales\n")

for key, result in dict_sig_1.items():
    print(f"{key}: {result}")

print("Significance tests on individual variables no observer\n")

for key, result in dict_sig_0.items():
    print(f"{key}: {result}")

Significance tests on individual All varibales

Beta-0: [3.22736887e-147]
Beta-1: [5.73058004e-236]
Beta-2: [0.]
Beta-3: [1.16280667e-241]
Beta-4: [2.34224112e-44]
Significance tests on individual variables no observer

Beta-0: [1.36944285e-146]
Beta-1: [2.27997786e-236]
Beta-2: [0.]
Beta-3: [1.91928308e-242]
