# Labb Statistiska Metoder - Small Diameter Flow

In [613]:
import LinearRegressionFunctions as lrf
import pandas as pd

# Load the measurements; the CSV column named "Unnamed: 0" is used as the row index.
flow_data = pd.read_csv("../Data/Small-diameter-flow.csv", index_col="Unnamed: 0")
# Wrap the data in the project's regression helper (LinearRegressionFunctions).
mlr = lrf.LinearRegression(flow_data)

# NOTE(review): `sample_size` is assigned a column name here, so it presumably
# selects the response (dependent) column rather than a count - confirm in
# LinearRegressionFunctions.
mlr.sample_size = ["Flow"]
# Columns used as features in the regression.
mlr.params = ["Kinematic", "Geometric", "Inertial", "Observer"] 

# Fit the model; later cells read their statistics from this `mlr` instance.
mlr.fit()




In [614]:
# Display the fitted coefficients.
# NOTE(review): presumably the intercept comes first, followed by one
# coefficient per entry of `mlr.params` in order - confirm in the library.
mlr.coefficients

array([[-2.58376909],
       [ 0.87000771],
       [ 3.60315069],
       [-0.75188958],
       [ 0.0168805 ]])

In [615]:
# Summary statistics of the fitted model (the methods required for grade G).
# NOTE(review): the exact definitions (e.g. which degrees of freedom the
# variance uses, and whether `significans()` is an F-test p-value) live in
# LinearRegressionFunctions - confirm there.

print(f"Variance:                     {mlr.variance()}")
print(f"Standard deviation:           {mlr.sigma()}")
print(f"Significance of regression:   {mlr.significans()}")

# Compute R2 once and reuse it for both lines below.
r_squared = mlr.R2()
print(f"Relevance of regression (R2): {r_squared}")
# R2 is the share of the variance in the response explained by the model.
# It is not a confidence level, so the percentage line is labelled accordingly.
print(f"Explained variance (R2), %:   {r_squared * 100:.4f}")


Variance:                     0.006272292538356665
Standard deviation:           0.07919780639864128
Significance of regression:   1.7265182348561856e-139
Relevance of regression (R2): 0.9971526073276518
Confidence level, %:          99.7153


In [616]:
# Significance test result for each individual regression coefficient.

dict_sig = mlr.Significance_individual_variables()

print("Significance tests on individual variables\n")

# Walk the mapping as (coefficient name, significance) pairs.
for beta_name, significance in dict_sig.items():
    print(f"{beta_name}: {significance}")

Significance tests on individual variables

Beta-0: [3.22736887e-147]
Beta-1: [5.73058004e-236]
Beta-2: [0.]
Beta-3: [1.16280667e-241]
Beta-4: [2.34224112e-44]


In [617]:
# Pearson correlation coefficient between all pairs of variables, shown as a table.
# NOTE(review): despite the `p_value` in the method and variable names, the
# result is described here as Pearson numbers (correlation coefficients), not
# p-values - confirm against LinearRegressionFunctions and consider renaming.

p_dict = mlr.p_value_pairs_param()
p_value_pairs_param_df = pd.DataFrame(p_dict) 
p_value_pairs_param_df

Unnamed: 0,Params,Flow,Kinematic,Geometric,Inertial,Observer
0,Flow,1.0,0.863403,0.995794,0.90331,0.18149
1,Kinematic,0.863403,1.0,0.863135,0.968671,0.103227
2,Geometric,0.995794,0.863135,1.0,0.91833,0.175199
3,Inertial,0.90331,0.968671,0.91833,1.0,0.121981
4,Observer,0.18149,0.103227,0.175199,0.121981,1.0


In [618]:
# Confidence intervals on the individual regression coefficients.
# The unused `index` list was removed: it was never passed to the DataFrame and
# its three labels did not match the four rows that are displayed.
#
# NOTE(review): the stored output shows error margins of about 1e-18, so the
# upper and lower bounds equal the estimates - verify how the margin is
# computed in LinearRegressionFunctions.

confi_lvl_dict = mlr.confidence_interval_parameters()
df_1 = pd.DataFrame(confi_lvl_dict)
df_1



Unnamed: 0,Intervals,Beta-0,Beta-1,Beta-2,Beta-3,Beta-4
0,Parameters value,-2.583769,0.8700077,3.603151,-0.7518896,0.0168805
1,Error Margen,2.219944e-18,2.556936e-19,1.7453859999999998e-19,2.0641989999999997e-19,6.094732e-20
2,Upper,-2.583769,0.8700077,3.603151,-0.7518896,0.0168805
3,Lower,-2.583769,0.8700077,3.603151,-0.7518896,0.0168805


# Question
**"Is there an observer bias in the data collected for the small-diameter flow measurements?"**

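The Observer coefficient is statistically significant: in the significance tests for the individual parameters, Beta-4 (Observer) has a p-value of about 2.3e-44, far below 0.05, so the hypothesis that the observer has no effect is rejected. Its practical effect is very small, however: the estimated coefficient is only about 0.017, and Observer is only weakly correlated with Flow (Pearson coefficient about 0.18).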

If we run the regression with and without Observer as a feature and compare the results, we see very little change in the variance, the standard deviation, the significance of the regression and the relevance of the regression (R2). This further suggests that the effect this feature has on the regression is very small.




In [619]:
# Compare the regression fitted with and without Observer as a feature.

mlr_0 = lrf.LinearRegression(flow_data)  # reduced model: Observer left out
mlr_1 = lrf.LinearRegression(flow_data)  # full model: all features

mlr_1.sample_size = ["Flow"]
mlr_1.params = ["Kinematic", "Geometric", "Inertial", "Observer"]
mlr_0.sample_size = ["Flow"]
mlr_0.params = ["Kinematic", "Geometric", "Inertial"]

mlr_1.fit()
mlr_0.fit()

# Width of the label column, so that all values line up in the printed report.
LABEL_WIDTH = 30


def _print_comparison(label, full_value, reduced_value):
    """Print one statistic for the full model and for the model without Observer.

    Args:
        label: Name of the statistic, shown left-aligned in the label column.
        full_value: Value for the model fitted on all features.
        reduced_value: Value for the model fitted without Observer.
    """
    print(f"{label:<{LABEL_WIDTH}}All variables: {full_value}")
    # The trailing newline leaves a blank line between statistics.
    print(f"{' ' * LABEL_WIDTH}No Observer:   {reduced_value}\n")


_print_comparison("Variance:", mlr_1.variance(), mlr_0.variance())
_print_comparison("Standard deviation:", mlr_1.sigma(), mlr_0.sigma())
_print_comparison("Significance of regression:", mlr_1.significans(), mlr_0.significans())

# Compute R2 once per model and reuse it for the raw and percentage rows.
r2_full = mlr_1.R2()
r2_reduced = mlr_0.R2()
_print_comparison("Relevance of regression (R2):", r2_full, r2_reduced)
# R2 * 100 is the percentage of variance explained, not a confidence level.
_print_comparison(
    "Explained variance (R2), %:",
    f"{r2_full * 100:.4f}",
    f"{r2_reduced * 100:.4f}",
)


Variance:                     All varibales: 0.006272292538356665
                              No Observer:   0.0063086854875835045

Standard deviation:           All varibales: 0.07919780639864128
                              No Observer:   0.07942723391622993

Significance of regression:   All varibales: 1.7265182348561856e-139
                              No Observer:   7.998510998158917e-141

Relevance of regression (R2): All varibales: 0.9971526073276518
                              No Observer:   0.997121247321077

Confidence level, %:          All varibales: 99.7153
                              No Observer:   99.7121

