In [16]:
# All the packages and imports
import pandas as pd 
import researchpy as rp
import numpy as np
from scipy.stats import pearsonr
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
import patsy


# Load the dataset from the CSV file (relative path) into the `rq2` DataFrame
rq2 = pd.read_csv(filepath_or_buffer='rq2dataset.csv')

In [17]:
# Pull out the three columns of interest as individual Series.
dat1, dat2, dat3 = (
    rq2[column] for column in ('General_trust', 'Attitude', 'Perceived_trust')
)

# Place them side by side under the short labels xv / yv / zv.
xyz = pd.concat([dat1, dat2, dat3], axis=1, keys=['xv', 'yv', 'zv'])

# Pairwise correlation matrix (pandas default method) of the three columns.
xyz.corr()

Unnamed: 0,xv,yv,zv
xv,1.0,0.100584,0.152855
yv,0.100584,1.0,0.200025
zv,0.152855,0.200025,1.0


# RQ2 - Linear regression

In [18]:
# Specify the OLS model: Perceived_trust regressed on General_trust and Attitude.
ols_spec = smf.ols(formula="Perceived_trust ~ General_trust + Attitude", data=rq2)

# Fit it and show the regression summary table.
model4 = ols_spec.fit()
model4.summary()

0,1,2,3
Dep. Variable:,Perceived_trust,R-squared:,0.058
Model:,OLS,Adj. R-squared:,0.055
Method:,Least Squares,F-statistic:,24.94
Date:,"Thu, 25 May 2023",Prob (F-statistic):,3.07e-11
Time:,17:16:32,Log-Likelihood:,-1095.3
No. Observations:,816,AIC:,2197.0
Df Residuals:,813,BIC:,2211.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.9102,0.144,13.285,0.000,1.628,2.192
General_trust,0.0653,0.017,3.919,0.000,0.033,0.098
Attitude,0.1734,0.032,5.452,0.000,0.111,0.236

0,1,2,3
Omnibus:,14.411,Durbin-Watson:,1.767
Prob(Omnibus):,0.001,Jarque-Bera (JB):,12.206
Skew:,-0.229,Prob(JB):,0.00224
Kurtosis:,2.613,Cond. No.,30.0


## Check for assumptions

In [23]:
# Build the design matrix for the two predictors with patsy
# (same right-hand side as the regression formula).
x = patsy.dmatrix("General_trust + Attitude", rq2)

# One variance inflation factor per design-matrix column, labelled with the
# term names patsy recorded for the formula.
vif = pd.DataFrame(
    {
        "VIF Factor": [
            variance_inflation_factor(x, column_idx)
            for column_idx in range(x.shape[1])
        ],
        "Features": x.design_info.term_names,
    }
)

vif

Unnamed: 0,VIF Factor,Features
0,19.596944,Intercept
1,1.01022,General_trust
2,1.01022,Attitude


In [24]:
# Rebuild the design matrix and convert it to a plain NumPy array.
design = patsy.dmatrix("General_trust + Attitude", data=rq2)
x = np.asarray(design)

In [25]:
# Getting the singular values of the design matrix `x` from its SVD.
# Only the singular values are used below, so compute_uv=False avoids
# building the U and V factors.

sing_as = np.linalg.svd(x, compute_uv=False)


# Calculating the condition index: the largest singular value divided by
# each singular value. `x` is used as it stands (no column scaling is
# applied in this cell).

largest_sv = sing_as.max()  # loop-invariant, so computed once
condition_index = [largest_sv / sv for sv in sing_as]

condition_index

[1.0, 4.945545198885372, 30.02896501939751]

In [26]:
# Squared singular values, rounded to three decimals.
eigen_vals = np.square(sing_as).round(3)

# Two-column table: squared singular values next to their condition indices,
# displayed to one decimal place.
spectrum = np.column_stack([eigen_vals, condition_index])
pd.DataFrame(spectrum, columns=["Eigenvalues", "Condition Index"]).round(1)

Unnamed: 0,Eigenvalues,Condition Index
0,36342.3,1.0
1,1485.9,4.9
2,40.3,30.0
