In [None]:
import random
random.seed(0)
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from factor_analyzer import FactorAnalyzer
from matplotlib import pyplot as plt
import pingouin as pg

In [None]:
# Load the survey responses from Book1.csv.
df = pd.read_csv('Book1.csv')

# The 20 questionnaire items, in the order p1..p5, s1..s5, c1..c5, e1..e5.
item_columns = [f'{prefix}{number}' for prefix in 'psce' for number in range(1, 6)]

# Explicit copy so that later column assignments on `factors` do not trigger
# pandas' SettingWithCopyWarning.
factors = df[item_columns].copy()

# The preview must be the last expression of the cell to be rendered; in the
# middle of the cell its result was silently discarded.
df.head(3)

In [None]:
# Ordinal encoder configured with one explicit category list: the strings
# '1' through '5'.
oe = OrdinalEncoder(
    categories=[['1', '2', '3', '4', '5']]
)

# Run fit_transform on each item column of `df`, one column at a time.
# NOTE(review): the array returned by fit_transform is not assigned anywhere,
# so this loop leaves both `df` and `factors` unchanged — confirm whether the
# encoded values were meant to be stored back.
for col in factors:
    oe.fit_transform(df[[col]])

In [None]:
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

# Bartlett's test of sphericity on the item responses.
chisquare, p_value = calculate_bartlett_sphericity(factors)

# One-row summary table holding the statistic and its p-value.
frame = {'Chi square': [chisquare], 'p': [p_value]}
bartletts_dataframe = pd.DataFrame(
    data=frame,
    index=['Bartletts test'],
    columns=['Chi square', 'p'],
)

# Persist the table, then show it as the cell output.
bartletts_dataframe.to_csv('bartletts.csv')
bartletts_dataframe

In [None]:
from factor_analyzer.factor_analyzer import calculate_kmo

# calculate_kmo yields the per-item KMO values and the overall KMO value.
kmo_all, kmo_model = calculate_kmo(factors)

# Index the per-item values directly by the item names. Passing the Index
# wrapped in a list (`[factors.columns]`) makes pandas build a one-level
# MultiIndex, so every row label becomes a 1-tuple instead of a plain name.
kmo_values = pd.DataFrame(kmo_all, columns=['KMO'], index=factors.columns)
kmo_values.to_csv('kmo.csv')

# Cell output: the per-item table together with the overall value.
kmo_values, kmo_model

In [None]:
# Default-configured fit; only its eigenvalues are used in this cell.
fa = FactorAnalyzer()
fa.fit(factors)
ev, v = fa.get_eigenvalues()
eigen_values_df = pd.DataFrame(ev, columns=['Eigenvalues'])
eigen_values_df.to_csv('Eigenvalues.csv')

# Horizontal reference line at eigenvalue = 1. Its length follows the number
# of eigenvalues instead of a hard-coded 21, so it stays correct if the item
# set changes (20 items still give the original 21 points).
x = [[i] for i in range(len(ev) + 1)]
y = [1] * len(x)

# Draw everything on one explicit Axes. DataFrame.plot() without `ax` opens a
# new figure, which previously left the (9, 5) figure empty and put the scree
# plot on a default-sized one.
fig, ax = plt.subplots(figsize=(9, 5))
eigen_values_df.plot(ax=ax)
ax.scatter(range(len(eigen_values_df)), eigen_values_df['Eigenvalues'])
ax.set_title('Scree Plot', size=15)
ax.set_xlabel('Factors')
ax.set_ylabel('Eigenvalue')
ax.plot(x, y, c='r', linewidth=4)
plt.show()

In [None]:
# Four-factor model: 'minres' fitting method with 'oblimin' rotation.
fa = FactorAnalyzer(
    n_factors=4,
    method='minres',
    rotation='oblimin',
)
fa.fit(factors)

In [None]:
# Loadings of every item on the four fitted factors, one row per item.
# The rows are indexed by `factors.columns` directly: wrapping the Index in a
# list (`[factors.columns]`) makes pandas build a one-level MultiIndex, which
# turns every row label into a 1-tuple.
factor_loadings = pd.DataFrame(
    fa.loadings_,
    columns=['Factor 1', 'Factor 2', 'Factor 3', 'Factor 4'],
    index=factors.columns,
)

# Per-item uniqueness and communality alongside the loadings.
factor_loadings['Uniquenesses'] = fa.get_uniquenesses()
factor_loadings['Communality'] = fa.get_communalities()

factor_loadings.to_csv('loadings.csv')
factor_loadings

In [None]:
# Item subsets for the four scales: columns <prefix>1 .. <prefix>5.
p, s, c, e = (
    factors[[f'{prefix}{number}' for number in range(1, 6)]]
    for prefix in ('p', 's', 'c', 'e')
)

# Cronbach's alpha for each scale, computed in the order p, s, c, e.
cronbach_p, cronbach_s, cronbach_c, cronbach_e = (
    pg.cronbach_alpha(data=scale) for scale in (p, s, c, e)
)

# Only the first element of each result goes into the summary table.
values = [
    result[0]
    for result in (cronbach_p, cronbach_s, cronbach_c, cronbach_e)
]

cronbach_all = pd.DataFrame(
    values,
    columns=['Cronbach alpha'],
    index=['Personal factor', 'Social factor', 'Course factor', 'Economic factor'],
)
cronbach_all.to_csv('Cronbach.csv')
cronbach_all

In [None]:
def compute_cronbach_if_one_item_removed(dataframe):
    """Return Cronbach's alpha recomputed with each column left out in turn.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One column per item of a scale.

    Returns
    -------
    list
        One entry per column of ``dataframe``, in column order: the first
        element of ``pg.cronbach_alpha`` evaluated on ``dataframe`` without
        that column. ``dataframe`` itself is not modified.
    """
    # DataFrame.drop returns a new frame, so the input never needs a
    # defensive copy; the earlier per-iteration copies were never read.
    return [
        pg.cronbach_alpha(data=dataframe.drop(columns=[col]))[0]
        for col in dataframe
    ]

def compute_the_cronbach_and_save_df(dataframe, title):
    """Save the alpha-if-item-removed values of one scale to a CSV file.

    The saved table has a single column named ``title`` and is indexed by the
    columns of ``dataframe``. It is written to
    ``'<title> if item removed.csv'``. Nothing is returned.
    """
    alphas_without_item = compute_cronbach_if_one_item_removed(dataframe)
    results = pd.DataFrame({title: alphas_without_item}, index=dataframe.columns)
    output_name = f'{title} if item removed.csv'
    results.to_csv(output_name)

In [None]:
# Write one "if item removed" CSV per scale, in the original call order.
for scale_items, scale_title in (
    (p, "Personal factor"),
    (c, "Course factor"),
    (e, "Economic factor"),
    (s, "Social factor"),
):
    compute_the_cronbach_and_save_df(scale_items, scale_title)

In [None]:
# Variance summary of the fitted model: three rows, one column per factor.
variance_rows = ['SS Loadings', 'Proportion Var', 'Cumulative Var']
variance_columns = ['Factor 1', 'Factor 2', 'Factor 3', 'Factor 4']

factor_summary = pd.DataFrame(
    fa.get_factor_variance(),
    index=variance_rows,
    columns=variance_columns,
)
factor_summary.to_csv('factor_variance.csv')
factor_summary

In [None]:
# Scores of every respondent on the four fitted factors, saved without index.
# NOTE(review): the labels below are attached to the factors purely by
# position — confirm against the loadings table that factor 1 really is the
# economic factor, factor 2 the course factor, and so on.
score_columns = ['Economic factor', 'Course factor', 'Personal factor', 'Social factor']
factor_score = pd.DataFrame(fa.transform(factors), columns=score_columns)
factor_score.to_csv("Factors.csv", index=False)