In [257]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.linear_model import PoissonRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from factor_analyzer import FactorAnalyzer
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser
from openpyxl import load_workbook
import pandas as pd
import statsmodels.api as sm

In [258]:
BaseDatos = pd.read_excel('InfantMortal-2.xlsx', sheet_name='ACPEjercicio')
BaseDatos.shape

df = pd.DataFrame(BaseDatos)
df.head()

Unnamed: 0,Column1,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality
0,Courtelary,80.2,17.0,15,12,9.96,22.2
1,Delemont,83.1,45.1,6,9,84.84,22.2
2,Franches-Mnt,92.5,39.7,5,5,93.4,20.2
3,Moutier,85.8,36.5,12,7,33.77,20.3
4,Neuveville,76.9,43.5,17,15,5.16,20.6


In [259]:
df_number = BaseDatos.select_dtypes(include=['number'])

In [260]:
scaler = StandardScaler()
dfEscalado = scaler.fit_transform(df_number)
dfEscalado = pd.DataFrame(dfEscalado, columns=df_number.columns)
dfEscalado.head()

Unnamed: 0,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality
0,0.813835,-1.498091,-0.188705,0.107361,-0.75581,0.783416
1,1.048499,-0.247441,-1.32902,-0.208012,1.059075,0.783416
2,1.809134,-0.487779,-1.455721,-0.628508,1.266546,0.089343
3,1.266979,-0.630202,-0.56881,-0.41826,-0.178721,0.124047
4,0.546803,-0.318652,0.064699,0.422733,-0.872149,0.228158


In [261]:
print("Promedio:")
print(dfEscalado.mean())
print("\nDesviación estándar:")
print(dfEscalado.std())

Promedio:
Fertility           1.681870e-15
Agriculture         7.653452e-16
Examination        -3.543265e-17
Education          -3.779483e-17
Catholic           -1.192899e-16
Infant.Mortality    6.808974e-16
dtype: float64

Desviación estándar:
Fertility           1.010811
Agriculture         1.010811
Examination         1.010811
Education           1.010811
Catholic            1.010811
Infant.Mortality    1.010811
dtype: float64


In [262]:
model_dict = {
    "Factor1": ["Agriculture", "Examination", "Education"],
    "Factor2": ["Catholic"],
    "Factor3": ["Fertility", "Infant.Mortality"]
}

model_spec = ModelSpecificationParser.parse_model_specification_from_dict(BaseDatos, model_dict)

cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False)
cfa.fit(dfEscalado)



In [263]:
loadings = pd.DataFrame(cfa.loadings_, index=df_number.columns, columns=model_dict.keys())
print("Cargas factoriales:\n", loadings.round(3))



Cargas factoriales:
                   Factor1  Factor2  Factor3
Fertility           0.624    0.000    0.000
Agriculture         0.567    0.000    0.000
Examination        -0.785    0.000    0.000
Education           0.000    0.807    0.000
Catholic            0.000    0.000    0.328
Infant.Mortality    0.000    0.000    0.293


In [264]:
scores = cfa.transform(dfEscalado)
df_scores = pd.DataFrame(scores, columns=model_dict.keys())



In [265]:
df_reg = BaseDatos.copy()
df_reg = pd.concat([df_reg, df_scores], axis=1)

In [266]:
af1 = cfa.loadings_[:,0]*dfEscalado
af2 = cfa.loadings_[:,1]*dfEscalado
af3 = cfa.loadings_[:,2]*dfEscalado

af1.head()

Unnamed: 0,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality
0,0.508084,-0.850144,0.148065,0.0,-0.0,0.0
1,0.654586,-0.140419,1.042802,-0.0,0.0,0.0
2,1.129457,-0.276807,1.142217,-0.0,0.0,0.0
3,0.790985,-0.35763,0.446311,-0.0,-0.0,0.0
4,0.341374,-0.18083,-0.050765,0.0,-0.0,0.0


In [267]:
af2.head()

Unnamed: 0,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality
0,0.0,-0.0,-0.0,0.086692,-0.0,0.0
1,0.0,-0.0,-0.0,-0.167966,0.0,0.0
2,0.0,-0.0,-0.0,-0.50751,0.0,0.0
3,0.0,-0.0,-0.0,-0.337738,-0.0,0.0
4,0.0,-0.0,0.0,0.34135,-0.0,0.0


In [268]:
af3.head()

Unnamed: 0,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality
0,0.0,-0.0,-0.0,0.0,-0.247721,0.229709
1,0.0,-0.0,-0.0,-0.0,0.347118,0.229709
2,0.0,-0.0,-0.0,-0.0,0.415118,0.026197
3,0.0,-0.0,-0.0,-0.0,-0.058577,0.036372
4,0.0,-0.0,0.0,0.0,-0.285852,0.066899


In [269]:
df['RegAF1']=af1.sum(axis=1)
df['RegAF2']=af2.sum(axis=1)
df['RegAF3']=af3.sum(axis=1)

df

Unnamed: 0,Column1,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality,RegAF1,RegAF2,RegAF3
0,Courtelary,80.2,17.0,15,12,9.96,22.2,-0.193995,0.086692,-0.018012
1,Delemont,83.1,45.1,6,9,84.84,22.2,1.55697,-0.167966,0.576827
2,Franches-Mnt,92.5,39.7,5,5,93.4,20.2,1.994867,-0.50751,0.441315
3,Moutier,85.8,36.5,12,7,33.77,20.3,0.879666,-0.337738,-0.022205
4,Neuveville,76.9,43.5,17,15,5.16,20.6,0.109778,0.34135,-0.218953
5,Porrentruy,76.1,35.3,9,7,90.57,26.6,0.657577,-0.337738,1.070072
6,Broye,83.8,70.2,16,7,92.85,23.6,1.232134,-0.337738,0.782916
7,Glane,92.4,67.8,14,8,97.16,24.9,1.804803,-0.252852,0.949437
8,Gruyere,82.4,53.3,12,7,97.67,21.0,1.132224,-0.337738,0.55664
9,Sarine,82.9,45.2,16,13,91.38,24.4,0.55524,0.171578,0.852643


In [270]:
# with pd.ExcelWriter('InfantMortal-2.xlsx', engine='openpyxl', mode='a') as writer:
#     BaseDatos.to_excel(writer, sheet_name='Desarrollo', index=False)

In [271]:
df.head()

Unnamed: 0,Column1,Fertility,Agriculture,Examination,Education,Catholic,Infant.Mortality,RegAF1,RegAF2,RegAF3
0,Courtelary,80.2,17.0,15,12,9.96,22.2,-0.193995,0.086692,-0.018012
1,Delemont,83.1,45.1,6,9,84.84,22.2,1.55697,-0.167966,0.576827
2,Franches-Mnt,92.5,39.7,5,5,93.4,20.2,1.994867,-0.50751,0.441315
3,Moutier,85.8,36.5,12,7,33.77,20.3,0.879666,-0.337738,-0.022205
4,Neuveville,76.9,43.5,17,15,5.16,20.6,0.109778,0.34135,-0.218953


In [272]:
#"Fertility","Agriculture","Examination","Education","Catholic","Infant.Mortality","RegAF1","RegAF2","Catholic"

#Fertility           Factor 3
#Agriculture         Factor 1
#Examination         Factor 1
#Education           Factor 1
#Catholic            Factor 2
#Infant.Mortality    Factor 3

y = df["Infant.Mortality"]
x = df[['RegAF1','RegAF2']]
x= sm.add_constant(x)

mdl = sm.OLS(y,x)
mdl_fit = mdl.fit()


In [273]:
print(mdl_fit.summary())

                            OLS Regression Results                            
Dep. Variable:       Infant.Mortality   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.9507
Date:                Mon, 19 May 2025   Prob (F-statistic):              0.394
Time:                        11:50:12   Log-Likelihood:                -115.44
No. Observations:                  47   AIC:                             236.9
Df Residuals:                      44   BIC:                             242.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         19.9426      0.425     46.889      0.0