In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import statsmodels.api as sm
from scipy import stats
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tools import eval_measures
import itertools
import warnings

warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('data/cdv.csv')

X = df.drop(columns=['Y'])
df['Y'] = df['Y'].astype('category')
y_cat = df['Y'].cat.reorder_categories([2,1,3], ordered=True)

n = len(df['Y'])
p = df.shape[1]
k = p - 1

#### Cek multikolinearitas

In [3]:
X_const = add_constant(X)
vif_df = pd.DataFrame()
vif_df["Variable"] = X_const.columns
vif_df["VIF"] = [variance_inflation_factor(X_const.values, i) for i in range(X_const.shape[1])]

vif_df = vif_df[vif_df["Variable"] != "const"]
if (vif_df["VIF"] <= 10).all():
    print("✅ Asumsi non-multikolinearitas terpenuhi")
else:
    print("⚠️ Ada variabel dengan VIF > 10, indikasi multikolinearitas")

✅ Asumsi non-multikolinearitas terpenuhi


Isi X: X_rasio & kombinasi X_ctg

### V1

In [4]:
rasio_vars = ['X1', 'X4', 'X5', 'X6', 'X7', 'X8']
ctg_vars = ['X2','X3','X9','X10']

ctg_combinations = []
for r in range(1, len(ctg_vars)+1):
    ctg_combinations += list(itertools.combinations(ctg_vars, r))
results = []

In [5]:
for combo in ctg_combinations:
    predictors_raw = rasio_vars + list(combo)
    X = df[predictors_raw]
    X_model = sm.add_constant(X, has_constant='add')

    try:
        model = sm.MNLogit(y_cat, X_model)
        result = model.fit()
        llf = result.llf
        llnull = result.llnull
        pseudo_r2 = 1 - (llf / llnull)

        if pd.notna(pseudo_r2):
            summary_text = result.summary().as_text()
            results.append({
                "kombinasi_kategori": "+".join(combo),
                "predictors_raw": predictors_raw,
                "predictors_in_model": list(X_model.columns),
                "n_vars": X_model.shape[1],
                "Pseudo_R2": pseudo_r2,
                "AIC": result.aic,
                "BIC": result.bic,
                "summary": summary_text
            })

    except Exception as e:
        print(f"Model gagal untuk kombinasi {combo}: {e}")
        continue

Optimization terminated successfully.
         Current function value: 0.561501
         Iterations 10
Optimization terminated successfully.
         Current function value: nan
         Iterations 17
Optimization terminated successfully.
         Current function value: nan
         Iterations 21
Optimization terminated successfully.
         Current function value: nan
         Iterations 13
Optimization terminated successfully.
         Current function value: nan
         Iterations 28
Optimization terminated successfully.
         Current function value: nan
         Iterations 19
Optimization terminated successfully.
         Current function value: nan
         Iterations 16
Optimization terminated successfully.
         Current function value: nan
         Iterations 27
Optimization terminated successfully.
         Current function value: nan
         Iterations 16
Optimization terminated successfully.
         Current function value: nan
         Iterations 19
Optimization te

In [6]:
results_df = pd.DataFrame(results)
results_df = results_df.drop(columns=["summary"])
results_df = results_df.sort_values(by="Pseudo_R2", ascending=False)

print("\n=== HASIL MODEL VALID (Pseudo R² tidak NaN) ===\n")
results_df


=== HASIL MODEL VALID (Pseudo R² tidak NaN) ===



Unnamed: 0,kombinasi_kategori,predictors_raw,predictors_in_model,n_vars,Pseudo_R2,AIC,BIC
0,X2,"[X1, X4, X5, X6, X7, X8, X2]","[const, X1, X4, X5, X6, X7, X8, X2]",8,0.463516,55.583023,72.295382


In [7]:
print(results[0]["summary"])

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                        5
Method:                           MLE   Df Model:                           14
Date:                Tue, 09 Dec 2025   Pseudo R-squ.:                  0.4635
Time:                        10:11:04   Log-Likelihood:                -11.792
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                    0.1187
       Y=1       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         16.3992     14.746      1.112      0.266     -12.503      45.302
X1            -1.3000      0.805     -1.616      0.106      -2.877       0.277
X4             0.0556      0.127      0.436      0.6

Karena uji wald pada hasil di atas menunjukkan tidak ada variabel x yang signifikan, maka dilakukan kombinasi x rasio tanpa distandarisasi dengan x_ctg (X2 dan X3 saja) 

### V2

In [8]:
def build_mnlogit_model(vars_to_use, y):
    X = df[vars_to_use]
    X_model = sm.add_constant(X, has_constant='add')
    model = sm.MNLogit(y, X_model)
    result = model.fit(method='newton', disp=False)
    return result

ctg_vars = ['X2', 'X3', 'X9', 'X10']
num_rasio_to_use = 6
results = []
for num_rasio in range(1, num_rasio_to_use + 1):
    for comb in itertools.combinations(rasio_vars, num_rasio):
        for cat_comb in itertools.combinations(ctg_vars, 1):  # Kombinasi X2 dan X3
            for cat_var in cat_comb:
                vars_to_use = list(comb) + [cat_var]
                try:
                    result = build_mnlogit_model(vars_to_use, y_cat)
                    llf = result.llf
                    llnull = result.llnull
                    pseudo_r2 = 1 - (llf / llnull)

                    if pd.notna(pseudo_r2):
                        summary_text = result.summary().as_text()
                        p_values = result.pvalues
                        p_values_without_const = p_values.drop('const', errors='ignore')
                        significant_vars_in_both = (p_values_without_const < 0.1).any(axis=1).sum()
                        results.append({
                            "kombinasi_kategori": "+".join([cat_var] + list(comb)),
                            "predictors_raw": str(list(comb) + [cat_var]),
                            "predictors_in_model": str(list(sm.add_constant(df[vars_to_use], has_constant='add').columns)),
                            "n_vars": len(vars_to_use) + 1,
                            "Pseudo_R2": pseudo_r2,
                            "AIC": result.aic,
                            "BIC": result.bic,
                            "significant_vars_count": significant_vars_in_both,
                            "summary": summary_text
                        })
                except Exception as e:
                    print(f"Model gagal untuk kombinasi {vars_to_use}: {e}")
                    continue

Model gagal untuk kombinasi ['X1', 'X4', 'X5', 'X9']: Singular matrix
Model gagal untuk kombinasi ['X1', 'X5', 'X7', 'X9']: Singular matrix


In [9]:
results_df = pd.DataFrame(results)
for col in results_df.columns:
    if results_df[col].dtype == 'object':
        results_df[col] = results_df[col].astype(str)
results_df = results_df.drop(columns=["summary"])
# results_df

In [10]:
# results_df[results_df["n_vars"] == 5].sort_values(by="significant_vars_count", ascending=False)
# results_df[results_df["significant_vars_count"] >= 5].sort_values(by="Pseudo_R2", ascending=False)
results_df[results_df["significant_vars_count"] >= 2].sort_values(by="Pseudo_R2", ascending=False)

Unnamed: 0,kombinasi_kategori,predictors_raw,predictors_in_model,n_vars,Pseudo_R2,AIC,BIC,significant_vars_count
187,X10+X4+X5+X6+X8,"['X4', 'X5', 'X6', 'X8', 'X10']","['const', 'X4', 'X5', 'X6', 'X8', 'X10']",6,0.586738,42.166388,54.700658,2
173,X10+X1+X5+X6+X7,"['X1', 'X5', 'X6', 'X7', 'X10']","['const', 'X1', 'X5', 'X6', 'X7', 'X10']",6,0.537283,44.340321,56.87459,2
184,X10+X4+X5+X6+X7,"['X4', 'X5', 'X6', 'X7', 'X10']","['const', 'X4', 'X5', 'X6', 'X7', 'X10']",6,0.528052,44.746139,57.280408,2
148,X9+X5+X7+X8,"['X5', 'X7', 'X8', 'X9']","['const', 'X5', 'X7', 'X8', 'X9']",5,0.51617,41.26843,51.713654,2
100,X10+X1+X5+X6,"['X1', 'X5', 'X6', 'X10']","['const', 'X1', 'X5', 'X6', 'X10']",5,0.501245,41.92452,52.369745,2
128,X10+X4+X5+X8,"['X4', 'X5', 'X8', 'X10']","['const', 'X4', 'X5', 'X8', 'X10']",5,0.462797,43.614613,54.059837,2
143,X10+X5+X6+X7,"['X5', 'X6', 'X7', 'X10']","['const', 'X5', 'X6', 'X7', 'X10']",5,0.451402,44.115522,54.560747,3
70,X10+X5+X8,"['X5', 'X8', 'X10']","['const', 'X5', 'X8', 'X10']",4,0.450205,40.168159,48.524339,3
121,X10+X4+X5+X6,"['X4', 'X5', 'X6', 'X10']","['const', 'X4', 'X5', 'X6', 'X10']",5,0.429006,45.10001,55.545235,3
63,X10+X5+X6,"['X5', 'X6', 'X10']","['const', 'X5', 'X6', 'X10']",4,0.382617,43.139208,51.495388,3


In [12]:
print(results[143]["summary"])

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                       11
Method:                           MLE   Df Model:                            8
Date:                Tue, 09 Dec 2025   Pseudo R-squ.:                  0.4514
Time:                        10:11:13   Log-Likelihood:                -12.058
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                   0.01095
       Y=1       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.3318      7.774      0.300      0.764     -12.905      17.568
X5             0.0975      0.053      1.825      0.068      -0.007       0.202
X6             0.8199      0.534      1.537      0.1