In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import statsmodels.api as sm
from scipy import stats
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tools import eval_measures
import warnings

warnings.filterwarnings("ignore")

In [2]:
# Load the dataset (path is relative to the notebook's working directory)
# and show which columns are available.
df = pd.read_csv('data/cdv.csv')
print(list(df.columns))

['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'Y']


In [3]:
# df.describe()

In [4]:
# Predictors: every column except the response Y.
X = df.drop(columns=['Y'])

# Store Y as a categorical and list its levels in the order 2, 1, 3.
# NOTE(review): presumably level 2 is put first so it acts as the base outcome
# in the multinomial model — confirm.
df['Y'] = df['Y'].astype('category')
y_cat = df['Y'].cat.reorder_categories([2, 1, 3], ordered=True)
print(y_cat.value_counts())

# n: number of rows; p: number of columns in df (Y included); k: p minus one.
n, p = df.shape
k = p - 1

Y
2    9
1    8
3    4
Name: count, dtype: int64


#### Cek multikolinearitas

In [5]:
# Variance inflation factor for each column of the predictor matrix after
# add_constant() has been applied to it.
X_const = add_constant(X)
vif_df = pd.DataFrame({
    "Variable": X_const.columns,
    "VIF": [
        variance_inflation_factor(X_const.values, col_idx)
        for col_idx in range(len(X_const.columns))
    ],
})
print(vif_df)

   Variable         VIF
0     const  310.528334
1        X1    1.659555
2        X2    1.321321
3        X3    1.370177
4        X4    2.297288
5        X5    2.571857
6        X6    2.002548
7        X7    1.737764
8        X8    2.139399
9        X9    2.152826
10      X10    1.870077


#### Standardisasi Variabel Rasio

In [6]:
# Column groups (going by the names: categorical vs. ratio-scale predictors —
# TODO confirm against the data dictionary).
ctg_vars = ['X2', 'X3', 'X9', 'X10']
rasio_vars = ['X1', 'X4', 'X5', 'X6', 'X7', 'X8']

# Standardise only the ratio columns; keep X's row index so the scaled frame
# lines up with the untouched columns when they are concatenated later.
scaler = StandardScaler().fit(X[rasio_vars])
X_scaled = scaler.transform(X[rasio_vars])
X_scaled_df = pd.DataFrame(data=X_scaled, index=X.index, columns=rasio_vars)

Kombinasi isi X yang dicoba:
1. X_ctg + X_rasio tanpa standardisasi
2. X_ctg + X_rasio yang distandardisasi
3. X_rasio tanpa standardisasi
4. X_rasio yang distandardisasi
5. X_ctg

In [7]:
def run_multinomial_logit(X, y):
    """Fit a multinomial logit of ``y`` on ``X`` plus an intercept and summarise it.

    Parameters
    ----------
    X : array-like or DataFrame
        Predictors; a constant column is added via ``sm.add_constant``.
    y : array-like or Series
        Response handed to ``sm.MNLogit`` as the dependent variable.

    Returns
    -------
    dict or None
        ``None`` when McFadden's pseudo R² (``1 - llf / llnull``) is not finite,
        i.e. the log-likelihood came back as NaN or ±inf and the fit is unusable.
        Otherwise a dict with keys ``'pseudor2'``, ``'loglik'``, ``'aic'``,
        ``'bic'``, ``'iterations'``, ``'converged'``, ``'params'`` and
        ``'summary'`` (the summary table rendered as plain text).
    """
    design = sm.add_constant(X)
    result = sm.MNLogit(y, design).fit()

    llf = result.llf
    llnull = result.llnull
    pseudo_r2 = 1 - (llf / llnull)

    # Reject the fit before doing any further work. isfinite (rather than isnan)
    # also rejects an infinite log-likelihood, which would otherwise be reported
    # as a valid model with an infinite pseudo R².
    if not np.isfinite(pseudo_r2):
        return None

    fit_info = result.mle_retvals
    return {
        'pseudor2': pseudo_r2,
        'loglik': llf,
        'aic': result.aic,
        'bic': result.bic,
        'iterations': fit_info['iterations'],
        'converged': fit_info['converged'],
        'params': result.params,
        'summary': result.summary().as_text(),
    }

In [8]:
# Candidate predictor sets, keyed by the label used in the results table.
# Insertion order determines the order in which the models are fitted.
combinations = {
    'All Rasio & Kategori': pd.concat(
        [X.loc[:, rasio_vars], X.loc[:, ctg_vars]], axis='columns'
    ),
    'Standardized Rasi & Kategori': pd.concat(
        [X.loc[:, ctg_vars], X_scaled_df], axis='columns'
    ),
    'Only Rasio': X.loc[:, rasio_vars],
    'Only Kategori': X.loc[:, ctg_vars],
    'Only Standardized Rasio': X_scaled_df,
}

In [9]:
# Fit one model per predictor set and collect the fit statistics.
results = []
for combo_name, combo_data in combinations.items():
    result = run_multinomial_logit(combo_data, y_cat)

    if not result:
        # run_multinomial_logit returns None for an unusable fit. Say so
        # explicitly instead of silently dropping the specification, so the
        # results table cannot be mistaken for the full list of models tried.
        print(f"Skipped '{combo_name}': no usable fit (pseudo R² is not finite).")
        continue

    results.append({
        'Combination': combo_name,
        'Pseudo R²': result['pseudor2'],
        'Iterations': result['iterations'],
        'Converged': result['converged'],
        'LogLik': result['loglik'],
        'AIC': result['aic'],
        'BIC': result['bic'],
        'Summary': result['summary'],
    })

Optimization terminated successfully.
         Current function value: nan
         Iterations 17
Optimization terminated successfully.
         Current function value: nan
         Iterations 23
Optimization terminated successfully.
         Current function value: 0.665189
         Iterations 7
Optimization terminated successfully.
         Current function value: nan
         Iterations 25
Optimization terminated successfully.
         Current function value: 0.665189
         Iterations 7


In [10]:
# Tabulate the fit statistics; the long text summaries are left out of the table.
results_df = pd.DataFrame(results).drop(columns=["Summary"])
print(results_df)

               Combination  Pseudo R²  Iterations  Converged    LogLik  \
0               Only Rasio   0.364448           7       True -13.96896   
1  Only Standardized Rasio   0.364448           7       True -13.96896   

        AIC        BIC  
0  55.93792  70.561234  
1  55.93792  70.561234  


In [12]:
# Print the full summaries of the first two retained fits, with a blue rule
# (ANSI colour codes) between them.
separator = "\033[34m" + "="*78 + "\033[0m"
for position in (0, 1):
    if position:
        print(separator)
    print(results[position]["Summary"])

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                        7
Method:                           MLE   Df Model:                           12
Date:                Mon, 08 Dec 2025   Pseudo R-squ.:                  0.3644
Time:                        19:01:54   Log-Likelihood:                -13.969
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                    0.1903
       Y=1       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         18.9908     14.673      1.294      0.196      -9.767      47.749
X1            -1.3877      0.818     -1.696      0.090      -2.991       0.216
X4             0.0376      0.127      0.296      0.7