In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import statsmodels.api as sm
from scipy import stats
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tools import eval_measures
import itertools
import warnings

warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('data/cdv.csv')

X = df.drop(columns=['Y'])
df['Y'] = df['Y'].astype('category')
y_cat = df['Y'].cat.reorder_categories([2,1,3], ordered=True)

### Dengan Variabel Dummy

In [3]:
X_const = add_constant(X)
vif_df = pd.DataFrame()
vif_df["Variable"] = X_const.columns
vif_df["VIF"] = [variance_inflation_factor(X_const.values, i) for i in range(X_const.shape[1])]

vif_df = vif_df[vif_df["Variable"] != "const"]
if (vif_df["VIF"] <= 10).all():
    print("✅ Asumsi non-multikolinearitas terpenuhi")
else:
    print("⚠️ Ada variabel dengan VIF > 10, indikasi multikolinearitas")

✅ Asumsi non-multikolinearitas terpenuhi


In [4]:
def buildMNLogit(used_cols, y):
    X_const = sm.add_constant(X[used_cols])
    model = sm.MNLogit(y, X_const)
    result = model.fit()
    ll_full = result.llf
    ll_null = result.llnull
    pseudo_r2 = 1 - (ll_full / ll_null)
    
    null_model = sm.MNLogit(y, sm.add_constant(np.ones(len(X[used_cols]))))
    null_result = null_model.fit()
    
    # Likelihood ratio test statistic
    G_stat = 2 * (ll_full - ll_null)
    df_diff = len(result.params) - len(null_result.params) 
    p_value_g = 1 - stats.chi2.cdf(G_stat, df_diff)
    
    print(f"Pseudo R² (McFadden): {pseudo_r2}")
    print(f"AIC: {result.aic}")
    print(f"BIC: {result.bic}")
    print(f"G-test statistic: {G_stat}")
    print(f"P-value for G-test: {p_value_g}\n")
    print(result.summary())

In [5]:
used_cols = ['X5', 'X6', 'X7', 'X10']
buildMNLogit(['X5', 'X6', 'X7', 'X10'], y_cat)

Optimization terminated successfully.
         Current function value: 0.574179
         Iterations 8
Optimization terminated successfully.
         Current function value: 1.046630
         Iterations 5
Pseudo R² (McFadden): 0.45140227935478083
AIC: 44.11552217400804
BIC: 54.560746551242275
G-test statistic: 19.842958268902272
P-value for G-test: 0.000536337075990656

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                       11
Method:                           MLE   Df Model:                            8
Date:                Thu, 11 Dec 2025   Pseudo R-squ.:                  0.4514
Time:                        08:09:30   Log-Likelihood:                -12.058
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                   0.01

In [6]:
buildMNLogit(['X5', 'X6', 'X10'], y_cat)

Optimization terminated successfully.
         Current function value: 0.646172
         Iterations 8
Optimization terminated successfully.
         Current function value: 1.046630
         Iterations 5
Pseudo R² (McFadden): 0.3826172327434234
AIC: 43.13920830023807
BIC: 51.495387802025455
G-test statistic: 16.819272142672244
P-value for G-test: 0.0007698746380390142

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                       13
Method:                           MLE   Df Model:                            6
Date:                Thu, 11 Dec 2025   Pseudo R-squ.:                  0.3826
Time:                        08:09:30   Log-Likelihood:                -13.570
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                  0.009

In [7]:
# buildMNLogit(['X5', 'X6', 'X10'], y_cat)

In [8]:
# buildMNLogit(['X5', 'X10'], y_cat)

### Tanpa Variabel Dummy

In [9]:
rasio_vars = ['X5', 'X6']
buildMNLogit(rasio_vars, y_cat)

Optimization terminated successfully.
         Current function value: 0.931389
         Iterations 6
Optimization terminated successfully.
         Current function value: 1.046630
         Iterations 5
Pseudo R² (McFadden): 0.11010666911503386
AIC: 51.1183585819831
BIC: 57.38549320832364
G-test statistic: 4.840121860927212
P-value for G-test: 0.08891619958906827

                          MNLogit Regression Results                          
Dep. Variable:                      Y   No. Observations:                   21
Model:                        MNLogit   Df Residuals:                       15
Method:                           MLE   Df Model:                            4
Date:                Thu, 11 Dec 2025   Pseudo R-squ.:                  0.1101
Time:                        08:09:30   Log-Likelihood:                -19.559
converged:                       True   LL-Null:                       -21.979
Covariance Type:            nonrobust   LLR p-value:                    0.3041
