In [90]:
import pandas as pd
import numpy as np
import os

path=os.path.join(os.path.dirname(os.getcwd()), 'data', 'wine_quality_classification.csv')
data=pd.read_csv(path)
data['quality_label']=pd.Categorical(data['quality_label'], categories=['low', 'medium', 'high'], ordered=True)
qualities={'high':data[data['quality_label']=='high'], 'medium':data[data['quality_label']=='medium'], 'low':data[data['quality_label']=='low']}

In [84]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

x=data[['fixed_acidity','residual_sugar','alcohol','density']]
y=data['quality_label']
model = OrderedModel(y, x,distr='logit')

result=model.fit(method='bfgs')
print(result.summary())

Optimization terminated successfully.
         Current function value: 1.093980
         Iterations: 38
         Function evaluations: 41
         Gradient evaluations: 41
                             OrderedModel Results                             
Dep. Variable:          quality_label   Log-Likelihood:                -1094.0
Model:                   OrderedModel   AIC:                             2200.
Method:            Maximum Likelihood   BIC:                             2229.
Date:                Sat, 17 May 2025                                         
Time:                        09:53:28                                         
No. Observations:                1000                                         
Df Residuals:                     994                                         
Df Model:                           4                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------

In [85]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Calculate VIF for each predictor in 'x'
vif_data = pd.DataFrame()
vif_data["Variable"] = x.columns  # or x.columns.to_list() if x is a DataFrame
vif_data["VIF"] = [variance_inflation_factor(x.values, i) for i in range(x.shape[1])]

print(vif_data)

         Variable        VIF
0   fixed_acidity   9.762771
1  residual_sugar   4.356891
2         alcohol  40.595285
3         density  51.146768


In [86]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

x=data[['fixed_acidity','residual_sugar','alcohol']]
y=data['quality_label']
model = OrderedModel(y, x,distr='logit')

result=model.fit(method='bfgs')
print(result.summary())

Optimization terminated successfully.
         Current function value: 1.094240
         Iterations: 17
         Function evaluations: 20
         Gradient evaluations: 20
                             OrderedModel Results                             
Dep. Variable:          quality_label   Log-Likelihood:                -1094.2
Model:                   OrderedModel   AIC:                             2198.
Method:            Maximum Likelihood   BIC:                             2223.
Date:                Sat, 17 May 2025                                         
Time:                        09:53:28                                         
No. Observations:                1000                                         
Df Residuals:                     995                                         
Df Model:                           3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------

In [87]:
import statsmodels.api as sm

x=data[['fixed_acidity','residual_sugar','alcohol','density']]
y=data['quality_label']
x_with_const = sm.add_constant(x)
y_for_np=y.map(lambda x: 2 if x == 'high' else 1 if x == 'medium' else 0)
model = sm.OLS(y_for_np, x_with_const)
result = model.fit(cov_type='HC3')

print(result.summary())

                            OLS Regression Results                            
Dep. Variable:          quality_label   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.089
Date:                Sat, 17 May 2025   Prob (F-statistic):              0.361
Time:                        09:53:28   Log-Likelihood:                -1196.2
No. Observations:                1000   AIC:                             2402.
Df Residuals:                     995   BIC:                             2427.
Df Model:                           4                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const              5.2598      5.793      0.

In [88]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

x=data[['fixed_acidity','residual_sugar','alcohol','density']]
y=data['quality_label']
x['density']=x['density']**2
model = OrderedModel(y, x,distr='logit')

result=model.fit(method='bfgs')
print(result.summary())

Optimization terminated successfully.
         Current function value: 1.093980
         Iterations: 37
         Function evaluations: 40
         Gradient evaluations: 40
                             OrderedModel Results                             
Dep. Variable:          quality_label   Log-Likelihood:                -1094.0
Model:                   OrderedModel   AIC:                             2200.
Method:            Maximum Likelihood   BIC:                             2229.
Date:                Sat, 17 May 2025                                         
Time:                        09:53:29                                         
No. Observations:                1000                                         
Df Residuals:                     994                                         
Df Model:                           4                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['density']=x['density']**2


In [89]:
for quality in qualities:
    for type in ['fixed_acidity','residual_sugar','alcohol','density']:
        top=qualities[quality][qualities[quality][type]==max(qualities[quality][type])]
        bottom=qualities[quality][qualities[quality][type]==min(qualities[quality][type])]
        print("top")
        print(top)
        print()
        print("bottom")
        print(bottom)
        print()

top
     fixed_acidity  residual_sugar  alcohol  density quality_label
312           16.0             3.1     10.0   0.9961          high
339           16.0             5.0     11.8   0.9906          high
635           16.0            14.1     13.3   0.9948          high

bottom
     fixed_acidity  residual_sugar  alcohol  density quality_label
39             4.0            11.8     13.7   0.9918          high
839            4.0            13.5     10.9   0.9928          high

top
     fixed_acidity  residual_sugar  alcohol  density quality_label
378            6.7            14.9      9.9   1.0017          high
380           15.7            14.9     13.4   1.0017          high

bottom
     fixed_acidity  residual_sugar  alcohol  density quality_label
110            8.2             0.5     12.6   0.9977          high
287           13.9             0.5     12.6   0.9915          high
457           11.7             0.5     13.0   0.9907          high
942            7.0             0.5   