# Import general libraries

In [1]:
import pandas as pd
import numpy as np

# Create our function

In [2]:
def classifier_reductor(X, Y, model, model_reductor, n_splits=10):
    """Cross-validate a feature-reduction step followed by a predictive model.

    For every fold of a shuffled, stratified K-fold split the reducer is fitted
    on the training rows only, both partitions are transformed, the model is
    fitted on the reduced training data and its predictions on the reduced
    test data are rounded with ``np.round`` before scoring. Rounding lets a
    regressor be scored with classification metrics.

    Parameters
    ----------
    X : array-like
        Feature matrix; it is indexed with integer index arrays
        (``X[train_index]``), so it must support that kind of indexing.
    Y : array-like
        Target labels, used both for stratification and for scoring.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place on every fold.
    model_reductor : transformer
        Object exposing ``fit(X, y)`` and ``transform(X)``. It is refitted in
        place on every fold.
    n_splits : int, default 10
        Number of stratified folds.

    Returns
    -------
    None
        Results are only printed: elapsed time, then mean and standard
        deviation across folds of Cohen's kappa, accuracy and the confusion
        matrix.
    """
    import time
    from sklearn.model_selection import StratifiedKFold
    from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix

    # perf_counter measures wall-clock time, which is what the printed
    # "elapsed time" message promises; process_time would report CPU time.
    start = time.perf_counter()

    # Fix the label set once so every fold yields a confusion matrix of the
    # same shape, even when a fold lacks a rare class or a rounded prediction
    # falls outside the known classes. Without this the per-fold matrices can
    # differ in shape and cannot be averaged.
    labels = np.unique(Y)

    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    kappa = []
    accuracy = []
    cm = []
    for train_index, test_index in kf.split(X, Y):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]

        # The reducer only ever sees the training rows of the current fold.
        model_reductor.fit(X_train, Y_train)
        X_train_reduced = model_reductor.transform(X_train)
        X_test_reduced = model_reductor.transform(X_test)

        model.fit(X_train_reduced, Y_train)
        Y_pred = model.predict(X_test_reduced)
        Y_pred_round = np.round(Y_pred)

        kappa.append(cohen_kappa_score(Y_test, Y_pred_round))
        accuracy.append(accuracy_score(Y_test, Y_pred_round))
        # NOTE(review): with an explicit `labels` list, predictions outside
        # that list are expected to be left out of the matrix (they still
        # count as errors in kappa/accuracy) - confirm against sklearn docs.
        cm.append(confusion_matrix(Y_test, Y_pred_round, labels=labels))
    cm = np.array(cm)

    print('Tiempo transcurrido = ', time.perf_counter() - start, ' segundos')
    print('kappa = ', np.mean(kappa).round(2), u"\u00B1", np.std(kappa).round(2))
    print('accuracy = ', np.mean(accuracy).round(2), u"\u00B1", np.std(accuracy).round(2))
    # The statistic shown is the confusion matrix, so it is labelled as such.
    print('confusion_matrix = \n', np.mean(cm, axis=0).round(2),"\n", u"\u00B1 \n", np.std(cm, axis=0).round(2))


# Import and normalize data

In [3]:

from sklearn.preprocessing import StandardScaler

# Location of the raw CSV and name of the target column, kept in one place so
# they are easy to change. The path is machine-specific.
DATA_PATH = 'C:\\M2CML22\\Practicas\\Practica3_4\\Files\\product_quality.csv'
TARGET_COLUMN = 'quality'

data = pd.read_csv(DATA_PATH, sep = ';')

# Split into feature matrix and target vector.
feature_frame = data.drop(columns=TARGET_COLUMN)
X, Y = feature_frame.values, data[TARGET_COLUMN].values

# Standardise the features only. NOTE: the scaler is fitted on the full
# dataset, so rows later held out during cross-validation have already
# contributed to the scaling statistics.
X_norm = StandardScaler().fit_transform(X)

# Data-frame view of the standardised features with the unscaled target
# re-attached, keeping the original column order. Building it from X_norm
# avoids fitting a second scaler on the target only to overwrite the result.
data_norm = pd.DataFrame(X_norm, columns=feature_frame.columns)
data_norm[TARGET_COLUMN] = Y
data_norm = data_norm[data.columns]


# Import ML functions

In [4]:
import sklearn.discriminant_analysis as da
import sklearn.neighbors as neig
import sklearn.svm as svm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, SequentialFeatureSelector
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier, XGBRegressor



# Declare methods to use

In [5]:
# =============================================================================
# Feature Reduction
# =============================================================================
# PCA keeps 5 components; LDA is used with its default settings.
pca = PCA(n_components=5)
lda = da.LinearDiscriminantAnalysis()

# =============================================================================
# Classifiers
# =============================================================================
RF_C = RandomForestClassifier(n_estimators = 100, random_state=0)
LogReg = LogisticRegression()
# Build the instance through the module rather than the bare `SVC` name:
# `SVC = SVC()` replaces the imported class with an instance, so running this
# cell a second time would fail because an SVC instance is not callable.
# The public name `SVC` still ends up bound to an estimator instance.
SVC = svm.SVC()
LDA = da.LinearDiscriminantAnalysis()
KNN_C = neig.KNeighborsClassifier(n_neighbors=1)
XGB_C = XGBClassifier()

# =============================================================================
# Regressors
# =============================================================================
GBR = GradientBoostingRegressor(n_estimators = 100, random_state=0)
LinReg = LinearRegression()
KNN_R = neig.KNeighborsRegressor(n_neighbors=1)
XGB_R = XGBRegressor()
RF_R = RandomForestRegressor(n_estimators = 100, random_state=0)

# =============================================================================
# Wrapped methods
# =============================================================================
# Model-based selection driven by the random forest classifier, and
# sequential selection of 4 features driven by logistic regression.
sel_model = SelectFromModel(RF_C)
sfs = SequentialFeatureSelector(LogReg, n_features_to_select=4)


# Run function

In [6]:
# Score the gradient boosting regressor on the features kept by the
# random-forest based selector, using 10 folds.
classifier_reductor(
    X=X_norm,
    Y=Y,
    model=GBR,
    model_reductor=sel_model,
    n_splits=10,
)

Tiempo transcurrido =  3.40625  segundos
kappa =  0.34 ± 0.05
accuracy =  0.6 ± 0.03
correlation_matrix = 
 [[ 0.   0.1  0.7  0.2  0.   0. ]
 [ 0.   0.2  3.5  1.5  0.1  0. ]
 [ 0.   0.5 49.7 17.5  0.4  0. ]
 [ 0.   0.2 20.2 39.2  4.2  0. ]
 [ 0.   0.   0.7 12.5  6.7  0. ]
 [ 0.   0.   0.   0.9  0.9  0. ]] 
 ± 
 [[0.   0.3  0.46 0.4  0.   0.  ]
 [0.   0.4  1.2  1.02 0.3  0.  ]
 [0.   0.67 2.76 2.62 0.66 0.  ]
 [0.   0.4  4.94 4.51 2.27 0.  ]
 [0.   0.   1.19 1.36 1.   0.  ]
 [0.   0.   0.   0.83 0.94 0.  ]]
