# Homework 5

## Imports

In [None]:
! pip install eli5

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import datasets
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.feature_selection import RFE
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression
from sklearn.manifold import TSNE
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
)
from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    StratifiedKFold,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

## Data and model preparation

In [None]:
# Set to True to standardize the feature columns before modeling.
scale = False

wine = load_wine()

# Scale a separate feature matrix rather than rebinding `wine`: the Bunch's
# 'data', 'target' and 'feature_names' entries are all still needed below,
# and replacing `wine` with the scaled ndarray would break those lookups.
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split — confirm that is acceptable for this analysis.
feature_values = wine['data']
if scale:
    feature_values = StandardScaler().fit_transform(feature_values)

# One table holding every feature column plus the class label in 'target'.
data = pd.DataFrame(
    data=np.c_[feature_values, wine['target']],
    columns=wine['feature_names'] + ['target']
)

# Feature column names, i.e. every column except the label.
features = list(data.columns)
features.remove('target')
x = pd.DataFrame(data.loc[:, features].values, columns=features)
y = pd.DataFrame(data.loc[:, ['target']].values, columns=['target'])

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42
)

In [None]:
# Multi-layer perceptron classifier with a fixed seed; max_iter caps the
# number of training iterations at 1000.
mlpc_model = MLPClassifier(random_state=1, max_iter=1000)
# The labels live in a one-column DataFrame, so flatten them to 1-D for fit().
mlpc_model.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split.
score = mlpc_model.score(x_test, y_test)
print(f'Score: {score}')

Score: 0.9661016949152542




In [None]:
# Gradient boosting over 100 depth-1 trees with learning rate 1.0 and a
# fixed seed.
gbc_model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0
)
# The labels live in a one-column DataFrame, so flatten them to 1-D for fit().
gbc_model.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split.
score = gbc_model.score(x_test, y_test)
print(f'Score: {score}')

Score: 0.9830508474576272


In [None]:
# Baseline random forest: only the seed is set, every other hyperparameter
# is left at the library default.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split.
score = rf_model.score(x_test, y_test)
print(f'Score: {score}')

Score: 1.0


## Permutation variable importance analysis

In [None]:
# Permutation importance of the baseline forest, computed on the test split.
perm = PermutationImportance(rf_model, random_state=1)
perm.fit(x_test, y_test)
# Last expression of the cell, so the weights table is rendered as output.
eli5.show_weights(perm, feature_names=features)

Weight,Feature
0.0746  ± 0.0271,color_intensity
0.0712  ± 0.0450,proline
0.0712  ± 0.0450,flavanoids
0.0271  ± 0.0271,alcohol
0.0136  ± 0.0136,od280/od315_of_diluted_wines
0  ± 0.0000,hue
0  ± 0.0000,proanthocyanins
0  ± 0.0000,nonflavanoid_phenols
0  ± 0.0000,total_phenols
0  ± 0.0000,magnesium


### Candidate 1

In [None]:
# Candidate 1: random forest with 50 trees, same seed as the baseline.
rf_model_1 = RandomForestClassifier(n_estimators=50, random_state=42)
rf_model_1.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split.
score = rf_model_1.score(x_test, y_test)
print(f'Score: {score}')

Score: 1.0


In [None]:
# Permutation importance of candidate 1, computed on the test split.
perm = PermutationImportance(rf_model_1, random_state=1)
perm.fit(x_test, y_test)
# Last expression of the cell, so the weights table is rendered as output.
eli5.show_weights(perm, feature_names=features)

Weight,Feature
0.0881  ± 0.0542,proline
0.0814  ± 0.0332,flavanoids
0.0576  ± 0.0346,color_intensity
0.0373  ± 0.0332,alcohol
0  ± 0.0000,od280/od315_of_diluted_wines
0  ± 0.0000,hue
0  ± 0.0000,proanthocyanins
0  ± 0.0000,nonflavanoid_phenols
0  ± 0.0000,total_phenols
0  ± 0.0000,magnesium


### Candidate 2

In [None]:
# Candidate 2: a smaller forest of 20 trees, each limited to depth 2.
rf_model_2 = RandomForestClassifier(
    n_estimators=20, max_depth=2, random_state=42
)
rf_model_2.fit(x_train, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split.
score = rf_model_2.score(x_test, y_test)
print(f'Score: {score}')

Score: 0.9661016949152542


In [None]:
# Permutation importance of candidate 2, computed on the test split.
perm = PermutationImportance(rf_model_2, random_state=1)
perm.fit(x_test, y_test)
# Last expression of the cell, so the weights table is rendered as output.
eli5.show_weights(perm, feature_names=features)

Weight,Feature
0.0847  ± 0.0371,flavanoids
0.0373  ± 0.0332,color_intensity
0.0203  ± 0.0254,hue
0.0169  ± 0.0429,alcohol
0.0169  ± 0.0214,proline
0.0136  ± 0.0136,total_phenols
0.0136  ± 0.0136,magnesium
0.0102  ± 0.0166,od280/od315_of_diluted_wines
0.0068  ± 0.0166,proanthocyanins
0  ± 0.0000,nonflavanoid_phenols


### Candidate 3

In [None]:
# Candidate 3 is trained without the 'proline' feature; build copies of both
# splits with that column removed (the originals are left untouched).
x_train_ = x_train.drop('proline', axis=1)
x_test_ = x_test.drop('proline', axis=1)

In [None]:
# Candidate 3: same settings as the baseline forest, but fit on the feature
# set without 'proline'.
rf_model_3 = RandomForestClassifier(random_state=42)
rf_model_3.fit(x_train_, y_train.to_numpy().ravel())

# Mean accuracy on the held-out test split (also without 'proline').
score = rf_model_3.score(x_test_, y_test)
print(f'Score: {score}')

Score: 0.9830508474576272


In [None]:
# Permutation importance of candidate 3, computed on the reduced test split.
perm = PermutationImportance(rf_model_3, random_state=1)
perm.fit(x_test_, y_test)
# Feature names come from the reduced frame so they match the columns the
# model was fit on. Last expression of the cell, so the table is rendered.
eli5.show_weights(perm, feature_names=x_test_.columns.tolist())

Weight,Feature
0.1356  ± 0.0525,color_intensity
0.1322  ± 0.0395,alcohol
0.1017  ± 0.0525,flavanoids
0.0203  ± 0.0254,total_phenols
0.0169  ± 0.0214,magnesium
0.0136  ± 0.0254,ash
0.0102  ± 0.0166,od280/od315_of_diluted_wines
0.0102  ± 0.0166,proanthocyanins
0.0102  ± 0.0166,alcalinity_of_ash
0.0068  ± 0.0166,hue
