# Utils

In [1]:
import itertools
from typing import List

import numpy as np
import pandas as pd
import plotly.figure_factory as ff
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix

In [2]:
def get_accuracy(confusion_matrix: np.ndarray) -> float:
    """Return the overall accuracy encoded in a confusion matrix.

    Accuracy is the sum of the main diagonal divided by the sum of all
    entries of the matrix.

    Args:
        confusion_matrix: Square 2-D array (or array-like) of counts.

    Returns:
        The accuracy as a built-in ``float``; ``nan`` when the matrix
        entries sum to zero.

    Raises:
        ValueError: If the input is not a square 2-D matrix.
    """
    matrix = np.asarray(confusion_matrix)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            f'confusion_matrix must be a square 2-D array, got shape {matrix.shape}'
        )

    total = matrix.sum()
    if total == 0:
        # Nothing was counted: keep the "not a number" result of 0 / 0 but
        # avoid emitting a RuntimeWarning for the division.
        return float('nan')

    # Convert to a built-in float so the value matches the annotation and
    # is displayed the same way regardless of the numpy version.
    return float(np.trace(matrix) / total)

In [3]:
def plot_heatmap(
        *,
        matrix: np.ndarray,
        classes: List[str],
        x_title: str = 'Predicted labels',
        y_title: str = 'True labels'
):
    """Build an annotated heatmap figure for a class-by-class matrix.

    All arguments are keyword-only.

    Args:
        matrix: Values to draw; passed as-is to plotly's annotated heatmap.
        classes: Labels used for both the x and the y axis.
        x_title: Title of the x axis (drawn on the top side).
        y_title: Title of the y axis (drawn in reversed order).

    Returns:
        The figure created by ``ff.create_annotated_heatmap`` after the
        axis settings have been applied.
    """
    heatmap = ff.create_annotated_heatmap(
        matrix,
        x=classes,
        y=classes,
        colorscale='greens',
    )

    # The x axis sits on top and is scale-anchored to y within its domain.
    x_axis_options = dict(title=x_title, side='top', scaleanchor='y', constrain='domain')
    # The y axis is reversed so the first class appears in the top row.
    y_axis_options = dict(title=y_title, scaleanchor='x', autorange='reversed')

    heatmap.update_xaxes(**x_axis_options)
    heatmap.update_yaxes(**y_axis_options)
    return heatmap

# All features

In [4]:
# Read the dataset; the first CSV column is used as the row index.
df = pd.read_csv(
    'VegStNx.csv',
    index_col=0,
)
df

Unnamed: 0,Class,латеральные.отделы.префронтальной.коры.справа,латеральные.отделы.префронтальной.коры.слева,парасагитальные.отделы.префронтальной.коры.справа,парасагитальные.отделы.префронтальной.коры.слева,сенсомоторная.кора.справа,сенсомоторная.кора.слева,передняя.часть.правой.поясной.извилины,передняя.часть.левой.поясной.извилины,задняя.часть.поясной.извилины.справа,...,первичная.зрительная.кора.слева,латеральная.кора.височных.долей.справа,латеральная.кора.височных.долей.слева,медиальная.кора.височных.долей.справа,медиальная.кора.височных.долей.слева,мозжечок,мост,sex,age,et
1,2,-4.97,-5.08,-4.23,-3.93,3.5,3.89,-5.2,-5.21,-5.7,...,-1.21,-3.87,-3.4,-3.79,-3.67,0.0,-0.53,1,43,2
2,1,-1.47,-1.61,-2.95,-2.54,-2.17,-2.44,-2.16,-1.93,-4.43,...,-3.05,-0.77,-0.26,-2.24,-1.34,-0.64,0.0,2,37,2
3,1,-0.92,-0.84,-0.55,-0.85,-0.51,0.0,1.37,1.7,-1.94,...,-1.48,1.1,-0.36,4.74,1.77,0.0,3.36,1,32,1
4,3,-4.51,-4.58,-5.25,-5.17,-4.04,-4.27,-3.65,-3.53,-4.4,...,-2.83,-3.75,-3.95,-0.98,-1.02,-5.28,0.0,2,21,2
5,3,-5.31,-5.8,-4.1,-4.19,-5.67,-5.52,-2.37,-2.32,-4.42,...,-3.28,-3.8,-4.03,-0.81,-1.65,-1.7,0.0,2,33,2
6,1,-1.22,-1.36,-1.22,-1.07,0.29,-0.29,-0.42,0.11,-3.5,...,2.9,-1.15,0.28,-1.42,1.9,0.0,3.16,1,22,1
7,1,-3.88,-4.5,-4.6,-5.99,-4.58,-3.96,-3.79,-4.21,-7.15,...,-7.99,-4.56,-4.71,-8.36,-9.81,1.0,0.0,1,28,1
8,3,-3.14,-1.86,-1.29,-1.25,2.06,1.93,-0.59,-1.29,-3.4,...,-2.76,-1.64,-1.64,-0.85,0.54,0.0,7.48,1,15,2
9,3,-1.62,-1.27,-0.52,-0.29,6.15,6.61,-0.02,0.07,-2.05,...,5.8,0.37,0.53,2.0,3.36,0.0,5.33,1,15,2
10,2,-4.11,-2.58,-1.33,-0.66,-5.69,-5.03,-3.93,-4.45,-8.8,...,-6.84,-0.18,1.15,-3.65,-4.16,0.0,1.98,2,36,2


In [5]:
# Target: the class label of every sample.
y = df.loc[:, 'Class']
# Features: every column from the first brain-region column through the last one.
X = df.loc[:, 'латеральные.отделы.префронтальной.коры.справа':]

In [6]:
# Fit LDA on all features. Note that the accuracy below is measured on the
# same samples the model was fitted on.
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)

all_features_pred = clf.predict(X)
conf_matrix = confusion_matrix(y_true=y, y_pred=all_features_pred)
all_features_accuracy = get_accuracy(conf_matrix)

print(f'Accuracy: {all_features_accuracy}')
plot_heatmap(matrix=conf_matrix, classes=clf.classes_.tolist())

Accuracy: 0.9038461538461539


Unsupported

# Correlation matrix

In [7]:
# Pairwise Pearson correlation between all feature columns.
correlation = X.corr(method='pearson')
correlation

Unnamed: 0,латеральные.отделы.префронтальной.коры.справа,латеральные.отделы.префронтальной.коры.слева,парасагитальные.отделы.префронтальной.коры.справа,парасагитальные.отделы.префронтальной.коры.слева,сенсомоторная.кора.справа,сенсомоторная.кора.слева,передняя.часть.правой.поясной.извилины,передняя.часть.левой.поясной.извилины,задняя.часть.поясной.извилины.справа,задняя.часть.поясной.извилины.слева,...,первичная.зрительная.кора.слева,латеральная.кора.височных.долей.справа,латеральная.кора.височных.долей.слева,медиальная.кора.височных.долей.справа,медиальная.кора.височных.долей.слева,мозжечок,мост,sex,age,et
латеральные.отделы.префронтальной.коры.справа,1.0,0.824872,0.849296,0.795348,0.702088,0.637469,0.660751,0.646811,0.674299,0.658568,...,0.544405,0.792412,0.649288,0.400909,0.456313,0.475837,-0.001914,-0.0655,-0.153198,-0.22117
латеральные.отделы.префронтальной.коры.слева,0.824872,1.0,0.735628,0.902885,0.613064,0.73987,0.705521,0.712903,0.6203,0.630925,...,0.597917,0.844456,0.815265,0.431741,0.473605,0.368751,0.070869,0.010019,-0.127509,-0.086441
парасагитальные.отделы.префронтальной.коры.справа,0.849296,0.735628,1.0,0.850542,0.569625,0.535024,0.724162,0.652999,0.635205,0.517951,...,0.475966,0.657454,0.589301,0.546801,0.401042,0.431641,0.059403,0.072268,-0.170256,-0.0448
парасагитальные.отделы.префронтальной.коры.слева,0.795348,0.902885,0.850542,1.0,0.583148,0.690557,0.686781,0.769506,0.645711,0.655285,...,0.621612,0.776637,0.803125,0.464347,0.523394,0.41,0.080976,0.020356,-0.07568,-0.018492
сенсомоторная.кора.справа,0.702088,0.613064,0.569625,0.583148,1.0,0.819111,0.500398,0.418596,0.549247,0.494252,...,0.639797,0.625705,0.400479,0.243161,0.335149,0.452512,-0.011035,-0.233093,-0.164833,-0.330236
сенсомоторная.кора.слева,0.637469,0.73987,0.535024,0.690557,0.819111,1.0,0.500844,0.621374,0.632033,0.661707,...,0.604955,0.74372,0.639045,0.38873,0.431048,0.361079,0.12078,-0.281271,-0.046888,-0.167581
передняя.часть.правой.поясной.извилины,0.660751,0.705521,0.724162,0.686781,0.500398,0.500844,1.0,0.737081,0.616126,0.510768,...,0.485311,0.614534,0.549225,0.662369,0.579762,0.274787,0.056994,0.022025,-0.203765,-0.068758
передняя.часть.левой.поясной.извилины,0.646811,0.712903,0.652999,0.769506,0.418596,0.621374,0.737081,1.0,0.765431,0.778529,...,0.456805,0.660736,0.711687,0.675527,0.633667,0.218695,0.078465,-0.088795,-0.040448,-0.120086
задняя.часть.поясной.извилины.справа,0.674299,0.6203,0.635205,0.645711,0.549247,0.632033,0.616126,0.765431,1.0,0.87744,...,0.474385,0.697747,0.569341,0.728228,0.549165,0.327223,-0.03228,-0.1762,-0.027085,-0.173869
задняя.часть.поясной.извилины.слева,0.658568,0.630925,0.517951,0.655285,0.494252,0.661707,0.510768,0.778529,0.87744,1.0,...,0.523935,0.668078,0.67557,0.518152,0.661427,0.332969,-0.018224,-0.252985,-0.00768,-0.159789


In [8]:
# Report every pair of distinct features (by positional index) whose absolute
# correlation is at least 0.9. Only the part above the diagonal is scanned,
# so each pair is printed once.
row_positions, column_positions = np.triu_indices(len(correlation), k=1)
for row_pos, col_pos in zip(row_positions.tolist(), column_positions.tolist()):
    coefficient = correlation.iloc[row_pos, col_pos]
    if abs(coefficient) >= 0.9:
        print(f'({row_pos}, {col_pos}) -- {round(coefficient, 3)}')

(1, 3) -- 0.903
(1, 15) -- 0.914
(10, 16) -- 0.936
(13, 15) -- 0.911


In [9]:
# Positional indices (into X.columns) of the features that take part in the
# highly correlated pairs reported above.
column_number = [1, 3, 10, 13, 15, 16]

# Accuracy obtained after dropping each non-empty subset of those columns.
# Accuracy is measured on the same samples the model is fitted on.
# A fresh estimator and loop-local names are used so that this search does
# not overwrite `clf`, `conf_matrix` or `reduced_X` used by other cells.
results = {}
for subset_size in range(1, len(column_number) + 1):
    for subset in itertools.combinations(column_number, subset_size):
        candidate_X = X.drop(columns=X.columns[list(subset)])
        candidate_clf = LinearDiscriminantAnalysis().fit(candidate_X, y)
        candidate_matrix = confusion_matrix(
            y_true=y,
            y_pred=candidate_clf.predict(candidate_X),
        )
        results[subset] = get_accuracy(candidate_matrix)

# Show the subsets ordered from the highest to the lowest accuracy.
dict(sorted(results.items(), key=lambda item: item[1], reverse=True))

{(1,): 0.9038461538461539,
 (16,): 0.9038461538461539,
 (15, 16): 0.9038461538461539,
 (1, 16): 0.8846153846153846,
 (3, 10): 0.8846153846153846,
 (3, 16): 0.8846153846153846,
 (10, 16): 0.8846153846153846,
 (3,): 0.8653846153846154,
 (10,): 0.8653846153846154,
 (15,): 0.8653846153846154,
 (1, 3): 0.8653846153846154,
 (1, 10): 0.8653846153846154,
 (3, 15): 0.8653846153846154,
 (10, 15): 0.8653846153846154,
 (1, 3, 16): 0.8653846153846154,
 (3, 15, 16): 0.8653846153846154,
 (10, 15, 16): 0.8653846153846154,
 (13,): 0.8461538461538461,
 (1, 13): 0.8461538461538461,
 (1, 15): 0.8461538461538461,
 (13, 16): 0.8461538461538461,
 (1, 3, 10): 0.8461538461538461,
 (1, 3, 15): 0.8461538461538461,
 (1, 10, 15): 0.8461538461538461,
 (1, 10, 16): 0.8461538461538461,
 (3, 10, 15): 0.8461538461538461,
 (3, 13, 15): 0.8461538461538461,
 (1, 3, 15, 16): 0.8461538461538461,
 (10, 13): 0.8269230769230769,
 (1, 10, 13): 0.8269230769230769,
 (1, 13, 16): 0.8269230769230769,
 (1, 15, 16): 0.826923076923076

# Reduced features

In [10]:
# Positions (within X.columns) of the features chosen for removal; each one
# appears in a highly correlated pair reported earlier.
dropped_positions = [3, 15, 16]
columns_to_drop = X.columns[dropped_positions]
columns_to_drop

Index(['парасагитальные.отделы.префронтальной.коры.слева',
       'нижние.отделы.теменной.доли.слева',
       'ассоциативная.кора.затылочных.долей.справа'],
      dtype='object')

In [11]:
# Feature matrix without the selected columns; X itself is left untouched.
reduced_X = X.drop(columns=columns_to_drop)
reduced_X

Unnamed: 0,латеральные.отделы.префронтальной.коры.справа,латеральные.отделы.префронтальной.коры.слева,парасагитальные.отделы.префронтальной.коры.справа,сенсомоторная.кора.справа,сенсомоторная.кора.слева,передняя.часть.правой.поясной.извилины,передняя.часть.левой.поясной.извилины,задняя.часть.поясной.извилины.справа,задняя.часть.поясной.извилины.слева,прекунеус.справа,...,первичная.зрительная.кора.слева,латеральная.кора.височных.долей.справа,латеральная.кора.височных.долей.слева,медиальная.кора.височных.долей.справа,медиальная.кора.височных.долей.слева,мозжечок,мост,sex,age,et
1,-4.97,-5.08,-4.23,3.5,3.89,-5.2,-5.21,-5.7,-5.38,-5.32,...,-1.21,-3.87,-3.4,-3.79,-3.67,0.0,-0.53,1,43,2
2,-1.47,-1.61,-2.95,-2.17,-2.44,-2.16,-1.93,-4.43,-4.51,-2.88,...,-3.05,-0.77,-0.26,-2.24,-1.34,-0.64,0.0,2,37,2
3,-0.92,-0.84,-0.55,-0.51,0.0,1.37,1.7,-1.94,-1.22,-0.53,...,-1.48,1.1,-0.36,4.74,1.77,0.0,3.36,1,32,1
4,-4.51,-4.58,-5.25,-4.04,-4.27,-3.65,-3.53,-4.4,-3.98,-3.21,...,-2.83,-3.75,-3.95,-0.98,-1.02,-5.28,0.0,2,21,2
5,-5.31,-5.8,-4.1,-5.67,-5.52,-2.37,-2.32,-4.42,-4.17,-4.0,...,-3.28,-3.8,-4.03,-0.81,-1.65,-1.7,0.0,2,33,2
6,-1.22,-1.36,-1.22,0.29,-0.29,-0.42,0.11,-3.5,-3.05,0.12,...,2.9,-1.15,0.28,-1.42,1.9,0.0,3.16,1,22,1
7,-3.88,-4.5,-4.6,-4.58,-3.96,-3.79,-4.21,-7.15,-6.02,-4.49,...,-7.99,-4.56,-4.71,-8.36,-9.81,1.0,0.0,1,28,1
8,-3.14,-1.86,-1.29,2.06,1.93,-0.59,-1.29,-3.4,-2.74,-2.28,...,-2.76,-1.64,-1.64,-0.85,0.54,0.0,7.48,1,15,2
9,-1.62,-1.27,-0.52,6.15,6.61,-0.02,0.07,-2.05,-1.43,0.76,...,5.8,0.37,0.53,2.0,3.36,0.0,5.33,1,15,2
10,-4.11,-2.58,-1.33,-5.69,-5.03,-3.93,-4.45,-8.8,-9.42,-6.51,...,-6.84,-0.18,1.15,-3.65,-4.16,0.0,1.98,2,36,2


In [12]:
# Refit the existing LDA estimator on the reduced feature set. As before,
# accuracy is measured on the same samples the model was fitted on.
clf.fit(reduced_X, y)

reduced_pred = clf.predict(reduced_X)
conf_matrix = confusion_matrix(y_true=y, y_pred=reduced_pred)
reduced_accuracy = get_accuracy(conf_matrix)

print(f'Accuracy: {reduced_accuracy}')
plot_heatmap(matrix=conf_matrix, classes=clf.classes_.tolist())

Accuracy: 0.8653846153846154


Unsupported