In [104]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, cross_validate, KFold, LeaveOneOut, GridSearchCV

In [5]:
data = pd.read_csv('heart.csv', sep=',')
data.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [31]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [27]:
Positive = data[data['target'] == 1]['target'].count()
Negative = data.shape[0] - Positive
print('Распределение классов:')
print('    Положительные: {} % ({})'.format(round(Positive/data.shape[0]*100, 4),Positive))
print('    Отрицательные: {} % ({})'.format(round(Negative/data.shape[0]*100, 4), Negative))

Распределение классов:
    Положительные: 54.4554 % (165)
    Отрицательные: 45.5446 % (138)


In [38]:
TrainX,TestX,TrainY,TestY = train_test_split(data[['age','sex','cp','trestbps','chol','fbs','restecg','thalach',
                                                   'exang','oldpeak','slope','ca','thal']],data['target'], test_size=0.3, random_state = 1)

In [98]:
LogReg = LogisticRegression(C=10)
SVCT = SVC(gamma = 10)
DecTree = DecisionTreeClassifier(max_depth = 4)

In [99]:
LogReg.fit(TrainX, TrainY)
SVCT.fit(TrainX, TrainY)
DecTree.fit(TrainX, TrainY)



DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [100]:
Result = []
Result.append(LogReg.predict(TestX))
Result.append(SVCT.predict(TestX))
Result.append(DecTree.predict(TestX))
for res in Result:
    print(accuracy_score(TestY,res))

0.7912087912087912
0.5494505494505495
0.7472527472527473


In [101]:
for res in Result:
    print(confusion_matrix(TestY,res), '\n')

[[30 11]
 [ 8 42]] 

[[ 0 41]
 [ 0 50]] 

[[29 12]
 [11 39]] 



In [102]:
for res in Result:
    print(classification_report(TestY,res), '\n')

              precision    recall  f1-score   support

           0       0.79      0.73      0.76        41
           1       0.79      0.84      0.82        50

   micro avg       0.79      0.79      0.79        91
   macro avg       0.79      0.79      0.79        91
weighted avg       0.79      0.79      0.79        91
 

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        41
           1       0.55      1.00      0.71        50

   micro avg       0.55      0.55      0.55        91
   macro avg       0.27      0.50      0.35        91
weighted avg       0.30      0.55      0.39        91
 

              precision    recall  f1-score   support

           0       0.72      0.71      0.72        41
           1       0.76      0.78      0.77        50

   micro avg       0.75      0.75      0.75        91
   macro avg       0.74      0.74      0.74        91
weighted avg       0.75      0.75      0.75        91
 



  'precision', 'predicted', average, warn_for)


In [113]:
CTP = [0.001,0.01,0.1,1,10,100,1000]
GammaTP = [0.001,0.01,0.1,1,10,100,1000]
DepthTP = [1,2,3,4,5,6,7,8,9,10]
tuned_parameters = [{'C':CTP}]
LogRegGS = GridSearchCV(LogisticRegression(), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
LogRegGS.fit(data[['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal']]
                 ,data['target'])
LogRegCBP = LogRegGS.best_params_.get('C')

tuned_parameters = [{'gamma':GammaTP}]
SVCGS = GridSearchCV(SVC(), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
SVCGS.fit(data[['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal']]
                 ,data['target'])
SVCGammaBP = SVCGS.best_params_.get('gamma')

tuned_parameters = [{'max_depth':DepthTP}]
DecTreeGS = GridSearchCV(DecisionTreeClassifier(), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
DecTreeGS.fit(data[['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal']]
                 ,data['target'])
DecTreeDepthBP = DecTreeGS.best_params_.get('max_depth')





















































































In [115]:
print('Подобранные гиперпараметры:')
print('    Логистическая регрессия: параметр С: ',LogRegCBP)
print('    Метод Опорных Векторов: параметр gamma: ',SVCGammaBP)
print('    Дерево решений: параметр глубина: ',DecTreeDepthBP)

Подобранные гиперпараметры:
    Логистическая регрессия: параметр С:  0.1
    Метод Опорных Векторов: параметр gamma:  0.001
    Дерево решений: параметр глубина:  6


In [116]:
LogReg = LogisticRegression(C=LogRegCBP)
SVCT = SVC(gamma = SVCGammaBP)
DecTree = DecisionTreeClassifier(max_depth = DecTreeDepthBP)

In [117]:
LogReg.fit(TrainX, TrainY)
SVCT.fit(TrainX, TrainY)
DecTree.fit(TrainX, TrainY)



DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=6,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [118]:
Result = []
Result.append(LogReg.predict(TestX))
Result.append(SVCT.predict(TestX))
Result.append(DecTree.predict(TestX))
for res in Result:
    print(accuracy_score(TestY,res))

0.7802197802197802
0.6373626373626373
0.6923076923076923


In [119]:
for res in Result:
    print(confusion_matrix(TestY,res), '\n')

[[29 12]
 [ 8 42]] 

[[25 16]
 [17 33]] 

[[31 10]
 [18 32]] 



In [120]:
for res in Result:
    print(classification_report(TestY,res), '\n')

              precision    recall  f1-score   support

           0       0.78      0.71      0.74        41
           1       0.78      0.84      0.81        50

   micro avg       0.78      0.78      0.78        91
   macro avg       0.78      0.77      0.78        91
weighted avg       0.78      0.78      0.78        91
 

              precision    recall  f1-score   support

           0       0.60      0.61      0.60        41
           1       0.67      0.66      0.67        50

   micro avg       0.64      0.64      0.64        91
   macro avg       0.63      0.63      0.63        91
weighted avg       0.64      0.64      0.64        91
 

              precision    recall  f1-score   support

           0       0.63      0.76      0.69        41
           1       0.76      0.64      0.70        50

   micro avg       0.69      0.69      0.69        91
   macro avg       0.70      0.70      0.69        91
weighted avg       0.70      0.69      0.69        91
 

