In [None]:
import pandas as pd
import pydotplus
from sklearn import tree
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree


In [None]:
!pip install graphviz



In [None]:
# Remote CSV (shared via Google Drive) that pandas downloads and parses directly.
url = 'https://drive.google.com/uc?id=1LBDnhITL0Wqwp5G6M6IBI-SSz8BIoNec'
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Summarise the loaded frame: column names, non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [None]:
# Target vector: the 'Outcome' column. Feature matrix: every other column.
y = df.loc[:, 'Outcome']
X = df.drop(columns=['Outcome'])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

№ 1

---



Linear


In [None]:
# Linear-kernel SVM. The scaler lives inside the pipeline, so it is fitted on
# the training split only and then re-applied to whatever is scored.
linear_clf = make_pipeline(StandardScaler(), SVC(kernel='linear'))
linear_clf.fit(X_train, y_train)
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', linear_clf.score(X_train, y_train))
print('Accuracy test:', linear_clf.score(X_test, y_test))

R2 train: 0.7638436482084691
R2 test: 0.8246753246753247


Polynomial


In [None]:
# Polynomial-kernel SVM with scikit-learn's default kernel settings. The scaler
# lives inside the pipeline, so it is fitted on the training split only.
poly_clf = make_pipeline(StandardScaler(), SVC(kernel='poly'))
poly_clf.fit(X_train, y_train)
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', poly_clf.score(X_train, y_train))
print('Accuracy test:', poly_clf.score(X_test, y_test))

R2 train: 0.7931596091205212
R2 test: 0.7532467532467533


Gaussian (RBF)


In [None]:
# Gaussian (RBF) kernel SVM. The scaler lives inside the pipeline, so it is
# fitted on the training split only and then re-applied to whatever is scored.
rbf_clf = make_pipeline(StandardScaler(), SVC(kernel='rbf'))
rbf_clf.fit(X_train, y_train)
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', rbf_clf.score(X_train, y_train))
print('Accuracy test:', rbf_clf.score(X_test, y_test))

R2 train: 0.8192182410423453
R2 test: 0.7922077922077922


Sigmoid


In [None]:
# Sigmoid-kernel SVM. Unlike the other SVC cells this one overrides C; 0.05 is
# well below scikit-learn's default of 1.0, i.e. stronger regularisation.
# NOTE(review): the value looks hand-tuned - confirm how it was chosen.
sigmoid_clf = make_pipeline(StandardScaler(), SVC(C=0.05, kernel='sigmoid'))
sigmoid_clf.fit(X_train, y_train)
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', sigmoid_clf.score(X_train, y_train))
print('Accuracy test:', sigmoid_clf.score(X_test, y_test))

R2 train: 0.762214983713355
R2 test: 0.8181818181818182


№ 2

---



In [None]:
def tree_graph_to_png(tree_clf, features_names, png_file):
    """Export a fitted decision tree to Graphviz DOT and save it as a PNG.

    Parameters
    ----------
    tree_clf
        Fitted tree estimator handed to ``tree.export_graphviz``.
    features_names
        Feature labels, forwarded as the ``feature_names`` export option.
    png_file
        Path of the PNG file to write.

    Returns
    -------
    None
        The function is called for its side effect of writing ``png_file``.
    """
    export_options = {
        'feature_names': features_names,
        'filled': True,
        'out_file': None,  # None -> the DOT source comes back as a string
    }
    dot_source = tree.export_graphviz(tree_clf, **export_options)
    # Parse the DOT text into a pydotplus graph and render it straight to disk.
    pydotplus.graph_from_dot_data(dot_source).write_png(png_file)

Gini

In [None]:
# Decision tree that splits on Gini impurity. random_state is pinned because
# scikit-learn randomly permutes the features at each split, so an unseeded
# fit is not guaranteed to produce the same tree on every run.
gini_clf = DecisionTreeClassifier(criterion='gini', random_state=0)
gini_clf.fit(X_train, y_train)
# Take the feature names from the training frame itself instead of assuming
# that 'Outcome' is the last column of df.
tree_graph_to_png(gini_clf, features_names=X_train.columns.tolist(), png_file='gini.png')
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', gini_clf.score(X_train, y_train))
print('Accuracy test:', gini_clf.score(X_test, y_test))

R2 train: 1.0
R2 test: 0.7597402597402597


Entropy


In [None]:
# Decision tree that splits on entropy (information gain). random_state is
# pinned because scikit-learn randomly permutes the features at each split, so
# an unseeded fit is not guaranteed to produce the same tree on every run.
entropy_clf = DecisionTreeClassifier(criterion='entropy', random_state=0)
entropy_clf.fit(X_train, y_train)
# Take the feature names from the training frame itself instead of assuming
# that 'Outcome' is the last column of df.
tree_graph_to_png(entropy_clf, features_names=X_train.columns.tolist(), png_file='entropy.png')
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', entropy_clf.score(X_train, y_train))
print('Accuracy test:', entropy_clf.score(X_test, y_test))

R2 train: 1.0
R2 test: 0.7467532467532467


In [None]:
# Decision tree with criterion='log_loss'. scikit-learn documents 'log_loss'
# and 'entropy' as the same Shannon information-gain criterion, so with an
# identical seed this tree should match the entropy one.
# random_state is pinned because the features are randomly permuted at each
# split, so an unseeded fit is not guaranteed to be repeatable.
log_clf = DecisionTreeClassifier(criterion='log_loss', random_state=0)
log_clf.fit(X_train, y_train)
# Take the feature names from the training frame itself instead of assuming
# that 'Outcome' is the last column of df.
tree_graph_to_png(log_clf, features_names=X_train.columns.tolist(), png_file='log_loss.png')
# For a classifier, .score() returns mean accuracy (fraction of correct
# predictions), not R^2 - label the output accordingly.
print('Accuracy train:', log_clf.score(X_train, y_train))
print('Accuracy test:', log_clf.score(X_test, y_test))

R2 train: 1.0
R2 test: 0.7402597402597403
