In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from time import time

In [24]:
# Učitamo dataset i dodijelimo imena stupcima

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"

colNames = []
for i in range(60):
    colNames.append(i+1)
colNames.append("Class")

sonar = pd.read_csv(url,names=colNames,header=None)

In [25]:
rock = sonar.loc[ sonar['Class'] == 'R', : ]
mine = sonar.loc[ sonar['Class'] == 'M', : ]

sonar_y = [] # target array (niz čiji elementi govore je li odgovarajući element tog indexa u datasetu sonar R ili M)
for i in range(97):
    sonar_y.append(0) # 0 označavaju R
for i in range(111):
    sonar_y.append(1) # 1 označavaju M
sonar_y = np.array(sonar_y)

sonar_bez_Class = sonar.drop('Class', axis=1) # uklonimo zadnji stupac Class iz sonar dataframea
sonar_X = np.array(sonar_bez_Class.values)

# sonar_y i sonar_X moraju biti np.array jer fja split prima samo objekte tog tipa

# shuffle

random_state = 42
# fiksiranje broja random state omogućuje da svaki put kada opet runamo ovaj kod, dobijemo istu podjelu na subsetove
# ovaj broj kojim fiksiramo odabran je nasumično

X,y = shuffle(sonar_X,sonar_y,random_state=random_state)

from sklearn.model_selection import RepeatedStratifiedKFold

random_state = 164981614

rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=random_state)

X_train, X_test, y_train, y_test = [], [], [], []

for train_index, test_index in rskf.split(X, y):
    X_train.append(X[train_index])
    X_test.append(X[test_index])
    y_train.append(y[train_index])
    y_test.append(y[test_index])

In [26]:
# https://automl.github.io/auto-sklearn/stable/api.html
# popis argumenata za fju autosklearnclassifier 

import autosklearn.classification
import sklearn.model_selection
from sklearn.metrics import accuracy_score, confusion_matrix

In [27]:
automl = autosklearn.classification.AutoSklearnClassifier()
rjecnik = {}
acc = []
t0 = time()
    
for i in range(10):
    automl.fit(X_train[i], y_train[i])
    y_pred = automl.predict(X_test[i])
    acc_score = accuracy_score(y_test[i], y_pred)
    acc.append(acc_score)
    conf_mat = confusion_matrix(y_test[i],y_pred)
    rjecnik[i+1] = (acc_score, conf_mat[0][0], conf_mat[0][1], conf_mat[1][0], conf_mat[1][1])
    print("i =", i+1)
    print(automl.sprint_statistics(), "\n\n")

print("done in %0.3fs" % (time() - t0))

i = 1
auto-sklearn results:
  Dataset name: 10f115df9fe92ddca4558ff3010214cc
  Metric: accuracy
  Best validation score: 0.903226
  Number of target algorithm runs: 373
  Number of successful target algorithm runs: 351
  Number of crashed target algorithm runs: 18
  Number of target algorithms that exceeded the memory limit: 4
  Number of target algorithms that exceeded the time limit: 0
 


i = 2
auto-sklearn results:
  Dataset name: 619479744114bee3c6b5465b94e30d5d
  Metric: accuracy
  Best validation score: 0.951613
  Number of target algorithm runs: 2572
  Number of successful target algorithm runs: 2494
  Number of crashed target algorithm runs: 76
  Number of target algorithms that exceeded the memory limit: 1
  Number of target algorithms that exceeded the time limit: 1
 


i = 3
auto-sklearn results:
  Dataset name: 784b789ea999bd7761d3eb01580a693f
  Metric: accuracy
  Best validation score: 0.903226
  Number of target algorithm runs: 1753
  Number of successful target algorith

In [29]:
print("Rezultati dobiveni koristeći autosklearn:\n")
df = pd.DataFrame(rjecnik, index=['accuracy', 'TP', 'FP', 'FN', 'TN'])
print(df)

avg_accuracy = np.average(acc)
print("\nProsječna točnost dobivena koristeći autosklearn je:\n", 
      avg_accuracy*100, "%.")

Rezultati dobiveni koristeći autosklearn:

                 1          2          3          4          5          6   \
accuracy   0.772727   0.761905   0.952381   0.904762   0.904762   0.904762   
TP         7.000000  10.000000   9.000000   8.000000  10.000000   9.000000   
FP         3.000000   0.000000   1.000000   2.000000   0.000000   1.000000   
FN         2.000000   5.000000   0.000000   0.000000   2.000000   1.000000   
TN        10.000000   6.000000  11.000000  11.000000   9.000000  10.000000   

                7    8     9     10  
accuracy  0.761905  0.7  0.85  0.65  
TP        7.000000  6.0  8.00  7.00  
FP        3.000000  3.0  1.00  2.00  
FN        2.000000  3.0  2.00  5.00  
TN        9.000000  8.0  9.00  6.00  

Prosječna točnost dobivena koristeći autosklearn je:
 81.63203463203462 %.
