In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from time import time

In [None]:
# Load the sonar dataset and assign column names.
# The raw file has no header row: the 60 feature columns are named 1..60
# and the trailing label column is named "Class".

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"

colNames = list(range(1, 61)) + ["Class"]

sonar = pd.read_csv(url, header=None, names=colNames)

In [None]:
# Rows of each class (R = rock, M = mine).
rock = sonar.loc[sonar['Class'] == 'R', :]
mine = sonar.loc[sonar['Class'] == 'M', :]

# Target array: element k says whether row k of `sonar` is R (0) or M (1).
# It is derived from the Class column itself instead of hard-coding
# "97 R rows followed by 111 M rows", so every label stays aligned with its
# own row whatever the row order or the per-class counts in the file are.
class_codes = {'R': 0, 'M': 1}
sonar_y = sonar['Class'].map(class_codes)
if sonar_y.isna().any():
    # A label other than 'R'/'M' would otherwise be encoded silently as NaN.
    unknown = sorted(set(sonar['Class'][sonar_y.isna()].astype(str)))
    raise ValueError("Unexpected Class labels in sonar dataset: %s" % unknown)
sonar_y = sonar_y.to_numpy(dtype=int)

sonar_bez_Class = sonar.drop('Class', axis=1)  # drop the trailing Class column from the sonar frame
sonar_X = np.array(sonar_bez_Class.values)

# sonar_X and sonar_y are NumPy arrays because the folds below are extracted
# with integer index arrays (X[train_index], y[test_index]).

# Shuffle features and labels together.
random_state = 42
# Fixing the random state gives the same shuffle every time this code is
# re-run; the particular number was picked arbitrarily.

X, y = shuffle(sonar_X, sonar_y, random_state=random_state)

from sklearn.model_selection import RepeatedStratifiedKFold

# Separate (also arbitrary) seed for the fold assignment.
random_state = 164981614

rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=random_state)

# One entry per fold: X_train[k] / y_train[k] are the training part of fold k,
# X_test[k] / y_test[k] the held-out part.
X_train, X_test, y_train, y_test = [], [], [], []

for train_index, test_index in rskf.split(X, y):
    X_train.append(X[train_index])
    X_test.append(X[test_index])
    y_train.append(y[train_index])
    y_test.append(y[test_index])

In [None]:
# https://automl.github.io/auto-sklearn/stable/api.html
# reference list of the arguments accepted by AutoSklearnClassifier

import autosklearn.classification
import sklearn.model_selection
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Cross-validated evaluation of auto-sklearn over the prepared folds.
# rjecnik maps fold number (1-based) -> (accuracy, TP, FP, FN, TN);
# acc collects the per-fold accuracies; t0 times the whole run.
rjecnik = {}
acc = []
t0 = time()

# Iterate over the folds that actually exist instead of a hard-coded count.
for i, (X_tr, X_te, y_tr, y_te) in enumerate(zip(X_train, X_test, y_train, y_test)):
    # Fresh estimator for every fold, so each fold is fitted by an object
    # that has never seen another fold's data.
    automl = autosklearn.classification.AutoSklearnClassifier()
    automl.fit(X_tr, y_tr)
    y_pred = automl.predict(X_te)
    acc_score = accuracy_score(y_te, y_pred)
    acc.append(acc_score)
    # labels=[0, 1] pins the matrix to 2x2 (rows = true class, columns =
    # predicted class) even if a fold's truth and predictions contain a
    # single class, so the indexing below cannot go out of range.
    conf_mat = confusion_matrix(y_te, y_pred, labels=[0, 1])
    # Class 1 is treated as the positive class: TP, FP, FN, TN.
    rjecnik[i+1] = (acc_score, conf_mat[1][1], conf_mat[0][1], conf_mat[1][0], conf_mat[0][0])
    print("i =", i+1)
    print(automl.sprint_statistics(), "\n\n")

print("done in %0.3fs" % (time() - t0))

In [None]:
# Per-fold summary table: one column per fold, one row per metric.
print("Rezultati dobiveni koristeći autosklearn:\n")
metric_rows = ['accuracy', 'TP', 'FP', 'FN', 'TN']
df = pd.DataFrame(rjecnik, index=metric_rows)
print(df)

# Mean accuracy across all folds, reported as a percentage.
avg_accuracy = np.mean(acc)
print("\nProsječna točnost dobivena koristeći autosklearn je:\n", avg_accuracy * 100, "%.")