# ASKLB Jupyter Notebook 
### Note: If running on a new Google Colab instance, run the three lines below to download dependencies:
!apt-get install build-essential swig <br>
!curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt | xargs -n 1 -L 1 pip install <br>
!pip install auto-sklearn

### Colab Link
https://colab.research.google.com/github/KordingLab/ASKLB/blob/master/notebooks/ASKLB_Notebook.ipynb

In [30]:
import autosklearn.classification
import autosklearn.regression
import sklearn.model_selection
import numpy
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import HBox, Label
import warnings
import threading
import time
import os, sys
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import scikitplot as skplt

ModuleNotFoundError: No module named 'scikitplot'

### Load X and y datasets
The files should be named automl_X.csv and automl_y.csv respectively (I will see whether a file upload widget can be used instead).

In [26]:
# Read the feature matrix and the target vector from comma-separated text
# files in the working directory (features first, then targets).
X, y = (
    numpy.loadtxt(csv_name, delimiter=',')
    for csv_name in ('automl_X.csv', 'automl_y.csv')
)

# Hold out a test split; random_state is pinned so the split is repeatable.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

In [27]:
class HiddenPrints:
    """Context manager that silences ``print`` by pointing ``sys.stdout`` at ``os.devnull``.

    On entry the current ``sys.stdout`` is remembered and replaced with a
    write handle on ``os.devnull``; on exit the remembered stream is put back
    and the devnull handle is closed. Exceptions raised inside the block are
    not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the handle so __exit__ closes exactly the
        # file opened here, even if the body rebinds sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if close() raises.
        sys.stdout = self._original_stdout
        self._devnull.close()

def work(progress, step_seconds=5):
    """Advance a progress indicator in fixed time steps.

    Sleeps ``step_seconds`` and then adds ``step_seconds`` to
    ``progress.value``, repeating ``progress.max // step_seconds`` times.
    ``progress.max`` is read once, before the first sleep.

    Args:
        progress: Object exposing numeric ``max`` and ``value`` attributes
            (the notebook passes an ``IntProgress`` widget).
        step_seconds: Length of each sleep and size of each increment.
            Defaults to 5, the step that was previously hard-coded.
    """
    for _ in range(int(progress.max // step_seconds)):
        time.sleep(step_seconds)
        progress.value = progress.value + step_seconds

def _plot_roc(title, fpr, tpr, roc_auc):
    """Draw one ROC curve, with the chance diagonal, on its own figure."""
    # A fresh figure per curve: on backends that keep the figure alive after
    # plt.show(), reusing the implicit current figure would draw the second
    # curve on top of the first.
    plt.figure()
    plt.title(title)
    plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


def _report_roc(model, features, labels, auc_label, plot_title):
    """Print the ROC AUC of ``model`` on one data split and plot its ROC curve."""
    # ROC needs a continuous score; hard predict() labels collapse the curve
    # to a single operating point. Column 1 is taken as the positive-class
    # probability — assumes a binary target (roc_curve only handles binary
    # labels); TODO confirm the label encoding matches roc_curve's default
    # positive label.
    scores = model.predict_proba(features)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(labels, scores)
    roc_auc = metrics.auc(fpr, tpr)
    print(auc_label, roc_auc)
    _plot_roc(plot_title, fpr, tpr, roc_auc)


def on_button_clicked(b):
    """Button callback: fit an auto-sklearn classifier and report its results.

    Reads the time budget (minutes) from ``runtime_widget``, resets the
    ``progress`` bar, fits an ``AutoSklearnClassifier`` on the module-level
    ``X_train``/``y_train`` while a background thread running ``work`` advances
    the bar, then writes accuracy scores, ROC AUC values and ROC plots into
    ``metrics_output`` and the ensemble description into ``models_output``.
    Status messages go to ``output``.

    Args:
        b: The object passed by the button's click handler; not used.
    """
    models_output.clear_output()
    metrics_output.clear_output()
    runtime_minutes = runtime_widget.value
    final_runtime_value_seconds = runtime_minutes * 60
    progress.value = 0
    progress.max = final_runtime_value_seconds
    with output:
        print("AUTOML FITTING STARTED, FITTING TIME IS ", runtime_minutes, " MINUTES")

    automl = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task = final_runtime_value_seconds)
    # The progress bar is advanced from a separate thread because fit() blocks.
    thread = threading.Thread(target=work, args=(progress,))
    thread.start()

    # Silence warnings and stdout chatter produced during fitting.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with HiddenPrints():
            automl.fit(X_train, y_train)

    # Report completion in the same Output widget as the start message.
    with output:
        print("FITTING COMPLETED")

    with metrics_output:
        y_train_hat = automl.predict(X_train)
        train_accuracy_score = metrics.accuracy_score(y_train, y_train_hat)
        print("Training Accuracy Score: ", train_accuracy_score)

        y_test_hat = automl.predict(X_test)
        test_accuracy_score = metrics.accuracy_score(y_test, y_test_hat)
        print("Testing Accuracy Score: ", test_accuracy_score)

        _report_roc(automl, X_train, y_train, "Training ROC AUC: ", 'Train ROC')
        _report_roc(automl, X_test, y_test, "Test ROC AUC: ", 'Test ROC')

    with models_output:
        print("MODELS:")
        print(automl.get_models_with_weights())

In [28]:
# --- Controls ---------------------------------------------------------------
# Slider for the AutoML time budget in minutes, shown next to a text label.
runtime_widget = widgets.IntSlider(value=15, min=1, max=60)
runtime_slider = HBox([Label('AutoML Runtime (minutes)'), runtime_widget])

# Progress bar; on_button_clicked resets its value and assigns its max.
progress = widgets.IntProgress(value=0, min=0, description="Progress")

# Button that starts the fit when clicked.
button = widgets.Button(description="Fit to AutoML")
button.on_click(on_button_clicked)

# --- Output areas -----------------------------------------------------------
# Three independent capture areas: status messages, metrics, model listing.
output, metrics_output, models_output = (widgets.Output() for _ in range(3))

# Metrics and model details live in a two-section accordion.
models_accordian = widgets.Accordion(children=[metrics_output, models_output])
models_accordian.set_title(0, 'Performance Metrics')
models_accordian.set_title(1, 'Models and Weights Data')




### AutoML WIDGET

In [29]:
# Render the whole control panel, top to bottom, with one display call.
display(runtime_slider, button, progress, output, models_accordian)

HBox(children=(Label(value='AutoML Runtime (minutes)'), IntSlider(value=15, max=60, min=1)))

Button(description='Fit to AutoML', style=ButtonStyle())

IntProgress(value=0, description='Progress')

Output()

Accordion(children=(Output(), Output()), _titles={'0': 'Performance Metrics', '1': 'Models and Weights Data'})

FITTING COMPLETED


In [None]:
# Show the fitted ensemble's models and weights.
# NOTE(review): the only module-level assignment of `automl` visible in this
# notebook is in a later cell (the one inside on_button_clicked is local), so
# this cell presumably fails on a fresh top-to-bottom run — confirm.
automl.get_models_with_weights()

In [None]:
# Stand-alone fit with a time budget of 30 (the button callback passes
# seconds for this same parameter), independent of the widget.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
)
automl.fit(X_train, y_train)

In [None]:
# Keep the ensemble description returned by the fitted classifier for inspection.
models = automl.get_models_with_weights()

In [None]:
# Second item of the first entry in `models`.
# presumably entries are (weight, pipeline) pairs — verify against auto-sklearn docs
pipeline = models[0][1]

In [None]:
# Display the selected pipeline's repr.
pipeline

In [None]:
# NOTE(review): same bare expression as the preceding cell; looks like a leftover duplicate.
pipeline

In [None]:
# Predict on the held-out split and score the predictions with auto-sklearn's roc_auc.
# NOTE(review): predict() output is used as the score input here; ROC AUC is
# normally computed from continuous scores (e.g. predict_proba) — confirm intent.
y_test_hat = automl.predict(X_test)
autosklearn.metrics.roc_auc(y_test, y_test_hat)

In [None]:
# Check the dimensions of the prediction array.
numpy.shape(y_test_hat)

In [None]:
# Build the test-set ROC curve and its area from y_test_hat.
# NOTE(review): this alias is already imported in the notebook's import cell.
import sklearn.metrics as metrics
fpr, tpr, threshold = metrics.roc_curve(y_test, y_test_hat)
roc_auc = metrics.auc(fpr, tpr)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()