In [1]:
from IPython.display import HTML

# Inject a small script plus a "Show/hide code" link into the rendered page.
# code_toggle() hides or shows every `div.input` element and then flips the
# code_show flag. It is also registered as the document-ready handler, so the
# inputs are hidden once on load (code_show starts as true).
# NOTE(review): the script relies on `$` (jQuery) being present on the page --
# confirm this for the notebook front end in use.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<a href="javascript:code_toggle()">Show/hide code</a>''')

### main functions

In [2]:
# %load_ext autoreload
%reload_ext autoreload
%autoreload

from ipywidgets import IntSlider, Box, Layout, Label, Dropdown, Button
from IPython.display import display, HTML, clear_output
from core.whose_cpp_code import classify_authors
from numpy import mean, std
import pandas as pd
import matplotlib.pyplot as plt
import warnings

# Silence every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation warnings raised by the
# libraries used below are hidden as well -- consider narrowing it by category.
warnings.filterwarnings("ignore")


def get_confidence(array, z_score=1.96):
    """Print the mean, standard deviation and a confidence interval of the mean.

    The interval is the normal-approximation interval
    ``mean +/- z_score * std / sqrt(n)``. The previous implementation used
    ``mean +/- 0.95 * std``, i.e. it multiplied the spread by the confidence
    level itself and ignored the sample size, which is not a confidence
    interval.

    Parameters
    ----------
    array : sequence of numbers
        Observations (for example per-run accuracies). Must support ``len()``.
    z_score : float, optional
        Two-sided standard-normal quantile of the desired confidence level.
        The default 1.96 corresponds to a 95% interval.

    Returns
    -------
    None
        The three values are only printed.
    """
    m = mean(array)  # general average
    sigma = std(array)  # standard deviation (numpy default, ddof=0)
    n = len(array)

    # Half-width of the interval: z * standard error of the mean.
    # For an empty input there is no interval; keep reporting nan as before
    # instead of dividing by zero.
    half_width = z_score * sigma / (n ** 0.5) if n else float('nan')

    # confidence interval
    print('mean accuracy: ', m)
    print('standart deviation: ', sigma)
    print('confidence interval: (', m - half_width, ';', m + half_width, ')')


# Outer container: a flex column whose children stretch to its width; the
# container itself takes half of the available width.
form_layout = Layout(display='flex', flex_flow='column', align_items='stretch', width='50%')

# One form row: a flex row with the label and the control pushed to opposite ends.
form_item_layout = Layout(display='flex', flex_flow='row', justify_content='space-between')


# Selection widgets read by classify_mul (as data.value / classifier.value).
# Options are given as (label, value) pairs rather than a dict: ipywidgets
# deprecates mapping-type options, and a list keeps the on-screen order (and
# therefore the default selection, the first entry) explicit.

# Data set selector: UI label -> path passed to classify_authors.
# NOTE(review): the first two entries are absolute paths on one particular
# machine; they are kept unchanged here but should be made configurable.
data = Dropdown(options=[
    ('students', '/media/marina/hdd/diploma/data/c++/data/'),
    ('GoogleCodeJam', '/media/marina/hdd/diploma/data/c++/data3/'),
    ('GitHub', './data'),
])

# Classifier selector: UI label -> classifier name passed to classify_authors.
classifier = Dropdown(options=[
    ('RandomForest', 'RandomForestClassifier'),
    ('GradientBoosting', 'GradientBoostingClassifier'),
    ('ExtraTrees', 'ExtraTreesClassifier'),
    ('AdaBoost', 'AdaBoostClassifier'),
])

In [3]:
# %load_ext autoreload
%reload_ext autoreload
%autoreload

from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

# Set up plotly's notebook rendering for the iplot() calls made by the plotting
# helpers defined in this cell.
# NOTE(review): connected=True presumably makes the page fetch plotly.js from
# the network instead of embedding it -- confirm for the installed plotly version.
init_notebook_mode(connected=True)

def make_metrics_bar(metrics, loops_num):
    """Draw a grouped bar chart with one group of four bars per experiment.

    Parameters
    ----------
    metrics : mapping
        Must provide the keys 'f1_score', 'precision', 'recall' and
        'accuracy'; each value is used as the y data of one bar series.
    loops_num : int
        Number of experiments; the x axis is labelled 1..loops_num.

    The figure is shown with iplot(); nothing is returned.
    """
    # (key in `metrics`, legend entry, bar colour), in drawing order.
    series = (
        ('f1_score', 'F1-score', 'rgb(136, 142, 150)'),
        ('precision', 'Precision', 'rgb(204,204,204)'),
        ('recall', 'Recall', 'rgb(144, 177, 229)'),
        ('accuracy', 'Accuracy', 'rgb(49,130,189)'),
    )
    bars = []
    for key, legend, colour in series:
        bars.append(go.Bar(
            x=list(range(1, loops_num + 1)),
            y=metrics[key],
            name=legend,
            marker=dict(color=colour)
        ))

    grey = 'rgb(107, 107, 107)'

    def axis_style(caption):
        # Title and tick fonts are identical on both axes.
        return dict(
            title=caption,
            titlefont=dict(size=16, color=grey),
            tickfont=dict(size=14, color=grey)
        )

    x_axis = axis_style('Number of experiment')
    x_axis['tickangle'] = -45
    y_axis = axis_style('Value, %')

    chart_layout = go.Layout(
        xaxis=x_axis,
        yaxis=y_axis,
        barmode='group',
        title='Classification metrics'
    )

    iplot(go.Figure(data=bars, layout=chart_layout), filename='metrics-bar')
    

def make_pie(mean_accuracy):
    """Show a donut chart splitting the total into wrong vs. correct predictions.

    Parameters
    ----------
    mean_accuracy : float
        Share of correctly predicted samples; the other slice is
        ``1 - mean_accuracy``.

    The figure is shown with iplot(); nothing is returned.
    """
    donut = dict(
        values=[1 - mean_accuracy, mean_accuracy],
        labels=['Wrong predicted samples, %', 'True predictes samples, %'],
        type='pie',
        text='Accuracy',
        textposition='inside',
        hole=.4
    )
    # Text annotation without an arrow, added through the layout.
    caption = dict(font=dict(size=20), showarrow=False, text='Accuracy')
    page = dict(title='Total mean accuracy', annotations=[caption])

    iplot(dict(data=[donut], layout=page))

In [4]:
# %load_ext autoreload
%reload_ext autoreload
%autoreload

import numpy as np
import time

# Number of classification runs to perform (1..10).
loops = IntSlider(min=1, max=10)

# One row per control: a caption on the left, the widget on the right.
form_items = [
    Box([Label(value=caption), control], layout=form_item_layout)
    for caption, control in (
        ('Loops:', loops),
        ('Data:', data),
        ('Classifier:', classifier),
    )
]


def classify_mul(b):
    """Button callback: run the classification `loops.value` times and report.

    For every run, classify_authors() is called with the selected data path
    and classifier name, its report is turned into a DataFrame, and the mean
    of the 'accuracy', 'precision', 'recall' and 'f1_score' columns is
    recorded. Afterwards the per-run metrics are plotted as a grouped bar
    chart, the mean accuracy as a pie chart, and the metrics (plus the
    classifier name) are written to 'results.csv', overwriting the file.

    Parameters
    ----------
    b : object
        The argument passed by the button's on_click handler; unused.
    """
    clear_output()

    column_names = ('accuracy', 'precision', 'recall', 'f1_score')
    per_run = dict((name, []) for name in column_names)

    started = time.time()
    for run_idx in range(loops.value):
        print('Loop ', run_idx + 1, ': Please, wait...')
        report_df = pd.DataFrame(classify_authors(data.value, classifier.value))
        for name in column_names:
            per_run[name].append(mean(report_df[name].tolist()))

    elapsed = round(time.time() - started, 2)
    print('Run time in sec: ', elapsed)

    metrics = {
        'f1_score': per_run['f1_score'],
        'precision': per_run['precision'],
        'recall': per_run['recall'],
        'accuracy': per_run['accuracy'],
    }
    make_metrics_bar(metrics, loops.value)
    make_pie(mean(per_run['accuracy']))

    # saving results to csv
    metrics['classifier'] = classifier.value
    pd.DataFrame(metrics).to_csv('results.csv', mode='w')

   

# Assemble the settings form from the rows built above.
form = Box(form_items, layout=form_layout)

# "Classify" button; a click runs classify_mul.
classify_mul_btn = Button(
    description='Classify',
    tooltip='Click me',
    icon='check',
    button_style='success'
)
classify_mul_btn.on_click(classify_mul)

## main module

In [5]:
# Show the settings form followed by the "Classify" button; classify_mul is
# registered as the button's click handler.
display(form, classify_mul_btn)

Loop  1 : Please, wait...
1973
1973
features: [ [-7.322774599895113, -7.322774599895113, 0, 0, -1.6522042594450257, 0, -3.5709203466197885, 0.219756999471738, 186, 35.54929577464789, -7.545918151209323, -7.545918151209323, -5.936480238775222, -5.936480238775222, -7.545918151209323, -5.936480238775222]
 [-5.7439642642890325, -5.849324779946859, -8.046549357283078, 0, -1.5437593113674548, 0, -3.5356898507662287, 0.24271533781620236, 75, 34.31868131868132, -8.046549357283078, -8.046549357283078, -5.7439642642890325, -5.967107815603243, -7.353402176723133, -5.648654084484708]
 [-4.482374608508164, -4.482374608508164, 0, 0, -2.0744289998562917, 0, -3.096080247388273, 0.1708542713567839, 27, 22.11111111111111, -6.679599185844383, -6.679599185844383, -6.679599185844383, 0, -6.679599185844383, -6.679599185844383]
 ...,
 [-7.423866907177365, -7.423866907177365, 0, 0, -2.6574285735931515, -3.2728270012787193, -3.140280345316736, 0.1512981199641898, 123, 23.110344827586207, -5.344425365497529, -5

ValueError: all the input arrays must have same number of dimensions