# Load required pakages

In [1]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'browser'
import pandas as pd
import plotly.express as px
import plotly, os,joblib
import numpy as np

# Define required functions

In [2]:
#write perfomance score in csv file
def writeData(data,filename):
    data["Accuracy"] = 100 * data["Accuracy"]
    data["Balanced Acc"] = 100 * data["Balanced Acc"]
    data["F1"] = 100 * data["F1"]
    data["Recall"] = 100 * data["Recall"]
    data["Precision"] = 100 * data["Precision"]
    data["Avg precision"] = 100 * data["Avg precision"]
    data["roc_auc"] = 100 * data["roc_auc"]
    data['model'] = data['model'].str.replace('__','-')
    
    column_names=['model','F1','Accuracy','Balanced Acc', 'Recall','Precision','Avg precision','roc_auc']
    
    data = data.reindex(columns=column_names)
    data.to_csv(filename+'.csv', index = False, header = True)

#prepare input for error/variance plot
def errorPlot_input(results):
    data = pd.DataFrame({'Accuracy': results["Acc"],
               'Balanced Acc': results["Bal_acc"],
                'F1': results["F1"],
                'Recall': results["recall"],
                'Precision': results["precision"],
               'Avg precision': results["average_precision"],
                'roc_auc': results["roc_auc"],
              "model":results["model"]}, index=results["model"])
    
    
    data= data.sort_values('F1')
    
    data["Accuracy"] = 100 * data["Accuracy"]
    data["Balanced Acc"] = 100 * data["Balanced Acc"]
    data["F1"] = 100 * data["F1"]
    data["Recall"] = 100 * data["Recall"]
    data["Precision"] = 100 * data["Precision"]
    data["Avg precision"] = 100 * data["Avg precision"]
    data["roc_auc"] = 100 * data["roc_auc"]
    data['model'] = data['model'].str.replace('__','-')
    
    column_names=['model','F1','Accuracy','Balanced Acc', 'Recall','Precision','Avg precision','roc_auc']
    data = data.reindex(columns=column_names)
    return(data)

#write feature selcted by top 10 performing algorithms
def writeSelectedfeatures(top_10_algo):
    #read sel feature by all algo
    selFeat = pd.read_csv("sel_feat_whole_withoutDummy.csv", index_col=0)
    
    #sel features bs top 10 performing algo
    selFeat=selFeat[top_10_algo]
    
    #take intersection
    a=selFeat.to_dict('list')
    for key in a.keys():
        a[key]=set(a[key])
    common=list(set.intersection(*a.values()))
    
    #write 
    with open('top10_common_sel_feat_whole_withoutDummy.txt','w+') as f:
        for i in common:
            f.write('%s\n'%i)
        
# generate error/variance plots for top 10 algorithms performance 
def errorPlot(results_whole,nested_results_whole):
   
    fig = make_subplots(rows=1, cols=2, 
                        y_title='Standard Deviation in Models Performance (%)',
                        x_title='Metrics',
                        subplot_titles=("k-fold CV", "Nested CV"))
    
    ########################
    #using k-fold cv results
    ########################
    
    df=errorPlot_input(results_whole)
    df=df.iloc[26:,]
    top_10_algo=df.index.tolist()
    writeSelectedfeatures(top_10_algo)



    x_data = list(df.columns)[1:]
    y_data = [round(num,1) for num in list(df.mean(axis=0))]
    err_y_data = list(df.std(axis=0))

    fig.add_trace(go.Scatter(x=x_data, y=y_data,
                                text=np.round(y_data, 1),
                                mode='markers+text',
                                textposition='top right',
                                marker=dict(color="#27B4BB", size=8),
                                showlegend=False
                ), row=1, col=1)
    
    for i, bar in enumerate(err_y_data):
        fig.add_trace(go.Scatter(
                        x=[x_data[i]],
                        y=[y_data[i]],
                       # text=np.round(y_data, 1),
                        mode='markers+text',
                        textposition='top center',
                        error_y=dict(
                            type='data',
                            color = "black",
                            array=[bar],
                            visible=True),
                        marker=dict(color='rgba(0,0,0,0)', size=8),
                        showlegend=False
                    ), row=1, col=1)
    
    
    #using Nested cv results
    df=errorPlot_input(nested_results_whole)
    df=df.loc[top_10_algo]

    x_data = list(df.columns)[1:]
    y_data = [round(num,1) for num in list(df.mean(axis=0))]
    err_y_data = list(df.std(axis=0))
    
    fig.add_trace(go.Scatter(x=x_data, y=y_data,
                           text=np.round(y_data, 1),
                           mode='markers+text',
                           textposition='top right',
                           marker=dict(color="#B56621", size=8),
                           showlegend=False
           ), row=1, col=2)
    
    for i, bar in enumerate(err_y_data):
        fig.add_trace(go.Scatter(
                        x=[x_data[i]],
                        y=[y_data[i]],
                        mode='markers+text',
                        textposition='top center',
                        error_y=dict(
                            type='data',
                            color = "black",
                            array=[bar],
                            visible=True),
                        marker=dict(color='rgba(0,0,0,0)', size=8),
                        showlegend=False
                    ), row=1, col=2)
        

         
    fig.update_layout(template="plotly_white",font=dict(family="Arial",size=7), margin=dict(r=5,l=60))
    fig.write_image("stdPlot.png",engine="kaleido", scale =20.0)


def generatePlot(plotName,rows,cols,data,width,height,vertical_spacing,horizontal_spacing,
                 title_font,marker_size,label_size,tick_size,plot_title,line_color):
    df_sorted=data
    
    specs=[]
    for row in range(1,rows+1):
        a=[]
        for col in range(1,cols+1):
            a.append({"type": "polar"})
        specs.append(a)
            
    fig = make_subplots(rows=rows, cols=cols,vertical_spacing=vertical_spacing,
                        subplot_titles=[i.replace('__', '-') for i in df_sorted.index.tolist()],
                        horizontal_spacing=horizontal_spacing,specs=specs)
    
    row=1     
    col=1
    for model in df_sorted.index:
        name=[]
        value=[]
    
        model_score=df_sorted.loc[model]
        for score in model_score.index:
            if score=="model":
                continue
            name.append(score)
            value.append(model_score.loc[score]*100)
        
        fig_tem=go.Scatterpolar(r=value,name=model,dtheta=20,
                              theta=name,fill='toself',
                              line_color=line_color)     
        fig.add_trace(fig_tem,
                  row=row, col=col)

        
        if col ==cols:
            col=1
            row+=1
        else:
            col+=1
        
    
    for i in fig['layout']['annotations']:
        i['font'] = dict(size=title_font,family="Arial",color='black')
        i['borderpad'] =5
    
    fig.update_layout(width=width,height=height,font_size =label_size,template="plotly_white",
                      font_family="Arial",
            
                      showlegend=False,margin=dict(t=30, b=15, r=10, l=15,))
    fig.update_layout(title={'text':plot_title,'y':0.995,'x':0.5,
                            'xanchor': 'center','yanchor': 'top',
                            "font_family":"Arial","font_size":10})

    fig.update_polars(radialaxis=dict(
                          visible=True,nticks=7,
                          angle=90,
                          range=[30, 100],
                          tickfont=dict(size=tick_size)
                        ),
        angularaxis = dict(showticklabels=False, ticks='', linewidth = 0.2,showline=True,linecolor='black'))
    
    #fig.update_traces(marker=dict(size=6,line_color="black",color=px.colors.qualitative.Set1), selector=dict(type='scatterpolar'))
    fig.update_traces(marker=dict(size=marker_size,line_color="black",color=px.colors.sequential.Viridis), selector=dict(type='scatterpolar'))

    fig.update_polars(angularaxis = dict(showticklabels=True))
    
    #for i in range(1,7):
     #   fig.update_polars(angularaxis = dict(showticklabels=True),row=1,col=i)

    #for i in range(30,36):
        #fig.layout.annotations[i].update(y=0.13)
        
    fig.write_image(plotName+".png",engine="kaleido", scale =20.0)
    
def getSunburstPlot(results,cvName,plot_title,line_color):
    df = pd.DataFrame({'Accuracy': results["Acc"],
                   'Balanced Acc': results["Bal_acc"],
                    'F1': results["F1"],
                    'Recall': results["recall"],
                    'Precision': results["precision"],
                   'Avg precision': results["average_precision"],
                    'roc_auc': results["roc_auc"],
                  "model":results["model"]}, index=results["model"])

    df_sorted= df.sort_values('F1')
    generatePlot(cvName,6,6,df_sorted,850,600,0.08,0.045,10,4,7,7,plot_title,line_color)
    writeData(df_sorted,cvName)
#plotName,rows,cols,data,width,height,vertical_spacing,
#horizontal_spacing,title_font,marker_size,label_size,tick_size ,plot_title    
    


def getSunburstPlot_subset(results,cvName,plot_title,line_color):
    df = pd.DataFrame({'Accuracy': results["Acc"],
                   'Balanced Acc': results["Bal_acc"],
                    'F1': results["F1"],
                    'Recall': results["recall"],
                    'Precision': results["precision"],
                   'Avg precision': results["average_precision"],
                    'roc_auc': results["roc_auc"],
                  "model":results["model"]}, index=results["model"])

    df_sorted= df.sort_values('F1')
    df_sorted=df_sorted.iloc[[0,1,2,33,34,35]]
        
    return(generatePlot(cvName,3,2,df_sorted,275,300,0.2,0.07,9,3,7,7,plot_title,line_color))


# Generate plots and data files

In [3]:
    
                        ##############################
                        ####### Pain Vs Control ######
                        ##############################

os.chdir("pain_control_fs")
results_whole=joblib.load("results_whole.pickle","r")
nested_results_whole=joblib.load("nested_results_whole.pickle","r")

getSunburstPlot(results_whole,"result_whole","PC_KFCV","#B9E4E8")
getSunburstPlot(nested_results_whole,"nested_whole","PC_NCV","#8CBFAA")
getSunburstPlot_subset(results_whole,"result_whole_subset","PC_KFCV","#B9E4E8")
errorPlot(results_whole,nested_results_whole)



#                         ##############################
#                         ####### DO Vs Control ######
#                         ##############################

os.chdir("../do_control_fs")
results_whole=joblib.load("results_whole.pickle","r")
nested_results_whole=joblib.load("nested_results_whole.pickle","r")

getSunburstPlot(results_whole,"result_whole","DC_KFCV","#B9E4E8")
getSunburstPlot(nested_results_whole,"nested_whole","DC_NCV","#8CBFAA")
fig=getSunburstPlot_subset(results_whole,"result_whole_subset","DC_KFCV","#B9E4E8")
errorPlot(results_whole,nested_results_whole)


#                         ##############################
#                         ####### BPS Vs Do ######
#                         ##############################

os.chdir("../bps_do_fs")
results_whole=joblib.load("results_whole.pickle","r")
nested_results_whole=joblib.load("nested_results_whole.pickle","r")

getSunburstPlot(results_whole,"result_whole","BD_KFCV","#B9E4E8")
getSunburstPlot(nested_results_whole,"nested_whole","BD_NCV","#8CBFAA")
fig=getSunburstPlot_subset(results_whole,"result_whole_subset","BD_KFCV","#B9E4E8")
errorPlot(results_whole,nested_results_whole)


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



# Session Info

In [4]:
import session_info
session_info.show(html=False)

-----
ipykernel           5.3.4
joblib              1.0.1
numpy               1.20.3
pandas              1.3.2
plotly              5.2.1
session_info        1.0.0
-----
IPython             7.26.0
jupyter_client      6.1.12
jupyter_core        4.7.1
jupyterlab          3.1.7
notebook            6.4.2
-----
Python 3.9.6 (default, Jul 30 2021, 09:31:09) [Clang 10.0.0 ]
macOS-10.16-x86_64-i386-64bit
-----
Session information updated at 2022-02-09 16:06
