In [1]:
import math
import warnings

import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Line colour, dash pattern and display name of each query strategy.
# The three lists are index-aligned; the two dicts below look styles up by query name.
colors = ['#BB4430', '#255F85', '#F08700', '#50514f']
shapes = ['dash', 'dashdot', 'dot', 'solid']
queries = ['Prediction Entropy', 'Breaking Ties', 'Least Confidence', 'Random Sampling']
color_dct = dict(zip(queries, colors))
shape_dct = dict(zip(queries, shapes))

# Size label per dataset name.
data_sizes = dict(
    ClaimDetection='3k',
    NewsTopic='7k',
    Claimbuster='17k',
    Subjectivity='7k',
    Hatespeech='12k',
    Sentiment='50k',
)

# Emission rate in gCO2/kWh.
# Source: Icha, P., Lauf, T., & Kuhs, G. (2022). Entwicklung der spezifischen Treibhausgas-Emissionen
# des deutschen Strommix in den Jahren 1990—2021. Umweltbundesamt.
# https://www.umweltbundesamt.de/publikationen/entwicklung-der-spezifischen-kohlendioxid-8
emission_rate = 485

# 1.3 kWh (https://www.borderstep.de/wp-content/uploads/2020/06/Videostreaming-2020.pdf),
# presumably one hour of 4K video streaming going by the name, converted to gCO2.
hour_4K = 1.3 * emission_rate


In [102]:
def plot(df:pd.DataFrame, datasets:list, file_name:str, trainsplit:float = .7) -> None:
    """Draw F1 and cumulative-emission curves per dataset and export the figure.

    One subplot column is created for every dataset left in ``df`` after
    filtering. The top row plots ``F1`` against ``Percent`` for each ``Query``
    strategy; the bottom row plots the running sum of ``Emissions (gr)``.
    When the baseline CSV has a row for a dataset, a dashed gray line and a
    text label mark the baseline F1 (top) and ``Train_energy * emission_rate``
    (bottom). The figure is written as PNG, SVG, JPG, PDF and HTML and shown.

    Args:
        df: Results with the columns ``Data``, ``Query``, ``Percent``, ``F1``,
            ``Emissions (gr)`` and ``Dataset_size``.
        datasets: Values of ``Data`` to include.
        file_name: Suffix of the exported ``Active_Vision_<file_name>`` files.
        trainsplit: Factor applied to ``Dataset_size`` for the ``n=`` value
            shown in each subplot title.

    Raises:
        ValueError: If no rows are left after filtering.
        KeyError: If a ``Query`` value has no entry in ``color_dct`` / ``shape_dct``.
    """
    df = df[df.Data.isin(datasets)]
    # Rows beyond 5.1 % / 7.1 % of the training set are dropped for these datasets.
    df = df[~((df.Data.isin(['AG_News', 'Yahoo'])) & (df.Percent > 5.1))]
    # Was spelled 'imbd', which never matched the 'imdb' name used for dataset selection.
    # NOTE(review): confirm the CSV labels this dataset 'imdb'.
    df = df[~((df.Data.isin(['imdb', 'PatientReviews'])) & (df.Percent > 7.1))]
    if df.empty:
        raise ValueError(f"No rows left to plot for datasets {datasets!r}")

    # Group once; the same (name, frame) pairs drive the titles and the columns.
    groups = list(df.groupby('Data'))
    n_cols = len(groups)
    fig = make_subplots(rows=2,
                    cols=n_cols,
                    specs=[[{} for _ in range(n_cols)] for _ in range(2)],
                    horizontal_spacing=.05,
                    vertical_spacing=.05,
                    subplot_titles=[f"{name} (n={int(data_group.Dataset_size.iloc[0] * trainsplit)})" for name, data_group in groups]
                   )
    fig.update_layout(template='plotly_white')

    # Baselines per dataset: name -> (F1, Train_energy * emission_rate), taken from
    # the first CSV row of each dataset. Read once instead of once per subplot.
    # The plot is still drawn without baselines if the file cannot be used.
    baselines = {}
    try:
        baseline_df = pd.read_csv('PATH/ActiveVisualsBaseline.csv')
        for base_name, base_rows in baseline_df.groupby('Data'):
            baselines[base_name] = (base_rows['F1'].values[0],
                                    base_rows['Train_energy'].values[0] * emission_rate)
    except (OSError, ValueError, KeyError) as err:
        warnings.warn(f"Baselines unavailable, plotting without them: {err!r}")

    baseline_shapes = []
    for idx, (name, data_group) in enumerate(groups):
        col = idx + 1
        # Axes of the second row are numbered after the n_cols axes of the first row.
        bottom_axis = col + n_cols
        first_column = idx == 0

        for query, query_group in data_group.groupby('Query'):
            line_style = dict(color=color_dct[query], width=2, dash=shape_dct[query])
            # Legend entries come from the first column's F1 traces only.
            fig.add_trace(go.Scatter(x=query_group.Percent, y=query_group.F1, name=query, showlegend=first_column,
                        line=line_style,
                        marker=dict(size=0, opacity=0)), row=1, col=col)
            fig.add_trace(go.Scatter(x=query_group.Percent, y=query_group['Emissions (gr)'].cumsum(), name=query, showlegend=False,
                        line=line_style,
                        marker=dict(size=0, opacity=0)), row=2, col=col)

        # Axis limits cover every query of the dataset, not just the last one iterated.
        x_max = data_group.Percent.max()
        upper_bound = min(1, data_group.F1.max()+.3)
        lower_bound = data_group.F1.min()
        n_ticks = max(math.ceil((upper_bound-lower_bound)/.05) + 1, 10)

        fig.update_xaxes(nticks=10, showticklabels=False, row=1, col=col)
        fig.update_xaxes(title_text='% of Training Set', title_font_color ='black', nticks=10, row=2, col=col)
        fig.update_yaxes(range=[lower_bound,upper_bound], nticks=n_ticks, row=1, col=col)
        fig.update_yaxes(nticks=10, row=2, col=col)
        if first_column:
            fig.update_yaxes(title_text='F1', title_font_color ='black', row=1, col=col)
            fig.update_yaxes(title_text='Emissions (gr)', title_font_color ='black', row=2, col=col)

        # Baseline line and label are drawn once per dataset, and only when known.
        if name in baselines:
            f1_baseline, emissions_baseline = baselines[name]
            for axis_no, level in ((col, f1_baseline), (bottom_axis, emissions_baseline)):
                baseline_shapes.append(dict(type="line", xref=f"x{axis_no}", yref=f"y{axis_no}",
                                x0=1, y0=level,
                                x1=x_max, y1=level,
                                line_color="gray",
                                line_width=1, opacity=1,
                                line_dash="dash"))
            fig.add_annotation(x=max(2.3,x_max//2), y=f1_baseline+(upper_bound-lower_bound)/30,
                               text="Passive Performance Baseline",
                               xref=f"x{col}", yref=f"y{col}",
                               showarrow=False,
                               textangle=0,
                               align="left",
                               font=dict(size=12, color="black"))
            # The vertical offset reuses the F1 span, as before.
            fig.add_annotation(x=max(2.8,x_max//2), y=emissions_baseline+(upper_bound-lower_bound)*1.3,
                               text="Passive Emission Baseline",
                               xref=f"x{bottom_axis}", yref=f"y{bottom_axis}",
                               showarrow=False,
                               textangle=0,
                               align="left",
                               font=dict(size=12, color="black"))

    fig.update_layout(shapes=baseline_shapes)
    fig.update_layout(font_color='black')
    for folder, ext in (('PNG', 'png'), ('SVG', 'svg'), ('JPG', 'jpg'), ('PDF', 'pdf')):
        pio.write_image(fig, f'PATH/{folder}/Active_Vision_{file_name}.{ext}',scale=10, width=1080, height=650)
    fig.write_html(f"PATH/HTML/Active_Vision_{file_name}.html")
    fig.show()
    
    
# Load the active-learning results and plot the selected datasets.
# NOTE(review): absolute, machine-specific path; the other cells read from 'PATH/...'.
df = pd.read_csv('/home/sami/READER_REPO/Stats/Data/ActiveLearner/ActiveVisuals.csv')
# Full list of dataset names, kept for reference under its own name. It used to be
# assigned to `datasets` and was overwritten on the very next line.
all_datasets = ['Liar', 'MedicalAbstracts', 'Go_Emotions', 'imdb', 'Cola','PatientReviews',  'Twitter_Sentiment', 'AG_News','Yahoo']
# Subset that is actually plotted.
datasets = ['AG_News', 'NewsTopic', 'Claimbuster']
name = 'Positives'
plot(df=df, datasets=datasets, file_name=name)


In [83]:
# Summary figure on a 2x2 grid: difference curves with percentage bars in the top
# row, emission factors in the bottom row. Exported as PNG/SVG/JPG/PDF/HTML.
stats = pd.read_csv('PATH/for_plotting.csv').sort_values('Size')
# 'Breaking Ties' rows get the second palette colour, every other query the first.
stats['colors'] = [colors[1] if query_name == 'Breaking Ties' else colors[0]
                   for query_name in stats['Query']]
n_points = len(stats)

panel_titles = ['Passive Baseline', 'Random Baseline', 'Active Training Emissions', 'Active Query Emissions']
fig = make_subplots(rows=2,
                    cols=2,
                    specs=[[{}, {}], [{}, {}]],
                    horizontal_spacing=.1,
                    vertical_spacing=.1,
                    subplot_titles=panel_titles)
fig.update_layout(template='plotly_white', font_color='black')

# Top row: difference curves, every point labelled with its truncated percentage.
labelled_panels = [
    ('Max Difference', 'bottom left'),
    ('Random Difference', ['bottom center'] + ['top center'] * (n_points-1)),
]
for grid_col, (diff_column, label_position) in enumerate(labelled_panels, start=1):
    point_labels = [f"{int(percent)}%" for percent in stats[diff_column].to_list()]
    fig.add_trace(go.Scatter(x=stats.Data, y=stats[diff_column],
                             mode='lines+markers+text', showlegend=False,
                             text=point_labels,
                             textposition=label_position,
                             line=dict(color='black', width=1),
                             name='lines'), row=1, col=grid_col)

# Bottom row: emission factor curves without labels.
for grid_col, factor_column in enumerate(['Max Training Factor', 'Max Query Factor'], start=1):
    fig.add_trace(go.Scatter(x=stats.Data, y=stats[factor_column],
                             mode='lines+markers', showlegend=False,
                             line=dict(color='black', width=1),
                             name='lines'), row=2, col=grid_col)

# Bars are added after the curves, coloured per row by stats.colors.
for grid_col, bar_column in enumerate(['Percent', 'Percent Difference'], start=1):
    fig.add_trace(go.Bar(x=stats.Data, y=stats[bar_column],
                         showlegend=False, marker_color=stats.colors), row=1, col=grid_col)

# Horizontal reference lines: y=0 in the top panels, y=1 in the bottom panels.
for axis_no, reference_level in ((1, 0), (2, 0), (3, 1), (4, 1)):
    fig.add_shape(
        type="line", line_color="gray", line_width=1, opacity=.8, line_dash="solid",
        x0=0, x1=n_points-1, y0=reference_level, y1=reference_level,
        xref=f'x{axis_no}', yref=f'y{axis_no}'
        )

# Label for the reference line in each bottom panel.
for axis_no, label_x, label_y in ((3, 2.5, 1.05), (4, 9, 1.1)):
    fig.add_annotation(x=label_x, y=label_y,
                       text="Passive Training Emissions",
                       xref=f'x{axis_no}',
                       yref=f'y{axis_no}',
                       showarrow=False,
                       textangle=0,
                       align="left",
                       font=dict(size=12, color="black"))

for grid_col in (1, 2):
    fig.update_xaxes(showticklabels=False, row=1, col=grid_col)
fig.update_yaxes(title_text='F1 & Trainset Percentage', row=1, col=1)
fig.update_yaxes(title_text='Emission Factor', row=2, col=1)

for export_dir, export_ext in (('PNG', 'png'), ('SVG', 'svg'), ('JPG', 'jpg'), ('PDF', 'pdf')):
    pio.write_image(fig, f'PATH/{export_dir}/Active_Summary.{export_ext}',scale=10, width=1080, height=650)
fig.write_html("PATH/HTML/Active_Summary.html")
fig.show()

In [4]:
def param_tuning(path:str = 'PATH/ParamTweaking.csv') -> None:
    """Plot F1 against emissions for each parameter setting and export the figure.

    One subplot is drawn per dataset. Filled markers show ``Emissions (gr)`` vs
    ``F1``; open markers of the same colour and symbol show
    ``Random_Emissions (gr)`` vs ``Random_F1``. A dashed horizontal line marks
    ``Baseline_F1`` and a dashed vertical line marks ``Baseline_Emissions``,
    both taken from the first row of the dataset. The figure is written as
    PNG, SVG, JPG, PDF and HTML and then shown.

    Args:
        path: CSV with the columns ``Data``, ``Parameter``, ``F1``,
            ``Emissions (gr)``, ``Random_F1``, ``Random_Emissions (gr)``,
            ``Baseline_F1`` and ``Baseline_Emissions``.

    Raises:
        KeyError: If ``Parameter`` holds a value other than ``Step50``,
            ``Step100``, ``Step250``, ``Pool50`` or ``Pool70``.
        ValueError: If one of the plotted datasets has no rows in the CSV.
    """
    # Raw parameter code -> display label; colours and symbols are keyed by label.
    # Names differ from the module-level colors/shapes so those are not shadowed.
    param_labels = {'Step50': 'Original', 'Step100': 'Step 100', 'Step250': 'Step 250',
                    'Pool50': 'Pool 50%', 'Pool70': 'Pool 70%'}
    labels = list(param_labels.values())
    param_colors = dict(zip(labels, ['#BB4430','#255F85', '#F08700','#0091AD', '#6F7C12']))
    param_symbols = dict(zip(labels, ['circle', 'square', 'star-diamond', 'hexagram', 'star-triangle-up']))

    df = pd.read_csv(path)
    df['Parameter'] = df['Parameter'].apply(lambda raw: param_labels[raw])
    df['Color'] = df['Parameter'].map(param_colors)
    df['Shape'] = df['Parameter'].map(param_symbols)

    # Grid size and titles follow this list ('AG_News' is titled 'AG News').
    datasets = ['NewsTopic', 'Claimbuster', 'AG_News']
    fig = make_subplots(rows=1,
                        cols=len(datasets),
                        specs=[[{} for _ in datasets]],
                        subplot_titles=[dataset.replace('_', ' ') for dataset in datasets]
                        )
    fig.update_layout(template='plotly_white', font_color='black')

    for idx, dataset in enumerate(datasets):
        col = idx + 1
        axis_x, axis_y = f'x{col}', f'y{col}'
        temp = df[df.Data == dataset]
        if temp.empty:
            raise ValueError(f"No rows for dataset {dataset!r} in {path!r}")
        f1_baseline = temp['Baseline_F1'].iloc[0]
        em_baseline = temp['Baseline_Emissions'].iloc[0]
        em_max = temp['Emissions (gr)'].max()

        # Filled markers: active runs.
        fig.add_trace(go.Scatter(x=temp['Emissions (gr)'],
                                 y=temp['F1'],
                                 mode='markers',
                                 opacity=.8,
                                 showlegend=False,
                                 marker=dict(
                                     size = 15,
                                     line=dict(width=2, color="black"),
                                     color=temp.Color,
                                     symbol=temp.Shape)),
                      row=1, col=col)
        # Open markers: random-sampling counterpart of each run.
        fig.add_trace(go.Scatter(x=temp['Random_Emissions (gr)'],
                                 y=temp['Random_F1'],
                                 mode='markers',
                                 opacity=.8,
                                 showlegend=False,
                                 marker=dict(
                                     size = 15,
                                     line=dict(width=2, color="black"),
                                     color=temp.Color,
                                     symbol=[shape + '-open' for shape in temp.Shape])),
                      row=1, col=col)

        # Horizontal F1 baseline and vertical emission baseline.
        fig.add_shape(
            type="line", line_color="gray", line_width=1, opacity=1, line_dash="dash",
            x0=0, x1=em_max, y0=f1_baseline, y1=f1_baseline, xref=axis_x, yref=axis_y
            )
        fig.add_shape(
            type="line", line_color="gray", line_width=1, opacity=1, line_dash="dash",
            x0=em_baseline, x1=em_baseline, y0=0.47, y1=.98, xref=axis_x, yref=axis_y
            )
        fig.add_annotation(x=temp['Emissions (gr)'].mean(), y=f1_baseline+.01,
                           text="Passive Performance Baseline",
                           xref=axis_x,
                           yref=axis_y,
                           showarrow=False,
                           textangle=0,
                           align="left",
                           font=dict(size=12, color="black"))
        fig.add_annotation(x=em_baseline+(em_baseline*.1), y=.65,
                           text="Passive Emission Baseline",
                           xref=axis_x,
                           yref=axis_y,
                           showarrow=False,
                           textangle=90,
                           align="left",
                           font=dict(size=12, color="black"))

        fig.update_yaxes(range=[0.45,1],nticks=21, row=1, col=col)
        fig.update_xaxes(range=[0,em_max+(em_max*.05)],nticks=5, row=1, col=col)
        if idx > 0:
            # Only the first panel keeps its y tick labels.
            fig.update_yaxes(showticklabels=False, row=1, col=col)

    # Legend: one empty trace per parameter label present in the data, in sorted order.
    # Colour and symbol depend only on the label, so no random row sampling is needed.
    for label in sorted(df['Parameter'].unique()):
        fig.add_trace(go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            name=label,
            marker=dict(size=10, color=param_colors[label], symbol=param_symbols[label]),
        ), row=1, col=1)
    # Two more entries explain filled (active) vs open (random) markers.
    for legend_name, legend_symbol in (('Active', 'circle'), ('Random', 'circle-open')):
        fig.add_trace(go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            name=legend_name,
            marker=dict(size=10, color='black', symbol=legend_symbol),
        ), row=1, col=1)

    fig.update_yaxes(title = 'F1', row=1, col=1)
    fig.update_xaxes(title = 'Emissions (gr)', row=1, col=2)
    for folder, ext in (('PNG', 'png'), ('SVG', 'svg'), ('JPG', 'jpg'), ('PDF', 'pdf')):
        pio.write_image(fig, f'PATH/{folder}/Active_Params.{ext}',scale=10, width=1080, height=650)
    fig.write_html("PATH/HTML/Active_Params.html")
    fig.show()
# Build, export and display the parameter-tuning figure using the default CSV path.
param_tuning()