In [5]:
import pandas as pd
import plotly.express as px

### Code quality bar plots

In [20]:
# Code-quality annotation results: for every dataset, the share of samples
# judged "Compliant", "Partially compliant" or "Non-compliant".

# GPT4 results, expressed in percent (each row sums to 100).
# A former fraction-valued (0-1) definition of this same name was removed:
# it was overwritten immediately and never read.
data_training = {
    "Model": ["GPT4", "GPT4", "GPT4", "GPT4", "GPT4"],
    "Dataset": ["RTE_Quant", "RedditNLI", "NewsNLI", "AWPNLI", "StressTest"],
    "Compliant": [60, 68, 62, 98, 60],
    "Partially compliant": [28, 20, 18, 2, 34],
    "Non-compliant": [12, 12, 20, 0, 6]
}

# CL-I (13B) results, expressed as fractions (0-1).
# NOTE(review): the RedditNLI, NewsNLI and StressTest rows sum to 0.99 / 0.98 /
# 0.98 rather than 1, and the output recorded under this cell shows 0.19, 0.30
# and 0.54 for those rows -- confirm which values are correct.
data_finetuned_correct = {
    "Model": ["CL-I (13B)", "CL-I (13B)", "CL-I (13B)", "CL-I (13B)", "CL-I (13B)"],
    "Dataset": ["RTE_Quant", "RedditNLI", "NewsNLI", "AWPNLI", "StressTest"],
    "Compliant": [0.46, 0.72, 0.52, 0.92, 0.52],
    "Partially compliant": [0.40, 0.18, 0.28, 0.06, 0.36],
    "Non-compliant": [0.14, 0.09, 0.18, 0.02, 0.10]
}

# Frame consumed by the plotting cell below.
# NOTE(review): this selects the GPT4 data, whereas the recorded output, the
# chart title ("... CL-Instruct 13B") and the exported file name all refer to
# CL-I (13B). Switch to `data_finetuned_correct` if that is the intended input.
df = pd.DataFrame(data_training)

# Display DataFrame
print(df)

        Model     Dataset  Compliant  Partially compliant  Non-compliant
0  CL-I (13B)   RTE_Quant       0.46                 0.40           0.14
1  CL-I (13B)   RedditNLI       0.72                 0.19           0.09
2  CL-I (13B)     NewsNLI       0.52                 0.30           0.18
3  CL-I (13B)      AWPNLI       0.92                 0.06           0.02
4  CL-I (13B)  StressTest       0.54                 0.36           0.10


In [21]:
# Stacked bar chart of the code-quality shares held in `df`.

# Bar colour per evaluation category; the keys are the category columns of `df`.
colors = {"Compliant": "#90EE90", "Partially compliant": "#FFFFE0", "Non-compliant": "#FFCCCC"}

# Reshape to long format: one row per (Model, Dataset, Evaluation) holding the
# corresponding value in "Percentage". "Model" is kept as an id column so that
# its string values are not melted into the numeric "Percentage" column.
df_melted = df.melt(
    id_vars=["Model", "Dataset"],
    value_vars=list(colors),
    var_name="Evaluation",
    value_name="Percentage",
)

# Create stacked bar chart
fig = px.bar(df_melted, x="Dataset", y="Percentage", color="Evaluation", color_discrete_map=colors, barmode="stack",
             text="Percentage", title="Code quality for script obtained with CL-Instruct 13B",
             labels={"Percentage": "Percentage (%)", "Dataset": "Dataset", "Evaluation": "Evaluation"})

# Show the values inside the bar segments
fig.update_traces(textposition='inside')
# Black text on a fully transparent plot and paper background
fig.update_layout(
    font=dict(color="black"),
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)
# Hide grid lines and zero lines on both axes
fig.update_xaxes(showgrid=False, zeroline=False)
fig.update_yaxes(showgrid=False, zeroline=False)
# Show the plot
fig.show()


In [22]:
# Export whatever figure is currently bound to `fig` as a PDF; the relative
# path is resolved against the kernel's working directory.
fig.write_image("finetuned_cli13b_code_quality_correct_samples.pdf")

### Balanced and zero-shot finetuning results

In [None]:
# Weighted-F1 of the balanced CLI-13 run alone, one entry per dataset.
cli13_balanced = {
    "Model": ["CLI-13 (b)"] * 6,
    "Dataset": ["RTE_Quant", "RedditNLI", "NewsNLI", "AWPNLI", "StressTest", "EQUATE"],
    "weighted F1": [65.18, 62.24, 47.92, 76.72, 36.41, 44.45]
}

# Long-format rows [model, dataset, weighted-F1], grouped by dataset.
# Each tuple lists the scores in the model order given in the zip() below.
normal_vs_balanced_res = [
    [model_name, dataset_name, f1]
    for dataset_name, f1_by_model in {
        "RTE_Quant": (65.18, 72.60, 66.53, 36.95),
        "RedditNLI": (62.24, 67.83, 58.82, 53.64),
        "NewsNLI": (47.92, 72.69, 40.55, 69.33),
        "AWPNLI": (76.72, 95.78, 86.86, 93.26),
        "StressTest": (36.41, 96.96, 48.81, 93.23),
        "EQUATE": (44.45, 91.97, 53.80, 87.68),
    }.items()
    for model_name, f1 in zip(
        ("CLI-13B (balanced)", "CLI-13B", "Llama2-7B (balanced)", "Llama2-7B"),
        f1_by_model,
    )
]

# Tabular form used by the plotting cells.
normal_vs_balanced_df = pd.DataFrame(
    normal_vs_balanced_res,
    columns=["Model", "Dataset", "weighted-F1"],
)

In [None]:
# Sanity check: preview the first rows of the long-format comparison table.
normal_vs_balanced_df.head()

In [None]:
import plotly.express as px
import pandas as pd

# Grouped bar chart: weighted-F1 per dataset, one bar per model variant.

# Fixed colour per model variant: lighter shade for the plain run, darker shade
# for the "(balanced)" run of the same model.
custom_colors = {
    "CLI-13B": "#87CEEB",               # sky blue
    "Llama2-7B": "#FFA07A",             # light salmon
    "CLI-13B (balanced)": "#00008B",    # dark blue
    "Llama2-7B (balanced)": "#8B0000"   # dark red
}

# Colours are handed to plotly express directly instead of being patched onto
# the traces afterwards. Only the final title is passed: the earlier code set a
# different title here and then overrode it in update_layout.
fig = px.bar(
    normal_vs_balanced_df,
    x='Dataset',
    y='weighted-F1',
    color='Model',
    barmode='group',
    color_discrete_map=custom_colors,
    title='Fine-tuning results using an imbalanced and a balanced training dataset'
)

# White background, black text everywhere, grid lines drawn in white.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(font=dict(color='black')),
    xaxis_title=dict(font=dict(color='black')),
    yaxis_title=dict(font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.5,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()

# Export the figure as a PDF (path relative to the working directory).
fig.write_image("balanced_vs_unbalanced_barplot.pdf")

In [None]:
# Line chart: weighted-F1 across datasets, one line (with markers) per model variant.

# Fixed colour per model variant: red family for CLI-13B, blue family for
# Llama2-7B; the darker shade marks the "(balanced)" run.
custom_colors = {
    "CLI-13B (balanced)": "#8B0000",    # dark red
    "CLI-13B": "#FFC0CB",               # pink
    "Llama2-7B (balanced)": "#00008B",  # dark blue
    "Llama2-7B": "#ADD8E6"              # light blue
}

# The colour map is passed to plotly express rather than assigned per trace
# with an unguarded dict lookup, which raised KeyError for any model missing
# from the map; unmapped models now fall back to plotly's default colours.
fig = px.line(
    normal_vs_balanced_df,
    x='Dataset',
    y='weighted-F1',
    color='Model',
    markers=True,
    color_discrete_map=custom_colors,
    title='Fine-tuned models trained on a balanced and an unbalanced dataset'
)

# White background, black text everywhere, grid lines drawn in white.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(font=dict(color='black')),
    xaxis_title=dict(font=dict(color='black')),
    yaxis_title=dict(font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.3,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()

In [None]:
# Export whatever figure is currently bound to `fig` as a PDF; the relative
# path is resolved against the kernel's working directory.
fig.write_image("balanced_vs_unbalanced_finetuning_legend_below.pdf")

### ZERO-SHOT fine-tuning

In [None]:
# Random-baseline weighted-F1 per dataset, as [model, dataset, score] rows.
random_baseline = [
    ["Random baseline", dataset_name, f1]
    for dataset_name, f1 in (
        ("RTE_Quant", 51.10),
        ("RedditNLI", 45.45),
        ("NewsNLI", 50.00),
        ("AWPNLI", 50.00),
        ("StressTest", 33.71),
        ("EQUATE", 33.55),
    )
]

# CLI-7B rows grouped by dataset (scores follow the model order given in the
# zip() below), followed by a copy of the random-baseline rows.
zero_shot_vs_few_shot_res = [
    [model_name, dataset_name, f1]
    for dataset_name, f1_by_model in {
        "RTE_Quant": (63.21, 70.00, 29.32),
        "RedditNLI": (46.96, 64.21, 43.60),
        "NewsNLI": (56.25, 68.50, 21.54),
        "AWPNLI": (59.46, 94.40, 42.55),
        "StressTest": (31.96, 95.10, 32.42),
        "EQUATE": (40.25, 89.76, 33.26),
    }.items()
    for model_name, f1 in zip(
        ("CLI-7B (base, FS)", "CLI-7B (fine-tuned, FS)", "CLI-7B (fine-tuned, ZS)"),
        f1_by_model,
    )
] + [list(baseline_row) for baseline_row in random_baseline]  # copies, not aliases

_result_columns = ["Model", "Dataset", "weighted-F1"]
random_baseline_df = pd.DataFrame(random_baseline, columns=_result_columns)
zero_shot_vs_few_shot_df = pd.DataFrame(zero_shot_vs_few_shot_res, columns=_result_columns)

In [None]:
# Sanity check: preview the first rows of the zero-shot / few-shot results table.
zero_shot_vs_few_shot_df.head()

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Grouped bar chart built with graph_objects: one bar trace per model
# (the random baseline is drawn as bars as well, not as an overlay line).

# Fixed colour per model
custom_colors = {
    "CLI-7B (fine-tuned, FS)": "#ADD8E6",  # light blue
    "CLI-7B (base, FS)": "#0000CD",        # medium blue
    "CLI-7B (fine-tuned, ZS)": "#00008B",  # dark blue
    "Random baseline": "gray"
}

# Create a figure
fig = go.Figure()

# One bar trace per model; sort=False keeps the models in order of first
# appearance in the frame, which fixes the legend and bar order.
for model, model_data in zero_shot_vs_few_shot_df.groupby('Model', sort=False):
    fig.add_trace(
        go.Bar(
            x=model_data['Dataset'],
            y=model_data['weighted-F1'],
            name=model,
            marker_color=custom_colors[model]
        )
    )

# White background, black text everywhere, grid lines drawn in white.
# Axis title texts are set explicitly: a graph_objects figure has none by
# default, so styling only the title font left both axes unlabeled.
fig.update_layout(
    barmode='group',
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(
        text='Fine-tuning results with CodeLlama-Instruct 7B,<br>in zero-shot (ZS) and few-shot (FS) contexts',
        font=dict(color='black')
    ),
    xaxis_title=dict(text='Dataset', font=dict(color='black')),
    yaxis_title=dict(text='weighted-F1', font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.5,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()
# Export the figure as a PDF (path relative to the working directory).
fig.write_image("zs_vs_fz_results_barplot_without_line.pdf")

In [None]:
import plotly.express as px
import pandas as pd

# Grouped bar chart: weighted-F1 per dataset, one bar per model.

# Fixed colour per model
custom_colors = {
    "CLI-7B (fine-tuned, FS)": "#ADD8E6",  # light blue
    "CLI-7B (base, FS)": "#0000CD",        # medium blue
    "CLI-7B (fine-tuned, ZS)": "#00008B",  # dark blue
    "Random baseline": "gray"
}

# Colours are handed to plotly express directly instead of being patched onto
# the traces afterwards.
fig = px.bar(
    zero_shot_vs_few_shot_df,
    x='Dataset',
    y='weighted-F1',
    color='Model',
    barmode='group',
    color_discrete_map=custom_colors,
    title='Fine-tuning results with CodeLlama-Instruct 7B,<br>in zero-shot (ZS) and few-shot (FS) contexts'
)

# White background, black text everywhere, grid lines drawn in white.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(font=dict(color='black')),
    xaxis_title=dict(font=dict(color='black')),
    yaxis_title=dict(font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.5,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()

# Export the figure as a PDF (path relative to the working directory).
fig.write_image("zs_vs_fs_barplot.pdf")

In [None]:
import plotly.express as px
import pandas as pd

# Transposed view of the same results: models on the x axis, one bar per dataset.

# Fixed colour per dataset (the colour dimension here is 'Dataset', not 'Model').
custom_colors = {
    "RTE_Quant": "#ADD8E6",   # light blue
    "RedditNLI": "#0000CD",   # medium blue
    "NewsNLI": "#00008B",     # dark blue
    "StressTest": "#FFA07A",  # light salmon
    "AWPNLI": "#8B0000",      # dark red
    "EQUATE": "green",
}

# Colours are handed to plotly express directly instead of being patched onto
# the traces afterwards.
fig = px.bar(
    zero_shot_vs_few_shot_df,
    x='Model',
    y='weighted-F1',
    color='Dataset',
    barmode='group',
    color_discrete_map=custom_colors,
    title='Fine-tuning results with CodeLlama-Instruct 7B,<br>in zero-shot (ZS) and few-shot (FS) contexts'
)

# White background, black text everywhere, grid lines drawn in white.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(font=dict(color='black')),
    xaxis_title=dict(font=dict(color='black')),
    yaxis_title=dict(font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.5,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()

In [None]:
# Line chart: weighted-F1 across datasets, one line (with markers) per model.

# Fixed colour per model (named CSS colours).
custom_colors = {
    "CLI-7B (fine-tuned, FS)": "green",
    "CLI-7B (base, FS)": "blue",
    "CLI-7B (fine-tuned, ZS)": "red",
    "Random baseline": "gray"
}

# The colour map is passed to plotly express rather than assigned per trace
# with an unguarded dict lookup, which raised KeyError for any model missing
# from the map; unmapped models now fall back to plotly's default colours.
fig = px.line(
    zero_shot_vs_few_shot_df,
    x='Dataset',
    y='weighted-F1',
    color='Model',
    markers=True,
    color_discrete_map=custom_colors,
    title='Fine-tuning results with CodeLlama-Instruct 7B,<br>in zero-shot (ZS) and few-shot (FS) contexts'
)

# White background, black text everywhere, grid lines drawn in white.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(color='black'),
    xaxis=dict(showgrid=True, gridcolor='white'),
    yaxis=dict(showgrid=True, gridcolor='white'),
    title=dict(font=dict(color='black')),
    xaxis_title=dict(font=dict(color='black')),
    yaxis_title=dict(font=dict(color='black')),
    legend=dict(
        title_font=dict(color='black'),
        font=dict(color='black'),
        orientation="h",
        yanchor="bottom",
        y=-0.5,  # negative y puts the horizontal legend below the plot area
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()

In [None]:
# Export whatever figure is currently bound to `fig` as a PDF; the relative
# path is resolved against the kernel's working directory.
# NOTE(review): "few_show" in the file name looks like a typo for "few_shot" --
# confirm that nothing references this path before renaming it.
fig.write_image("zero_shot_vs_few_show_results.pdf")