In [20]:
import os
import pandas as pd

# Directory that holds the result CSV files
results_folder = 'results'

# Build a dict of DataFrames, one per 'eval_dev_test' CSV in the folder.
# Each entry is keyed by the part of the filename before the first '_'.
# Note: os.listdir returns entries in arbitrary order, so the insertion
# order of this dict is whatever the filesystem reports.
dataframes = {}
for filename in os.listdir(results_folder):
    is_eval_csv = filename.endswith('.csv') and 'eval_dev_test' in filename
    if not is_eval_csv:
        continue
    # partition('_')[0] is the substring before the first '_'
    # (or the whole name when there is no '_')
    prefix = filename.partition('_')[0]
    dataframes[prefix] = pd.read_csv(os.path.join(results_folder, filename))

# Now `dataframes` is a dictionary containing all the loaded DataFrames

In [21]:
# The five prediction columns: prediction0 ... prediction4
prediction_columns = [f'prediction{i}' for i in range(5)]

for results in dataframes.values():
    # Row-wise comparison of every prediction column against class_label,
    # then the mean of the resulting booleans = fraction of correct predictions
    matches = results[prediction_columns].eq(results['class_label'], axis=0)
    results['fraction_correct'] = matches.mean(axis=1)

In [22]:
# Preview rows at positions 10 through 19 of the mixtral results
dataframes['mixtral'].iloc[10:20]

Unnamed: 0,Sentence_id,Text,class_label,prompt,prediction0,prediction1,prediction2,prediction3,prediction4,prediction,fraction_correct
10,37531,Why is it on the ballot?,No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
11,37571,And we will protect people.,No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
12,37589,I'll give you an example.,No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
13,37593,Take a look at all of the drugs that what we'r...,No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
14,37623,"On Super Tuesday, you got very lucky.",No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
15,37628,And tonight I'm going to make sure.,No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
16,37630,"Because here's the deal, here's the deal.",No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
17,37631,The fact is that everything he's saying so far...,Yes,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,0.0
18,37643,"The wrong guy, the wrong night, at the wrong t...",No,### Instruction:\nRead the statement provided ...,No,No,No,No,No,No,1.0
19,37651,You agreed with Bernie Sanders on a plan that ...,Yes,### Instruction:\nRead the statement provided ...,Yes,Yes,Yes,Yes,Yes,Yes,1.0


In [23]:
# Create a new dataframe with only the first 3 columns of dataframes['gemma']
# (displayed below as Sentence_id, Text, class_label).
# .copy() makes `fractions` an independent frame: columns are assigned into it
# in a later cell, and assigning into a bare iloc slice can raise
# SettingWithCopyWarning and leaves it unclear whether dataframes['gemma']
# is affected.
fractions = dataframes['gemma'].iloc[:, :3].copy()
fractions

Unnamed: 0,Sentence_id,Text,class_label
0,37436,He's been a professor for a long time at a gre...,No
1,37440,There's no way they would give it up.,No
2,37463,They're able to charge women more for the same...,Yes
3,37470,"As far as a say is concerned, the people alrea...",No
4,37504,I am the Democratic Party right now.,No
...,...,...,...
313,40950,We had the best Black unemployment numbers in ...,Yes
314,40954,Success is going to bring us together.,No
315,40960,Your 401K's will go to hell and it'll be a ver...,No
316,40971,We're going to choose to move forward because ...,No


In [35]:
# Add one '<key>_fractions_correct' column to `fractions` per loaded DataFrame.
# Assigning a Series aligns on the row index label, not on Sentence_id.
for model_name, model_results in dataframes.items():
    column_name = f'{model_name}_fractions_correct'
    fractions[column_name] = model_results['fraction_correct']

In [39]:
# Per model, count the rows whose fraction_correct is below 1.0, i.e. rows
# where at least one of the five predictions differs from class_label.
# This is "not perfect @5", not "completely misclassified" (that would be
# fraction_correct == 0).
below_one_counts = {
    f'{model}_non_perfect': (fractions[f'{model}_fractions_correct'] < 1.0).sum().item()
    for model in ('llama', 'mixtral', 'gemma')
}
below_one_counts

{'llama_non_perfect': 36, 'mixtral_non_perfect': 41, 'gemma_non_perfect': 37}

In [40]:
# Select the rows where at least n models have a fraction_correct below 1
n = 1
model_columns = [
    'llama_fractions_correct',
    'mixtral_fractions_correct',
    'gemma_fractions_correct',
]
# Per row: how many of the model columns are below 1
imperfect_model_count = fractions[model_columns].lt(1).sum(axis=1)
filtered_rows = fractions[imperfect_model_count >= n]
filtered_rows

Unnamed: 0,Sentence_id,Text,class_label,llama_fractions_correct,mixtral_fractions_correct,gemma_fractions_correct
17,37631,The fact is that everything he's saying so far...,Yes,0.4,0.0,0.8
20,37664,He has never offered a plan.,Yes,1.0,1.0,0.6
37,37777,"I laid out back in March, exactly what we shou...",No,1.0,0.8,1.0
40,37802,"Many of your Democrat Governors said, \Preside...",Yes,1.0,0.4,0.2
53,37971,He wants to shut down the country.,Yes,0.0,0.0,0.0
57,38023,Dr. Fauci said the opposite.,Yes,1.0,0.6,1.0
58,38075,He's been totally irresponsible the way in whi...,Yes,0.8,0.8,0.2
64,38156,Because it costs a lot of money to open them s...,Yes,0.0,0.0,0.0
80,38294,"And plenty of companies that are already here,...",Yes,0.0,0.0,0.0
87,38510,"He came out of his bunker, had the military us...",Yes,0.8,1.0,1.0


In [41]:
# Write the filtered rows next to the input CSVs, without the row index.
# The path is built from results_folder so it stays in sync with the folder
# the inputs were loaded from. The name 'filtered.csv' does not contain
# 'eval_dev_test', so the loading cell will not pick this file up on re-run.
filtered_rows.to_csv(os.path.join(results_folder, 'filtered.csv'), index=False)

In [None]:
import plotly.graph_objects as go
# Coordinates for the plot: one marker per row of `fractions`, positioned by
# each model's fraction_correct.
# NOTE(review): x, y and z were not defined in any earlier cell, so this cell
# failed with NameError on a fresh kernel. The axis titles below indicate the
# per-model columns; confirm `fractions` (rather than `filtered_rows`) is the
# intended source.
x = fractions['llama_fractions_correct']
y = fractions['mixtral_fractions_correct']
z = fractions['gemma_fractions_correct']

# Create a 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    marker=dict(
        size=5,
        color='blue',
        opacity=0.8
    )
)])

# Set the figure height, axis labels and title in one layout update
fig.update_layout(
    height=500,
    scene=dict(
        xaxis_title='Llama',
        yaxis_title='Mixtral',
        zaxis_title='Gemma'
    ),
    title='Interactive 3D Scatter Plot'
)

# Show the plot
fig.show()