In [14]:
import json
from pathlib import Path

import pandas as pd

# Read the motive-analysis JSON file and flatten its 'results' records into a
# DataFrame (one row per record).
filename = '26306-motive-analysis.json'

# Pass the encoding explicitly: without it, open() falls back to the
# platform's locale encoding, so non-ASCII text in the passages could be
# decoded differently from one machine to another.
with open(filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

df = pd.json_normalize(data['results'])
print(df.head())

   chunk_id                                            passage motive_yn  \
0         0  ﻿The Project Gutenberg eBook of Simon\n    \nT...         n   
1         1  The Yarn                                      ...         y   
2         2  Even as they were slowing down, he fell\non hi...         n   
3         3  "It doesn't seem to have changed much," said t...         y   
4         4  Professionally, he was lawyer to several of th...         n   

                                       motive               character  \
0                                                                       
1  singularly strong inclination for solitude  the solitary passenger   
2                                                                       
3                           very thoughtfully              Mr. Rattar   
4                                                                       

                                            behavior  
0                                                

In [15]:
# Show the top-level keys of the loaded JSON, followed by the value stored
# under 'model_used'.
top_level_keys = data.keys()
print(top_level_keys)

model_name = data['model_used']
print(model_name)

dict_keys(['source_chunks_file', 'model_used', 'total_chunks', 'motive_chunks', 'results'])
gpt-oss:20b


In [7]:
# Add two new columns to df ('is_motive' and 'is_useful'), both initialised
# to the empty string.
for annotation_column in ('is_motive', 'is_useful'):
    df[annotation_column] = ''

# Derive the CSV name by swapping the file extension. Unlike a plain string
# replace of '.json', this never yields the input path itself when the name
# lacks that substring.
outfilename = str(Path(filename).with_suffix('.csv'))

# A later cell reads this CSV back and counts the y/n values in the annotation
# columns, so an existing file may already hold labels. Do not overwrite it:
# re-running this cell would otherwise reset those columns to empty strings.
if Path(outfilename).exists():
    print(f'{outfilename} already exists; not overwriting it. '
          'Delete or rename the file to regenerate it.')
else:
    df.to_csv(outfilename, index=False)

In [8]:
# Display the first five rows of df (five is head()'s default row count).
df.head(n=5)

Unnamed: 0,chunk_id,passage,motive_yn,motive,character,behavior,is_motive,is_useful
0,0,﻿The Project Gutenberg eBook of Simon\n \nT...,n,,,,,
1,1,The Yarn ...,y,singularly strong inclination for solitude,the solitary passenger,He moved to an empty carriage and chose a comp...,,
2,2,"Even as they were slowing down, he fell\non hi...",n,,,,,
3,3,"""It doesn't seem to have changed much,"" said t...",y,very thoughtfully,Mr. Rattar,He stood for a minute or two at the window gaz...,,
4,4,"Professionally, he was lawyer to several of th...",n,,,,,


In [None]:
# Load both motive-analysis CSVs: df1 from '26306-motive-analysis.csv' and
# df2 from 'qwen26306-motive-analysis.csv'.
df1 = pd.read_csv('26306-motive-analysis.csv')
df2 = pd.read_csv('qwen26306-motive-analysis.csv')

# Disabled step: limiting each frame to its first 16 rows via .head(16).

# For each file, tally how often every value occurs in the three label columns.
label_columns = ['motive_yn', 'is_motive', 'is_useful']
for labelled_frame in (df1, df2):
    label_counts = labelled_frame[label_columns].apply(pd.Series.value_counts)
    print(label_counts)

# Disabled step: re-saving df1 as 'gptoss-26306a-motive-analysis.csv' and
# df2 as 'qwen-26306a-motive-analysis.csv' (both with index=False).

   motive_yn  is_motive  is_useful
n         25         10         16
y         25         15          9
   motive_yn  is_motive  is_useful
n         26         19         22
y         24          5          2


In [19]:
# Chi-square test on the 'is_motive' value counts of both files, to decide
# whether the y/n distribution differs significantly between the two models.
from scipy.stats import chi2_contingency

gptoss_counts = df1['is_motive'].value_counts()
qwen_counts = df2['is_motive'].value_counts()

# Rows are label values, columns are models; a label absent from one file
# is counted as 0.
contingency_table = pd.DataFrame({'gptoss': gptoss_counts, 'qwen': qwen_counts})
contingency_table = contingency_table.fillna(0)

chi2, p, dof, ex = chi2_contingency(contingency_table)
print(f'Chi-square test p-value: {p}')

# Report the verdict at the 0.05 significance level.
verdict = (
    'The distributions are significantly different.'
    if p < 0.05
    else 'The distributions are not significantly different.'
)
print(verdict)

# Show the table the test was computed on.
print(contingency_table)

Chi-square test p-value: 0.012496088398200007
The distributions are significantly different.
           gptoss  qwen
is_motive              
n              10    19
y              15     5


In [20]:
# Same chi-square test as for 'is_motive', applied to the 'is_useful' value
# counts of both files.
gptoss_useful_counts = df1['is_useful'].value_counts()
qwen_useful_counts = df2['is_useful'].value_counts()

# Rows are label values, columns are models; a label absent from one file
# is counted as 0.
contingency_table_useful = pd.DataFrame(
    {'gptoss': gptoss_useful_counts, 'qwen': qwen_useful_counts}
)
contingency_table_useful = contingency_table_useful.fillna(0)

chi2_useful, p_useful, dof_useful, ex_useful = chi2_contingency(contingency_table_useful)
print(f'Chi-square test p-value for is_useful: {p_useful}')

# Report the verdict at the 0.05 significance level.
verdict_useful = (
    'The distributions for is_useful are significantly different.'
    if p_useful < 0.05
    else 'The distributions for is_useful are not significantly different.'
)
print(verdict_useful)

# Show the table the test was computed on.
print(contingency_table_useful)

Chi-square test p-value for is_useful: 0.047946986604986114
The distributions for is_useful are significantly different.
           gptoss  qwen
is_useful              
n              16    22
y               9     2


In [21]:
# Row-by-row agreement on 'motive_yn' between the two files: do both have
# y and n in the same places? This is a direct element-wise comparison of the
# two columns, not a value-count comparison or a chi-square test.
# NOTE(review): assumes row i in both files refers to the same chunk — confirm.
gptoss_flags = df1['motive_yn']
qwen_flags = df2['motive_yn']
comparison = gptoss_flags == qwen_flags

# True = both files agree on that row, False = they disagree.
agreement_counts = comparison.value_counts()
print(agreement_counts)

motive_yn
True     33
False    17
Name: count, dtype: int64
