In [2]:
import json
import pandas as pd

# Minimum normalised score (raw score / 100) a discipline needs in order to be
# kept as a prediction.
SCORE_THRESHOLD = 0.75


def _select_disciplines(categories, threshold=SCORE_THRESHOLD):
    """Choose the predicted disciplines for a single publication.

    Parameters
    ----------
    categories : dict
        Maps a discipline name to its raw score. Each score is converted with
        ``float(score) / 100`` before being compared (presumably the raw
        scores are on a 0-100 scale -- TODO confirm against the JSON file).
    threshold : float
        Normalised score at or above which a discipline is retained.

    Returns
    -------
    list
        Every discipline whose normalised score is >= ``threshold``. When no
        discipline reaches the threshold, the single highest-scoring
        discipline is returned instead (the first one encountered wins on
        ties). If no score is strictly greater than 0 the list is empty.
    """
    retained = []
    best_name, best_score = None, 0
    for discipline, score in categories.items():
        final_score = float(score) / 100
        if final_score >= threshold:
            retained.append(discipline)
        # Always track the running maximum; it is only used as a fallback
        # when nothing reaches the threshold.
        if final_score > best_score:
            best_name, best_score = discipline, final_score
    # Fallback: nothing reached the threshold, so keep the top-scoring discipline.
    if not retained and best_name:
        retained.append(best_name)
    return retained


# Load the JSON data. The encoding is given explicitly so the file is decoded
# the same way on every platform instead of with the locale's default codec.
with open('results/ChatGPT/intermediate_results/chatgpt_results_non_hierarchical.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One record per PUBID holding the list of retained disciplines
results = [
    {'PUBID': pubid, 'chatgpt': _select_disciplines(categories)}
    for pubid, categories in data.items()
]

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

results_df

Unnamed: 0,PUBID,chatgpt
0,490689400001,"[agriculture, forestry, and fisheries, animal ..."
1,494013500001,[political science]
2,516167800001,"[chemical engineering, agriculture, forestry, ..."
3,524076900001,[languages and linguistics]
4,527207200001,"[civil engineering, mechanical engineering]"
...,...,...
186,603787200001,"[psychology, sociology]"
187,672107000001,[mathematics]
188,709228700001,[computer and information sciences]
189,634605700001,[archaeology]


In [3]:
# Number of disciplines predicted for each publication.
results_df['prediction_length'] = results_df['chatgpt'].apply(len)

In [4]:
# How many publications received 1, 2, 3, ... predicted disciplines.
results_df['prediction_length'].value_counts()

prediction_length
1    116
2     33
3     27
5      9
4      6
Name: count, dtype: int64

In [6]:
import pandas as pd
# Load only the columns used below; dtype=str reads every column as text, so
# PUBID is a string here just like the JSON object keys used for the predictions.
test_data = pd.read_csv(
    'H:/data/WoS_data/data_gpt10.csv',
    usecols=['PUBID', 'ABSTRACT', 'final_disciplines'],
    dtype=str,
)

In [7]:
# Attach the ChatGPT predictions to the labelled rows. This is an inner join
# (the pandas default), so only PUBIDs present in both frames are kept.
test_data = test_data.merge(results_df, how='inner', on='PUBID')

In [8]:
# Lower-case the ground-truth label strings (presumably so they compare equal
# to the predicted label spellings -- TODO confirm the predictions are lower-case).
test_data['final_disciplines'] = test_data['final_disciplines'].str.lower()

# Predicted-label spelling -> spelling it is replaced with before comparison.
replace_dict = {
    'arts (arts, history of arts, performing arts, music)': 'art (arts, history of arts, performing arts, music)'
}

# Apply the replacements and drop duplicate labels. dict.fromkeys keeps the
# first occurrence of each label in its original position, so the resulting
# list order is the same on every run (a set would order the strings by their
# per-process randomised hashes).
test_data['chatgpt'] = test_data['chatgpt'].apply(
    lambda labels: list(dict.fromkeys(replace_dict.get(label, label) for label in labels))
)

# `data` is a second name for the same DataFrame object, not a copy.
data = test_data

In [9]:
# Turn each ';'-delimited label string into a list of labels.
data['final_disciplines'] = data['final_disciplines'].apply(
    lambda joined: joined.split(';')
)

In [10]:
# True for rows where at least one predicted label begins with 'art'.
data['arts'] = data['chatgpt'].apply(
    lambda labels: any(label.startswith('art') for label in labels)
)

In [11]:
# Show the rows flagged by the boolean 'arts' column.
data[data['arts']]

Unnamed: 0,PUBID,ABSTRACT,final_disciplines,chatgpt,prediction_length,arts
4,500234900001,Glamour is usually theorized within fashion st...,"[literature, physical sciences, other humaniti...","[history, art (arts, history of arts, performi...",2,True
7,521535000001,This paper contributes to a theoretical discus...,"[other humanities, media and communications, a...","[art (arts, history of arts, performing arts, ...",1,True
80,683709800001,Museum staff strive to create environments ric...,"[educational sciences, media and communication...","[art (arts, history of arts, performing arts, ...",3,True
185,827047500001,This article explores how the passages in Isai...,[religion],"[history, religion, economics and business, ph...",5,True


In [12]:
from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

# One list of labels per publication, for the ground truth and for the predictions.
true_labels = list(data.final_disciplines)
predicted_labels = list(data.chatgpt)

# The binarizer learns its columns from the ground-truth labels only.
# NOTE(review): predicted labels that never occur in the ground truth are not
# among the fitted classes; per the scikit-learn docs transform() ignores such
# labels (with a warning), so they are not counted as false positives here --
# confirm this is intended.
mlb = MultiLabelBinarizer()
true_label_binarized = mlb.fit_transform(true_labels)
predicted_labels_binarized = mlb.transform(predicted_labels)

# zero_division=0 reports an undefined precision/recall/F1 as 0, which is the
# value the default setting already uses, but without emitting a warning for
# every affected metric.
print(classification_report(
    true_label_binarized,
    predicted_labels_binarized,
    target_names=mlb.classes_,
    zero_division=0,
))

                                                                         precision    recall  f1-score   support

                                   agriculture, forestry, and fisheries       0.35      0.80      0.48        10
                                               animal and dairy science       1.00      0.36      0.53        14
                                                            archaeology       0.70      0.58      0.64        12
                    art (arts, history of arts, performing arts, music)       0.50      0.14      0.22        14
                                                         basic medicine       0.00      0.00      0.00        17
                                                    biological sciences       0.83      0.47      0.60        32
                                                   chemical engineering       0.36      0.73      0.48        11
                                                      chemical sciences       0.00      0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
from sklearn.metrics import accuracy_score
# With multilabel indicator input, scikit-learn's accuracy_score is subset
# accuracy: a row only counts when its predicted label set matches exactly.
accuracy_score(y_true=true_label_binarized, y_pred=predicted_labels_binarized)

0.02617801047120419

In [14]:
def filter_labels(labels_list):
    """Return a copy of ``labels_list`` without the labels that start with 'other'.

    ``labels_list`` is an iterable of label lists. The result is a new list
    holding one new list per input entry, in the same order; entries whose
    labels are all removed become empty lists. The input is not modified.
    """
    return [
        [name for name in group if not name.startswith('other')]
        for group in labels_list
    ]

# Remove every label starting with 'other' from both the ground truth and the predictions
true_labels_filtered = filter_labels(list(data.final_disciplines))
predicted_labels_filtered = filter_labels(list(data.chatgpt))

# Fit a fresh binarizer on the filtered ground truth.
# NOTE: this rebinds mlb, true_label_binarized and predicted_labels_binarized,
# so from here on those names refer to the filtered evaluation.
mlb = MultiLabelBinarizer()
true_label_binarized = mlb.fit_transform(true_labels_filtered)
predicted_labels_binarized = mlb.transform(predicted_labels_filtered)

# Generate and print the classification report. zero_division=0 reports an
# undefined precision/recall/F1 as 0, which is the value the default setting
# already uses, but without emitting a warning for every affected metric.
print(classification_report(
    true_label_binarized,
    predicted_labels_binarized,
    target_names=mlb.classes_,
    zero_division=0,
))

                                                                         precision    recall  f1-score   support

                                   agriculture, forestry, and fisheries       0.35      0.80      0.48        10
                                               animal and dairy science       1.00      0.36      0.53        14
                                                            archaeology       0.70      0.58      0.64        12
                    art (arts, history of arts, performing arts, music)       0.50      0.14      0.22        14
                                                         basic medicine       0.00      0.00      0.00        17
                                                    biological sciences       0.83      0.47      0.60        32
                                                   chemical engineering       0.36      0.73      0.48        11
                                                      chemical sciences       0.00      0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
