In [None]:
import openai
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from matplotlib.ticker import PercentFormatter
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_curve, average_precision_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Excel workbook for one child into `df`.
# The path is a placeholder and must be replaced with a real file location
# before this cell can run.
# NOTE(review): the Pareto cell re-reads this same path into `df_temp`, and the
# box-plot cell rebinds `df` to the ground-truth sheet, so this load looks
# redundant — confirm nothing else depends on it before removing.
df = pd.read_excel("path_to_child(x)'s_excel_file")

# Pareto Chart for child x 

In [None]:
# Pareto chart for a single child: bars show how many values fall into each
# probability bin (most populated bin first) and the red line shows the running
# share of all binned values.
file_name = "path_to_child(x)'s_excel_file"  # placeholder — replace with the real path
df_temp = pd.read_excel(file_name)
# Column '1' is plotted as "Probability" below; presumably the predicted
# probability of the positive class — confirm against the workbook.
data = df_temp['1']

# Five equal-width bins over [0, 1]. include_lowest=True closes the first bin on
# the left so an exact 0.0 is counted; values outside [0, 1] become NaN and are
# left out of the counts.
bins = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
group_names = ['0-0.2', '0.2-0.4', '0.4-0.6', '0.6-0.8', '0.8-1.0']
data_grouped = pd.cut(data, bins=bins, labels=group_names, include_lowest=True)

# value_counts() sorts by frequency in descending order by default, which is the
# bar order a Pareto chart needs; the cumulative share is taken in that order.
grouped_counts = data_grouped.value_counts()
cumulative_percentage = (grouped_counts.cumsum() / grouped_counts.sum()) * 100

fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()  # second y-axis (cumulative %) sharing the bars' x-axis

ax1.bar(grouped_counts.index, grouped_counts, color='tab:blue', alpha=0.6)
ax2.plot(grouped_counts.index, cumulative_percentage, color='tab:red', marker='o', ms=5)

# Anchor the cumulative axis at 0 so the line is comparable with the bars, and
# leave a little headroom so the 100% marker is not clipped by the frame.
ax2.set_ylim(0, 105)
ax2.yaxis.set_major_formatter(PercentFormatter())

# NOTE(review): the title hard-codes the prompting strategy and the child
# number; keep it in sync with `file_name` when plotting another child.
ax1.set_title("Pareto Plot of expert prompting for Child 8", fontsize=20)
ax1.set_xlabel("Probability", fontsize=20)
ax1.set_ylabel("Count", color='tab:blue', fontsize=20)
ax2.set_ylabel("Cumulative Percentage (%)", color='tab:red', fontsize=20)

# Tick-label font size on both axes. Styling is applied on the Axes objects
# directly: after twinx() the pyplot "current axes" is the twin, whose x-axis is
# hidden, so pyplot-level x-tick calls would not reach the visible labels.
ax1.tick_params(axis='both', labelsize=18)
ax2.tick_params(axis='both', labelsize=18)

plt.tight_layout()
plt.show()

# Box plot for all 50 children (with ground-truth overlay)

In [None]:
from matplotlib.ticker import MultipleLocator
import numpy as np

# Box plot of the per-child probability distributions with the ground-truth
# label of each child overlaid as a blue dot.

# Number of children to plot; used for the file loop, the title and the
# ground-truth slice so the three cannot drift apart.
N_CHILDREN = 50

# Placeholder path template — replace with the real location of each child's
# workbook. The child number must be part of the path: with a loop-invariant
# path every iteration reads the same file and all boxes come out identical.
CHILD_FILE_TEMPLATE = "path_to_child({child})'s_excel_file"

# One column per child, keyed '1'..'N_CHILDREN'.
autism_dict = {}
for i in range(N_CHILDREN):
    file_name = CHILD_FILE_TEMPLATE.format(child=i + 1)
    df_temp = pd.read_excel(file_name)
    data = df_temp['1']  # same column '1' that the Pareto cell plots as "Probability"
    autism_dict[str(i + 1)] = data

df_all = pd.DataFrame(autism_dict)

plt.figure(figsize=(20, 10))
ax = plt.gca()

sns.boxplot(data=df_all, showfliers=False, ax=ax)

ax.set_xlabel('Children', fontsize=28)
ax.set_ylabel('Probability', fontsize=28)
ax.set_title(f'Autism Boxplot for {N_CHILDREN} Children(zero-shot)', fontsize=30)

# Tick-label sizes are set once, on the axes, after the plot has been drawn so
# they apply to the ticks that are actually shown.
ax.tick_params(axis='x', labelsize=25)
ax.tick_params(axis='y', labelsize=20)

# Label every second box to keep the x-axis readable.
# NOTE(review): if the installed seaborn attaches fixed tick labels, a
# MultipleLocator can pair labels with the wrong boxes — check that the tick
# labels match the `df_all` column names.
x_major_locator = MultipleLocator(2)
ax.xaxis.set_major_locator(x_major_locator)

# Ground-truth overlay: column '自閉症' ("autism") of the ground-truth sheet,
# one value per child, drawn at the box positions 0..N_CHILDREN-1.
df = pd.read_excel("path_to_ground_truth_excel")
first_50_data = df['自閉症'].head(N_CHILDREN)

# Sized from the slice so a shorter ground-truth sheet does not raise a
# length-mismatch error in scatter().
x_values = np.arange(len(first_50_data))

ax.scatter(x_values, first_50_data, color='blue', alpha=0.7, marker='o')

plt.show()

# Accuracy by prompting strategy

In [None]:
# Accuracy of every column in each sheet of final_data.xlsx (one sheet per
# prompting strategy), using a 0.5 decision threshold.
# NOTE(review): `ground_truth` and `class_mapping` are not defined in any cell
# visible here; they must exist in the kernel before this cell runs — define
# them in an earlier cell so the notebook survives Restart & Run All.
for i in ["zero", "few", "chain", "expert", "expert_gpt4"]:
    df_final_zero = pd.read_excel("final_data.xlsx", sheet_name=i)
    # Vectorised threshold: values below 0.5 become 0, everything else
    # (including NaN, as with the former element-wise lambda) becomes 1.
    df_processed = (~(df_final_zero < 0.5)).astype(int)

    overkill_rates = {}  # never populated in this cell; kept so the name stays bound
    print(f'{i}:')

    # class_mapping translates a sheet column name into the matching
    # ground-truth column.
    accuracies = {
        column: accuracy_score(ground_truth[class_mapping[column]], df_processed[column])
        for column in df_processed.columns
    }

    # Formatted output; the last two characters of each column name are dropped
    # from the printed label.
    for column in accuracies.keys():
        print(f"{column[:-2]}: Accuracy = {accuracies[column]:.4f}")
    print("\n")

# Accuracy for different approaches

In [None]:
# Accuracy of the "prescribeonly" and "bgonly" runs for each prompting strategy,
# scored against the '自閉症' ("autism") column of data.xlsx.
ground_truth_df = pd.read_excel("data.xlsx")

# First four files are the prescribe-only runs, last four the bg-only runs,
# each in the order zero / few / chain / expert.
file_names = [
    "exp4/output_zero_prescribeonly.xlsx",
    "exp4/output_few_prescribeonly.xlsx",
    "exp4/output_chain_prescribeonly.xlsx",
    "exp4/output_expert_prescribeonly.xlsx",
    "exp4/output_zero_bgonly.xlsx",
    "exp4/output_few_bgonly.xlsx",
    "exp4/output_chain_bgonly.xlsx",
    "exp4/output_expert_bgonly.xlsx"
]

accuracies = []  # one entry per file, in `file_names` order

for file_name in file_names:
    prob_df = pd.read_excel(file_name)

    # Column '1' thresholded at 0.5 gives the predicted label.
    binary_predictions = (prob_df['1'] >= 0.5).astype(int)

    # accuracy_score expects (y_true, y_pred); the score itself is symmetric,
    # but the conventional order matches the other accuracy cell.
    accuracy = accuracy_score(ground_truth_df['自閉症'], binary_predictions)

    accuracies.append(accuracy)

# Interleave the two halves so each strategy's prescribe-only and bg-only
# scores sit next to each other: zero, zero, few, few, chain, chain, ...
accuracies_reordered = [
    score
    for paired_scores in zip(accuracies[:4], accuracies[4:])
    for score in paired_scores
]

output_df = pd.DataFrame(
    {'Accuracy': accuracies_reordered},
    index=['Zero_prescribeonly', 'Zero_bgonly', 'Few_prescribeonly', 'Few_bgonly', 'Chain_prescribeonly', 'Chain_bgonly', 'Expert_prescribeonly', 'Expert_bgonly'],
)

print(output_df)