# ChatGPT API Chinese Semantic Checker

Authors: Zexin Xu, Zilu Zhang

In [10]:
import pandas as pd

## Data Preprocessing

For this dataset only. Do not run this for other datasets.

* `tunit_df` includes tunits data
* `sen_df` includes sentences data

In [None]:
# Load the annotation sheet, keep only rows that have both the text and the
# final correctness judgement, and renumber the surviving rows from 0.
df = (
    pd.read_excel('mysen_edit.xlsx')
    .dropna(subset=['sentences', 'correct_final'])
    .reset_index(drop=True)
)

# Remove the literal "_x000D_\n" sequences first, then any newline still left.
df['sentences'] = (
    df['sentences']
    .str.replace(r'_x000D_\n', '', regex=True)
    .replace(r'\n', '', regex=True)
)
df.head()

In [None]:
# Load the revised ground-truth labels for the t-units.
new_truth_df = pd.read_csv("ForGPT4.csv")

# Assemble the t-unit table from the cleaned text, the revised label and the `sen` flag.
# NOTE(review): the three columns are combined by index, so this assumes ForGPT4.csv
# has one row per row of `df`, in the same order — TODO confirm.
tunit_df = pd.DataFrame({
    'sentence': df['sentences'], 
    'ground_truth_label': new_truth_df['ground_truth_label0306'],
    'sen': df['sen']
})
tunit_df.head()

In [None]:
# Rebuild sentence-level text by concatenating consecutive rows of `df`:
# a row with sen == 0 is followed by a comma, any other row closes the
# sentence with a full stop and starts a new one.
sent = ""
correct = True      # unused by the active code; kept from the disabled label aggregation
sent_arr = []
correct_arr = []    # unused by the active code; kept from the disabled label aggregation
for _, row in df.iterrows():
    sent += row['sentences']
    if row['sen'] == 0:
        sent += "，"
    else:
        sent += "。"
        # str.replace returns a new string, so the result has to be assigned
        # back; the original call discarded it and therefore did nothing.
        sent = sent.replace("_x000D_\n", "")
        sent_arr.append(sent)
        sent = ""
# NOTE(review): text accumulated after the last sentence-closing row (if the
# data ends on sen == 0) is not appended to sent_arr — confirm this cannot happen.

# Load the revised sentence-level ground-truth labels.
new_truth_sen_df = pd.read_csv("ForGPT4_sen.csv")

# NOTE(review): sent_arr is paired with the label column by position/index, so
# ForGPT4_sen.csv is assumed to hold one row per rebuilt sentence — TODO confirm.
sen_df = pd.DataFrame({
    'sentence': sent_arr,
    'ground_truth_label': new_truth_sen_df['ground_truth_label_0306']
})
sen_df.head()

## ChatGPT API Query

Source: 
- https://platform.openai.com/docs/api-reference/authentication
- https://learndataanalysis.org/getting-started-with-openai-gpt-gpt-3-5-model-api-in-python/

In [14]:
import openai
import os

# Take the OpenAI key from the OPENAI_API_KEY environment variable so it never
# has to be pasted into (and saved with) the notebook. When the variable is not
# set this falls back to the previous empty placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

openai.api_key = API_KEY
# Model used by every request issued through ChatGPT_conversation.
model_id = "gpt-4"

def ChatGPT_conversation(conversation):
    """Send the message history to the chat model and record its reply.

    :param conversation: list of ``{'role': ..., 'content': ...}`` messages;
        it is extended in place with the model's answer.
    :return: the same list, now ending with the reply message.
    """
    completion = openai.ChatCompletion.create(model=model_id, messages=conversation)
    reply = completion.choices[0].message
    conversation.append({'role': reply.role, 'content': reply.content})
    return conversation

### Query Logic

Use different questions to check whether a sentence is grammatically correct.
1. `语病`: an overall check of the sentence
2. `字词错误`: a check whether the sentence contains word-choice or spelling errors (`拼写错误`)
3. `语法错误`: a check whether the sentence contains grammatical errors

In [None]:
%%time

from datetime import datetime

# All question wordings that can be asked; only the overall check is enabled below.
# question_type = ['语病', '字词错误', '语法错误']
question_type = ['语病']
question_suffix = '，这句话是否有'

# Work on a copy so the answers are added without touching tunit_df.
chatgpt_tunit_df = tunit_df.copy()

#NOTE This setup works around the ChatGPT hourly limit: each run handles the rows from
# start_index onward and writes its own file; the files are combined later.
data_df = chatgpt_tunit_df  # alias of the copy above, not a second copy
start_index = 1933  # row label at which this run starts
stop_index = 0  # label of the most recent row that completed
conversation = []  # message history sent with every request
try:
    for i, row in data_df.loc[start_index:, :].iterrows():
        for q_type in question_type:
            # Question sent to the model: “<t-unit>”，这句话是否有<q_type>？
            question = '“' + row['sentence'] + "”" + question_suffix + q_type + "？"
            conversation.append({'role': 'user', 'content': question})
            conversation = ChatGPT_conversation(conversation)
            # The last message is the model's reply; store it in the column named after q_type.
            data_df.loc[i, q_type] = conversation[-1]['content'].strip()
            # The history is kept while sen != 1, so later rows are asked with the earlier
            # exchanges as context; it is cleared once a row with sen == 1 has been answered.
            if row['sen'] == 1:
                conversation = []
        stop_index = i
        if i % 20 == 0:
            print(f"{i}th iteration done...")
except Exception as e:
    # Any failure ends the run; report where it stopped so the next run can be configured.
    print(type(e).__name__, "/", str(e))
    # NOTE(review): this prints one less than the last completed row label — confirm
    # that this is the intended resume point.
    print("stop at index: ", stop_index - 1)
    now = datetime.now()
    print("current time = ", now.strftime("%H:%M:%S"))

#NOTE Prevent overwriting existing file
# mode='x' makes the write fail with FileExistsError when the target already exists.
try:
    data_df.to_csv('chatgpt/GPT4/yubing/tunit_df_result_context7.csv', mode='x', index=False, encoding='utf-8-sig')
except FileExistsError:
    print('File already exists! Change it to another name.')
    


In [None]:
%%time

from datetime import datetime

# All question wordings that can be asked; only the overall check is enabled below.
# question_type = ['语病', '字词错误', '语法错误']
question_type = ['语病']
question_suffix = '，这句话是否有'

# Despite the variable name, this cell works on a copy of the sentence-level table.
chatgpt_tunit_df = sen_df.copy()

#NOTE This setup works around the ChatGPT hourly limit: each run handles the rows from
# start_index onward and writes its own file; the files are combined later.
data_df = chatgpt_tunit_df  # alias of the copy above, not a second copy
start_index = 283  # row label at which this run starts
stop_index = 0  # label of the most recent row that completed
conversation = []
try:
    for i, row in data_df.loc[start_index:, :].iterrows():
        for q_type in question_type:
            # Question sent to the model: “<sentence>”，这句话是否有<q_type>？
            question = '“' + row['sentence'] + "”" + question_suffix + q_type + "？"
            # Every sentence is asked in a fresh conversation (no earlier context).
            conversation = [{'role': 'user', 'content': question}]
            conversation = ChatGPT_conversation(conversation)
            # The last message is the model's reply; store it in the column named after q_type.
            data_df.loc[i, q_type] = conversation[-1]['content'].strip()
        stop_index = i
        if i % 20 == 0:
            print(f"{i}th iteration done...")
except Exception as e:
    # Any failure ends the run; report where it stopped so the next run can be configured.
    print(type(e).__name__, "/", str(e))
    # NOTE(review): this prints one less than the last completed row label — confirm
    # that this is the intended resume point.
    print("stop at index: ", stop_index - 1)
    now = datetime.now()
    print("current time = ", now.strftime("%H:%M:%S"))

#NOTE Prevent overwriting existing file
# mode='x' makes the write fail with FileExistsError when the target already exists.
try:
    data_df.to_csv('chatgpt/GPT4/yubing/sen_df_result1.csv', mode='x', index=False, encoding='utf-8-sig')
except FileExistsError:
    print('File already exists! Change it to another name.')

### Time/Cost Result (GPT4)
1. tunit dataset
    - Time: 
        - 33min 11s
        - 59min 46s
        - 5min 24s
        - 2h 46min 17s
        - 20min 58.9s
        - 6min 15s
        - 58min 17s
        - 41min 28s
        - = 391min 36.9s = 6h 31m 36.9s
    - Cost: 
        - 11.27$
2. sen dataset
    - Time:
        - 47min 39s
        - 2h 34min 12s
        - = 201min 51s = 3h 21m 51s
    - Cost:
        - 5.78$


### Time/Cost Result
1. tunit dataset
    - 40m56s
    - $0.47

2. sen dataset
    - 19m52s + 8m31.4s = 28m23.4s
    - $0.17

3. modified tunit dataset
    - 1min 52
    - 2min 52
    - 3min 1.2
    - 10min 10s
    - 7min 36s
    - 1min 30s
    - 2min 35s
    - 4min 12s
    - 2min 54s
    - 8min 21
    - 5min 49
    - 1min 23
    - 7min 53
    - 4min 55s
    - 4min 55s
    - Total: 65min 3s
    - $0.40

## Result processing


In [17]:
###############################
#NOTE Dataframe Processing
###############################
import pandas as pd

# Raw GPT-3.5-turbo answers to the 语病 question: t-units without context,
# t-units with context, and whole sentences.
result_tunit_df = pd.read_csv('chatgpt/GPT3.5-turbo/yubing/tunit_df_result.csv', encoding='utf-8-sig')
result_tunit_context_df = pd.read_csv('chatgpt/GPT3.5-turbo/yubing/tunit_df_result_context.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/GPT3.5-turbo/yubing/sen_df_result.csv', encoding='utf-8-sig')


###############################
#NOTE 语病 result df processing
###############################

def modify_yubing_result(df):
    """Add a ``yubing_label`` column derived from the free-text ``语病`` answers.

    The label is 1 when the answer is ``"1"`` or contains one of the phrases
    below (the model reports no problem), 0 when the answer is ``"0"`` or
    contains none of them. Rows without an answer (NaN/None, e.g. rows a
    partial query run never reached) get a missing label instead of raising
    ``TypeError`` as the substring checks previously did.

    :param df: DataFrame with a ``语病`` column; modified in place.
    :return: None
    """
    # Phrases treated as "the model found nothing wrong".
    # NOTE(review): short markers such as '没有', '否' and '不是' match any answer
    # that merely contains them, including answers that describe an error —
    # verify the resulting labels on a sample.
    no_error_markers = (
        '没有语病',
        '没有明显语病',
        '无语病',
        '不是',
        '否',
        '没有',
        '没有语法错误',
        '没有问题',
        '语法正确',
        '语法上是正确的',
        '语法上没有错误',
        '语法没有错误',
        '没有语法问题',
        '没有明显错误',
        '不算是语病',
        '语法上可以说是正确的',
        '语法上没有明显错误',
        '没有明显的',
        '没有显著的',
        '没有错误',
        '完全正确',
        '基本正确',
        '是正确的语法',
        '标准的英语表达',
    )
    for i, answer in df['语病'].items():
        if pd.isna(answer):
            # Nothing to classify: keep the label missing rather than guessing.
            df.loc[i, 'yubing_label'] = float('nan')
            continue
        # Accept the numeric forms too, in case the CSV reader parsed the column as numbers.
        if answer == "1" or answer == 1:
            label = 1
        elif answer == "0" or answer == 0:
            label = 0
        else:
            text = str(answer)
            label = 1 if any(marker in text for marker in no_error_markers) else 0
        df.loc[i, 'yubing_label'] = label

# Label all three GPT-3.5-turbo result tables in place.
modify_yubing_result(result_tunit_df)
modify_yubing_result(result_tunit_context_df)
modify_yubing_result(result_sen_df)

# Put the with-context answers and labels next to the no-context ones (matched by row index).
result_tunit_df['语病_context'] = result_tunit_context_df['语病']
result_tunit_df['yubing_label_context'] = result_tunit_context_df['yubing_label']
# Not the last expression of the cell, so this preview is not displayed.
result_tunit_df.head()

# Persist the labelled tables for the evaluation cells.
result_tunit_df.to_csv('chatgpt/GPT3.5-turbo/tunit_df_result_mod.csv', index=False, encoding='utf-8-sig')
result_sen_df.to_csv('chatgpt/GPT3.5-turbo/sen_df_result_mod.csv', index=False, encoding='utf-8-sig')

In [8]:
###############################
#NOTE 语病 result df processing GPT4
###############################
import pandas as pd 

# Raw GPT-4 answers to the 语病 question: t-units (with context) and whole sentences.
result_tunit_df = pd.read_csv('chatgpt/GPT4/yubing/tunit_df_result_context.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/GPT4/yubing/sen_df_result.csv', encoding='utf-8-sig')

def modify_yufa_result(df):
    """Add a ``yubing_label`` column derived from the free-text ``语病`` answers.

    Despite its name, this variant reads the ``语病`` column and writes
    ``yubing_label``; a later cell defines another function with the same name
    for the ``语法错误`` column, so the definition run last is the one in effect.

    The label is 1 when the answer contains one of the phrases below (the model
    reports no problem) and 0 otherwise. Rows without an answer (NaN/None) get
    a missing label instead of raising ``TypeError`` as the substring checks
    previously did.

    :param df: DataFrame with a ``语病`` column; modified in place.
    :return: None
    """
    # Phrases treated as "the model found nothing wrong".
    no_error_markers = (
        '没有语病',
        '没有语病错误',
        '不算有语病',
        '没有问题',
        '语病正确',
        '语病上是正确的',
        '语病上没有错误',
        '语病没有错误',
        '没有语病问题',
        '没有明显错误',
        '不算是语病',
        '语法上可以说是正确的',
        '语法上没有明显错误',
        '没有明显的',
        '没有显著的',
        '没有错误',
        '完全正确',
        '基本正确',
        '基本上正确',
        '是正确的语法',
        '标准的英语表达',
        '语法是正确的',
        '语法正确',
        '没有语法错误',
        '没有。',
    )
    for i, answer in df['语病'].items():
        if pd.isna(answer):
            # Nothing to classify: keep the label missing rather than guessing.
            df.loc[i, 'yubing_label'] = float('nan')
            continue
        text = str(answer)
        df.loc[i, 'yubing_label'] = 1 if any(marker in text for marker in no_error_markers) else 0

# Label both GPT-4 result tables in place, then persist them for the evaluation cells.
modify_yufa_result(result_tunit_df)
modify_yufa_result(result_sen_df)
result_tunit_df.to_csv('chatgpt/GPT4/tunit_df_result_mod.csv', index=False, encoding='utf-8-sig')
result_sen_df.to_csv('chatgpt/GPT4/sen_df_result_mod.csv', index=False, encoding='utf-8-sig')

In [93]:
###############################
#NOTE 语法 result df processing
###############################
# Raw answers to the 语法错误 question for t-units and whole sentences.
result_tunit_df = pd.read_csv('chatgpt/yufa/tunit_df_result.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/yufa/sen_df_result.csv', encoding='utf-8-sig')

def modify_yufa_result(df):
    """Add a ``yufa_label`` column derived from the free-text ``语法错误`` answers.

    The label is 1 when the answer is ``"1"`` or contains one of the phrases
    below (the model reports no grammar problem), 0 when the answer is ``"0"``
    or contains none of them. Rows without an answer (NaN/None) get a missing
    label instead of raising ``TypeError`` as the substring checks previously did.

    :param df: DataFrame with a ``语法错误`` column; modified in place.
    :return: None
    """
    # Phrases treated as "the model found nothing wrong".
    no_error_markers = (
        '没有语法错误',
        '没有问题',
        '语法正确',
        '语法上是正确的',
        '语法上没有错误',
        '语法没有错误',
        '没有语法问题',
        '没有明显错误',
        '不算是语法错误',
        '语法上可以说是正确的',
        '语法上没有明显错误',
        '没有明显的',
        '没有显著的',
        '没有错误',
        '完全正确',
        '基本正确',
        '基本上正确',
        '是正确的语法',
        '标准的英语表达',
        '语法是正确的',
    )
    for i, answer in df['语法错误'].items():
        if pd.isna(answer):
            # Nothing to classify: keep the label missing rather than guessing.
            df.loc[i, 'yufa_label'] = float('nan')
            continue
        # Accept the numeric forms too, in case the CSV reader parsed the column as numbers.
        if answer == "1" or answer == 1:
            label = 1
        elif answer == "0" or answer == 0:
            label = 0
        else:
            text = str(answer)
            label = 1 if any(marker in text for marker in no_error_markers) else 0
        df.loc[i, 'yufa_label'] = label

# Label both 语法错误 result tables in place, then persist them for the merge cell.
modify_yufa_result(result_tunit_df)
modify_yufa_result(result_sen_df)
result_tunit_df.to_csv('chatgpt/yufa/tunit_df_result_mod.csv', index=False, encoding='utf-8-sig')
result_sen_df.to_csv('chatgpt/yufa/sen_df_result_mod.csv', index=False, encoding='utf-8-sig')

In [125]:
###############################
#NOTE 字词 result df processing
###############################
# Raw answers to the 字词错误 question for t-units and whole sentences.
result_tunit_df = pd.read_csv('chatgpt/zici/tunit_df_result.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/zici/sen_df_result.csv', encoding='utf-8-sig')

def modify_zici_result(df):
    """Add a ``zici_label`` column derived from the free-text ``字词错误`` answers.

    The label is 1 when the answer is ``"1"`` or contains one of the phrases
    below, 0 when the answer is ``"0"`` or contains none of them. Rows without
    an answer (NaN/None) get a missing label instead of raising ``TypeError``
    as the substring checks previously did.

    :param df: DataFrame with a ``字词错误`` column; modified in place.
    :return: None
    """
    # Phrases that lead to label 1.
    # NOTE Special case kept from the original: an answer saying there is no
    # spelling error but there is a grammar error ("没有拼写错误，但是有语法错误")
    # is still labelled 1.
    # NOTE(review): '一个字词错误' is also in this list — confirm it is meant to yield 1.
    label_one_markers = (
        '没有语法错误',
        '没有问题',
        '语法正确',
        '语法上是正确的',
        '语法上没有错误',
        '语法没有错误',
        '没有语法问题',
        '没有明显错误',
        '不算是语法错误',
        '语法上可以说是正确的',
        '语法上没有明显错误',
        '没有明显的',
        '没有显著的',
        '没有错误',
        '完全正确',
        '基本正确',
        '基本上正确',
        '是正确的语法',
        '标准的英语表达',
        '正确的中文表达方式',
        '语法是正确的',
        '一个字词错误',
        '没有拼写错误',
    )
    for i, answer in df['字词错误'].items():
        if pd.isna(answer):
            # Nothing to classify: keep the label missing rather than guessing.
            df.loc[i, 'zici_label'] = float('nan')
            continue
        # Accept the numeric forms too, in case the CSV reader parsed the column as numbers.
        if answer == "1" or answer == 1:
            label = 1
        elif answer == "0" or answer == 0:
            label = 0
        else:
            text = str(answer)
            label = 1 if any(marker in text for marker in label_one_markers) else 0
        df.loc[i, 'zici_label'] = label

# Label both 字词错误 result tables in place, then persist them for the merge cell.
modify_zici_result(result_tunit_df)
modify_zici_result(result_sen_df)
result_tunit_df.to_csv('chatgpt/zici/tunit_df_result_mod.csv', index=False, encoding='utf-8-sig')
result_sen_df.to_csv('chatgpt/zici/sen_df_result_mod.csv', index=False, encoding='utf-8-sig')

In [6]:
##########################
#NOTE Merge everything!!!
##########################
# Labelled results of the three question types, t-unit level.
yubing_tunit_df = pd.read_csv('chatgpt/yubing/tunit_df_result_mod.csv', encoding='utf-8-sig')
yufa_tunit_df = pd.read_csv('chatgpt/yufa/tunit_df_result_mod.csv', encoding='utf-8-sig')
zici_tunit_df = pd.read_csv('chatgpt/zici/tunit_df_result_mod.csv', encoding='utf-8-sig')

# Labelled results of the three question types, sentence level.
yubing_sen_df = pd.read_csv('chatgpt/yubing/sen_df_result_mod.csv', encoding='utf-8-sig')
yufa_sen_df = pd.read_csv('chatgpt/yufa/sen_df_result_mod.csv', encoding='utf-8-sig')
zici_sen_df = pd.read_csv('chatgpt/zici/sen_df_result_mod.csv', encoding='utf-8-sig')

# One row per t-unit: text, ground truth, and the label plus raw answer of each question type.
# The ground truth comes from the in-memory tunit_df, so the preprocessing cells must have
# been run in this kernel.
# NOTE(review): columns are matched by row index; assumes all tables list the same rows
# in the same order — TODO confirm.
final_tunit_df = pd.DataFrame({
    'sentence': yubing_tunit_df['sentence'],
    'ground_truth_label': tunit_df['ground_truth_label'],
    'yubing_label': yubing_tunit_df['yubing_label'],
    'yubing_content': yubing_tunit_df['语病'],
    'yufa_label': yufa_tunit_df['yufa_label'],
    'yufa_content': yufa_tunit_df['语法错误'],
    'zici_label': zici_tunit_df['zici_label'],
    'zici_content': zici_tunit_df['字词错误'],
})

# Same layout at sentence level; the ground truth comes from the in-memory sen_df.
final_sen_df = pd.DataFrame({
    'sentence': yubing_sen_df['sentence'],
    'ground_truth_label': sen_df['ground_truth_label'],
    'yubing_label': yubing_sen_df['yubing_label'],
    'yubing_content': yubing_sen_df['语病'],
    'yufa_label': yufa_sen_df['yufa_label'],
    'yufa_content': yufa_sen_df['语法错误'],
    'zici_label': zici_sen_df['zici_label'],
    'zici_content': zici_sen_df['字词错误']
})

# index=True writes the row index as an extra first column of each file.
final_tunit_df.to_csv('chatgpt/final_tunit_df.csv', index=True, encoding='utf-8-sig')
final_sen_df.to_csv('chatgpt/final_sen_df.csv', index=True, encoding='utf-8-sig')

In [None]:
# Preview the first rows of the merged t-unit table.
final_tunit_df.head()

## Confusion Matrix

In [6]:
def print_evaluation(golds, predictions, beta=1):
    """
    Builds an evaluation report comparing golds and predictions, each of which is a sequence of 0/1 labels.
    Reports accuracy as well as precision/recall/F-score of the positive class, which can be informative if
    either the golds or predictions are highly biased. Nothing is printed; the caller prints the result.

    Labels are compared by position, so a pandas Series with any index is accepted. Empty input yields a
    report with all ratios set to 0 instead of raising ZeroDivisionError.

    :param golds: gold labels
    :param predictions: pred labels
    :param beta: weight of recall relative to precision in the F-score (1 gives the usual F1)
    :return: the report as a multi-line string
    :raises Exception: if golds and predictions differ in length
    """
    if len(golds) != len(predictions):
        raise Exception("Mismatched gold/pred lengths: %i / %i" % (len(golds), len(predictions)))
    # Materialise both sequences so that position, not index label, pairs the items.
    pairs = list(zip(list(golds), list(predictions)))
    num_total = len(pairs)
    num_correct = sum(1 for gold, prediction in pairs if prediction == gold)
    num_pred = sum(1 for _, prediction in pairs if prediction == 1)
    num_gold = sum(1 for gold, _ in pairs if gold == 1)
    num_pos_correct = sum(1 for gold, prediction in pairs if prediction == 1 and gold == 1)

    acc = float(num_correct) / num_total if num_total > 0 else 0.0
    prec = float(num_pos_correct) / num_pred if num_pred > 0 else 0.0
    rec = float(num_pos_correct) / num_gold if num_gold > 0 else 0.0
    f1 = (1 + beta ** 2) * prec * rec / (beta**2 * prec + rec) if prec > 0 and rec > 0 else 0.0

    output_str = "Accuracy: %i / %i = %f" % (num_correct, num_total, acc)
    output_str += ";\nPrecision (fraction of predicted positives that are correct): %i / %i = %f" % (num_pos_correct, num_pred, prec)
    output_str += ";\nRecall (fraction of true positives predicted correctly): %i / %i = %f" % (num_pos_correct, num_gold, rec)
    output_str += ";\nF1 (harmonic mean of precision and recall): %f;\n" % f1
    return output_str

def label_compare(label1, label2, golds):
    """
    Builds a report on how two sets of predicted 0/1 labels differ and whether the second set
    moves closer to the gold labels.

    Items are compared by position, so pandas Series with any index are accepted. Ratios whose
    denominator is zero (e.g. the two label sets are identical) are reported as 0 instead of
    raising ZeroDivisionError.

    :param label1: baseline labels
    :param label2: labels to compare against the baseline
    :param golds: gold labels
    :return: the report as a multi-line string
    :raises Exception: if label1 and label2 differ in length
    """
    if len(label1) != len(label2):
        raise Exception("Mismatched gold/pred lengths: %i / %i" % (len(label1), len(label2)))

    def ratio(numerator, denominator):
        # Guarded division: an empty denominator yields 0.0.
        return float(numerator) / denominator if denominator > 0 else 0.0

    first = list(label1)
    second = list(label2)
    gold_list = list(golds)

    total_count = 0            # positions where the two label sets disagree
    tf_count = 0               # ... of which label1 is 1
    ft_count = 0               # ... of which label1 is 0
    context_improve_count = 0  # label1 wrong, label2 right
    unmatch_count = 0          # label1 wrong
    correct_count = 0          # labels disagree and label2 is right
    for idx in range(0, len(first)):
        if first[idx] != second[idx]:
            total_count += 1
            if first[idx] == 1:
                tf_count += 1
            elif first[idx] == 0:
                ft_count += 1
            if second[idx] == gold_list[idx]:
                correct_count += 1
        if first[idx] != gold_list[idx]:
            unmatch_count += 1
            if second[idx] == gold_list[idx]:
                context_improve_count += 1

    output_str = "Number of mismatched labels: %i / %i = %f\n" % (total_count, len(first), ratio(total_count, len(first)))
    output_str += "True -> False: %i / %i = %f\n" % (tf_count, total_count, ratio(tf_count, total_count))
    output_str += "False -> True: %i / %i = %f\n" % (ft_count, total_count, ratio(ft_count, total_count))
    output_str += "Correct with context: %i / %i = %f\n" % (context_improve_count, unmatch_count, ratio(context_improve_count, unmatch_count))
    output_str += "Correct count after change: %i\n" % (correct_count)
    return output_str

In [None]:
import pandas as pd
# Labelled GPT-4 results produced by the result-processing cell.
result_tunit_df = pd.read_csv('chatgpt/GPT4/tunit_df_result_mod.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/GPT4/sen_df_result_mod.csv', encoding='utf-8-sig')

print("## GPT4语病比较 ##")
# Same report for both granularities: banner first, then the metrics.
for banner, frame in (
    ("------ Tunit Evaluation ------", result_tunit_df),
    ("------ Sentence Evaluation ------", result_sen_df),
):
    print(banner)
    print(print_evaluation(frame['ground_truth_label'], frame['yubing_label']))

In [None]:
import pandas as pd
# Labelled GPT-3.5-turbo results produced by the result-processing cell.
result_tunit_df = pd.read_csv('chatgpt/GPT3.5-turbo/tunit_df_result_mod.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/GPT3.5-turbo/sen_df_result_mod.csv', encoding='utf-8-sig')

print("## 语病比较 ##")
print("------ Tunit Evaluation ------")
# Metrics for the t-unit labels obtained without and with conversation context.
for tag, pred_col in (
    ("[No Context]", 'yubing_label'),
    ("[Context]", 'yubing_label_context'),
):
    print(tag)
    print(print_evaluation(result_tunit_df['ground_truth_label'], result_tunit_df[pred_col]))
print("[Comparison]")
print(label_compare(
    result_tunit_df['yubing_label'],
    result_tunit_df['yubing_label_context'],
    result_tunit_df['ground_truth_label'],
))

print("------ Sentence Evaluation ------")
print(print_evaluation(result_sen_df['ground_truth_label'], result_sen_df['yubing_label']))

In [None]:
import pandas as pd

# Merged tables written by the merge cell.
result_tunit_df = pd.read_csv('chatgpt/final_tunit_df.csv', encoding='utf-8-sig')
result_sen_df = pd.read_csv('chatgpt/final_sen_df.csv', encoding='utf-8-sig')

# One report section per question type, each evaluated on t-units and then on sentences.
for heading, label_col in (
    ("## 语病比较 ##", 'yubing_label'),
    ("## 语法比较 ##", 'yufa_label'),
    ("## 字词比较 ##", 'zici_label'),
):
    print(heading)
    for banner, frame in (
        ("------ Tunit Evaluation ------", result_tunit_df),
        ("------ Sentence Evaluation ------", result_sen_df),
    ):
        print(banner)
        print(print_evaluation(frame['ground_truth_label'], frame[label_col]))

In [None]:
def combine_evaluation(df):
    """Add a ``combine_label`` column that is 1 only where both checks passed.

    A row gets 1 when its ``yufa_label`` and ``zici_label`` are both 1 and 0
    otherwise (missing labels count as not 1). The rows of ``df`` itself are
    used; the previous version iterated the global ``result_tunit_df`` whatever
    frame was passed in, so the sentence table was labelled from t-unit rows.

    :param df: DataFrame with ``yufa_label`` and ``zici_label`` columns; modified in place.
    :return: None
    """
    both_ok = (df['yufa_label'] == 1) & (df['zici_label'] == 1)
    # Stored as float (1.0 / 0.0), the dtype the row-by-row assignment produced.
    df['combine_label'] = both_ok.astype(float)

# Add the combined (语法 AND 字词) label to both tables.
combine_evaluation(result_tunit_df)
combine_evaluation(result_sen_df)

# First compare the combined label with the overall 语病 label, then with the ground truth
# (t-units first, sentences second in each pair).
print(print_evaluation(result_tunit_df['yubing_label'], result_tunit_df['combine_label']))
print(print_evaluation(result_sen_df['yubing_label'], result_sen_df['combine_label']))
print(print_evaluation(result_tunit_df['ground_truth_label'], result_tunit_df['combine_label']))
print(print_evaluation(result_sen_df['ground_truth_label'], result_sen_df['combine_label']))


In [None]:
# Part numbers (0..10) and common path prefix of the partial result files.
index = range(11)  # file suffixes 0, 1, ..., 10
file_name_pre = 'chatgpt/yubing/tunit_df_result'

def overwrite(file_name_pre, index, column='result', output_file=None):
    """Merge the partial result files ``<file_name_pre><i>.csv`` into one table.

    The first file listed in ``index`` is the base table. For every later file,
    each non-missing value in ``column`` replaces the base value in the row
    with the same index, so later files win where several contain an answer.
    Rows that exist only in a later file are ignored.

    The previous version indexed the frames with ``df_new[i]`` (a column
    lookup, which raises ``KeyError``) and tested ``is not None``, which never
    filters the NaN that pandas uses for empty cells.

    :param file_name_pre: common path prefix of the partial files
    :param index: iterable of file suffixes, e.g. ``range(11)``
    :param column: name of the column holding the answers to merge
    :param output_file: where to write the merged table; defaults to the last
        partial file, which is what the original code wrote to
    :return: None
    :raises ValueError: if ``index`` is empty
    """
    merged = None
    file_name = None
    for part_no in index:
        file_name = file_name_pre + str(part_no) + '.csv'
        part = pd.read_csv(file_name, encoding='utf-8-sig')
        if merged is None:
            merged = part
        else:
            # Values of the newer part take priority; gaps fall back to what is already merged.
            merged[column] = part[column].combine_first(merged[column])
    if merged is None:
        raise ValueError("index is empty: no partial result files to merge")
    target = file_name if output_file is None else output_file
    merged.to_csv(target, index=False, encoding='utf-8-sig')

# Merge the partial result files listed in `index`.
# NOTE(review): the original note here read "hard append...?" — the merge strategy
# appears to be unsettled; confirm before relying on the output.
overwrite(file_name_pre, index)