## Clean up the model's generated results after LoRA fine-tuning

In [None]:
import os
from tqdm import tqdm

target_reason_folder='llama2-70b-chat-lora/inference'
# Flatten every generated answer in the folder into a single clean paragraph.
# Each file is rewritten in place.
reasonList=os.listdir(target_reason_folder)
for reason in tqdm(reasonList):
    reason_path = os.path.join(target_reason_folder, reason)
    with open(reason_path, 'r') as file:
        text = file.read()
    # If the text has multiple paragraphs, merge them into one paragraph
    text = text.replace('\n', ' ')
    # Remove the "* " bullet markers
    text = text.replace('* ', '')
    # Collapse runs of spaces. A single replace() pass only halves a run, so
    # three or more consecutive spaces (e.g. from blank lines) would survive.
    while '  ' in text:
        text = text.replace('  ', ' ')
    # Strip leading whitespace last, so a space left behind by a removed
    # bullet marker does not remain at the start of the text.
    text = text.lstrip()
    with open(reason_path, 'w') as file:
        file.write(text)

## This script processes text files to categorize responsibility in traffic accidents. It identifies mentions of vehicles in the first sentence of each file and categorizes the responsibility accordingly. Files with ambiguous or unclear responsibility are logged separately for further review.

In [None]:
import os
from tqdm import tqdm

# Spellings of each vehicle that may appear in a generated answer, listed in
# the priority order used when several vehicles are mentioned.
_VEHICLE_PATTERNS = (
    ("V1", ("V1", "Vehicle #1", "Vehicle 1", "Vehicle1",
            "Vehicle # 1", "Vehicle One", "Vehicle one", "V-1")),
    ("V2", ("V2", "Vehicle #2", "Vehicle 2", "Vehicle2",
            "Vehicle # 2", "Vehicle Two", "Vehicle two", "V-2")),
    ("V3", ("V3", "Vehicle #3", "Vehicle 3", "Vehicle3",
            "Vehicle # 3", "Vehicle Three", "Vehicle three", "V-3")),
)


def _mentions(fragment, label):
    """Return True if *fragment* contains any spelling of vehicle *label*."""
    patterns = dict(_VEHICLE_PATTERNS)[label]
    return any(pattern in fragment for pattern in patterns)


def _find_vehicle_label(fragment):
    """Return the first of V1/V2/V3 mentioned in *fragment*, or None."""
    for label, _patterns in _VEHICLE_PATTERNS:
        if _mentions(fragment, label):
            return label
    return None


def _log_name(log_path, name):
    """Append *name* as one line to the log file at *log_path*."""
    with open(log_path, 'a') as f:
        f.write(name + '\n')


def changelabel(name):
    """Rewrite one generated answer so that it starts with a responsibility label.

    Reads ``llama2-70b-chat-lora/generatedResults/<name>``, turns its first
    sentence into ``V1``/``V2``/``V3`` (or prepends ``Neither``) and writes the
    result to ``llama2-70b-chat-lora/responsibility/<name>``, creating that
    folder if needed.

    Rules applied to the first sentence (text before the first ``'. '``):

    * If it contains ``because`` (checked first) or ``due to``, the vehicle is
      looked up in the part before the keyword, and the part after the keyword
      becomes a new second sentence starting with ``Because``/``Due to``.
    * Otherwise, if both V1 and V2 are mentioned, ``Neither`` is prepended and
      the file name is appended to ``hard2judge.txt``.
    * Otherwise the sentence is replaced by the first vehicle mentioned.
    * If no vehicle can be found, the file name is appended to
      ``errorData.txt``.

    Args:
        name: File name (not a path) inside the generated-results folder.
    """
    folder = 'llama2-70b-chat-lora/generatedResults'
    hard2judgetxt = 'hard2judge.txt'
    errorData = 'errorData.txt'
    with open(os.path.join(folder, name), 'r') as file:
        text = file.read()

    # Split the text into sentences
    sentences = text.split('. ')
    first_sentence = sentences[0]

    for keyword, prefix in (('because', 'Because'), ('due to', 'Due to')):
        if keyword not in first_sentence:
            continue
        # partition() splits on the first occurrence only, so a reason that
        # itself contains the keyword again is kept whole instead of being
        # truncated at the second occurrence.
        subject, _, cause = first_sentence.partition(keyword)
        label = _find_vehicle_label(subject)
        if label is None:
            # NOTE(review): the first sentence is left untouched here, so the
            # reason clause appears twice in the output; the file is logged
            # for manual review.
            _log_name(errorData, name)
        else:
            sentences[0] = label
        sentences.insert(1, prefix + cause)
        break
    else:
        # No causal keyword: classify from the whole first sentence.
        label = _find_vehicle_label(first_sentence)
        if _mentions(first_sentence, "V1") and _mentions(first_sentence, "V2"):
            sentences.insert(0, 'Neither')
            _log_name(hard2judgetxt, name)
        elif label is not None:
            sentences[0] = label
        else:
            sentences.insert(0, 'Neither')
            _log_name(errorData, name)

    # Join the sentences back together
    general_processed_text = '. '.join(sentences)
    # Save the processed text to a new file
    save_folder = 'llama2-70b-chat-lora/responsibility'
    os.makedirs(save_folder, exist_ok=True)
    with open(os.path.join(save_folder, name), 'w') as file:
        file.write(general_processed_text)


if __name__ == '__main__':
    # Relabel every generated result file, showing a progress bar.
    folder = 'llama2-70b-chat-lora/generatedResults'
    file_names = os.listdir(folder)
    progress = tqdm(file_names)
    for file_name in progress:
        changelabel(file_name)

## Split the responsibility/type output generated by the fine-tuned models into a label and a reason

In [None]:
import os
from tqdm import tqdm


def generate_groundtruth(after_label_folder,
                         label_folder='llama2-70b-chat-lora/label',
                         reason_folder='llama2-70b-chat-lora/reason'):
    """Split every labelled answer into a label file and a reason file.

    For each file in *after_label_folder*, the text before the first period
    is written to *label_folder* and the text after it (with leading
    whitespace removed) is written to *reason_folder*, both under the same
    file name. Files that cannot be split (no period) or read as text are
    skipped and their names printed.

    Args:
        after_label_folder: Folder containing the "<label>. <reason>" files.
        label_folder: Destination folder for the labels; created if missing.
        reason_folder: Destination folder for the reasons; created if missing.
    """
    filenames = os.listdir(after_label_folder)

    # Create the output folders once up front instead of re-checking them
    # for every file.
    os.makedirs(label_folder, exist_ok=True)
    os.makedirs(reason_folder, exist_ok=True)

    for filename in tqdm(filenames):
        with open(os.path.join(after_label_folder, filename), 'r') as afterlabel_file:
            try:
                afterlabel_content = afterlabel_file.read()
                # Content before the first period is the label, the rest is the reason
                label, reason = afterlabel_content.split('.', maxsplit=1)
            except ValueError:
                # Raised when there is no period to split on (and also for
                # undecodable content); report the file and move on.
                print(filename)
                continue
        reason = reason.lstrip()

        with open(os.path.join(label_folder, filename), 'w') as label_file:
            label_file.write(label)
        with open(os.path.join(reason_folder, filename), 'w') as reason_file:
            reason_file.write(reason)


# Split the relabelled responsibility files into label and reason files.
generate_groundtruth(after_label_folder="llama2-70b-chat-lora/responsibility")

## This script processes text files containing descriptions of traffic accidents. It classifies each accident into predefined categories based on the first sentence of the description and extracts the reasoning behind the classification. 

In [None]:
import os
from tqdm import tqdm

import re

# Directories for storing types, reasons, and error data
type_folder = 'ModelDataset/llama2-70b-chat-lora/type'
type_save_folder = 'ModelDataset/llama2-70b-chat-lora/type_label'
reason_save_folder = 'ModelDataset/llama2-70b-chat-lora/type_reason'
type_errorData = 'type_errorData.txt'

# Create directories if they don't exist
os.makedirs(type_save_folder, exist_ok=True)
os.makedirs(reason_save_folder, exist_ok=True)

# List of files in the type folder
typelist = os.listdir(type_folder)

# Dictionary mapping type numbers to descriptions
typedict = {
    '1': 'Single-Vehicle Accident',
    '2': 'Backover Collision',
    '3': 'Rear-End Collision',
    '4': 'Frontal Collision',
    '5': 'Front-to-Side Collision',
    '6': 'Non-Motorized Vehicle or Pedestrian Crash',
    '7': 'Other'
}

# A type number only counts when it stands alone. A plain substring test would
# read the "1" in "V1", "10" or "2019" as type 1.
type_number_pattern = re.compile(
    r'\b(' + '|'.join(re.escape(key) for key in typedict) + r')\b'
)

# Process each file. The loop variable is deliberately not called `type`, so
# the builtin of that name stays usable afterwards.
for type_name in tqdm(typelist):
    with open(os.path.join(type_folder, type_name), 'r') as f:
        type_content = f.read()

    # Split the content into the first sentence and the rest
    if '.' not in type_content:
        # No sentence boundary: the whole text is both the label source and
        # the reason.
        first_sentence = type_content
        other_sentence = type_content
    elif type_content[0].isdigit():
        # Text starting with a digit (e.g. "3. Rear-End Collision. ..."):
        # keep the text up to the second period as the first sentence.
        parts = type_content.split('.', maxsplit=2)
        first_sentence = parts[0] + '.' + parts[1]
        other_sentence = parts[2] if len(parts) > 2 else ''
    else:
        first_sentence, other_sentence = type_content.split('.', maxsplit=1)
    other_sentence = other_sentence.lstrip()

    # Extract the type label from the first standalone type number found
    number_match = type_number_pattern.search(first_sentence)
    if number_match:
        type_key = number_match.group(1)
        type_label = type_key + '.' + typedict[type_key]
    else:
        type_label = ''

    # Write unclassified types to the error data file
    if type_label == '':
        with open(type_errorData, 'a') as f:
            f.write(type_name + '\n')

    # Save the type label and the reasoning
    with open(os.path.join(type_save_folder, type_name), 'w') as f:
        f.write(type_label)
    with open(os.path.join(reason_save_folder, type_name), 'w') as f:
        if other_sentence == '':
            # Report files for which no reasoning text was found
            print(type_name)
        f.write(other_sentence)


## Calculate the accuracy

In [None]:
import os
from tqdm import tqdm

type_label_folder='type/GroundTruth/label/test' #Actual labels
predict_label_folder='ModelDataset/llama2-70b-chat-lora/type_label' #Predicted labels

# Number of predictions that have a ground-truth file, and how many of those
# match it exactly. The counter is not named `sum`, so the builtin stays usable.
total=0
accurate=0
for label in tqdm(os.listdir(predict_label_folder)):
    # Handle a missing ground-truth file per prediction: report it and keep
    # going, instead of aborting the evaluation at the first missing file.
    try:
        with open(os.path.join(type_label_folder,label),'r') as f:
            type_label=f.read()
    except FileNotFoundError:
        print(label)
        continue
    with open(os.path.join(predict_label_folder,label),'r') as f:
        predict_label=f.read()
    total+=1
    if type_label==predict_label:
        accurate+=1

# Accuracy in percent; guard against an empty evaluation set.
if total:
    print(accurate/total*100)
else:
    print('No labelled files to evaluate')