In [59]:
import pandas as pd
import requests
import json
import time

API_KEY = "sk-pldpwpjtlbbtuxlglgilhntboxjwvvcmpyyeluishxqnfxvk" # Use your own API key, dont use mine.
API_URL = "https://api.siliconflow.cn/v1/chat/completions" # Use your own server provider, dont use mine.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" # Use your own selected model.

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

def call_siliconflow_api(prompt, max_tokens=256, temperature=0.7):
    """
    Calling the Silicon Mobility API to generate text
    prompt: Input prompt word content
    max_tokens: Maximum number of tokens returned
    temperature: The randomness of generated text, to be controlled at around 0.7 by the recommendation from documents.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a professional event summary generator. Please detect, describe the event in the third person and neutrally based on the following information."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        if response.status_code == 200:
            result = response.json()
            # return result
            # To strip off only the respond texts.
            generated_text = result["choices"][0]["message"]["content"].strip()
            return generated_text
        else:
            print(f"HTTP Error: {response.status_code} - {response.text}")
            return ""
    except Exception as e:
        print(f"API request exception：{e}")
        return ""

def generate_objective_summary(record):
    """
    Generate a third-person objective description based on an event record.
    Used fields:
      - communication_breakdown
      - report_1_narrative
      - report_2_narrative
      - synopsis
    """
    comm_break = record.get("Person 1.8_Communication Breakdown", "")
    report1 = record.get("Report 1_Narrative", "")
    report2 = record.get("Report 2_Narrative", "")
    synopsis = record.get("Report 1.2_Synopsis", "")
    
    # If report2 is None, then ignore this section
    if report2 == None:
        report2_text = "None"
    else:
        report2_text = report2
    
    prompt = (
        "The following is a record of the incident, including the conflict between the two parties, character descriptions, and the development of the incident."
        "Based on the following information, please summarize the cause, process and result of the incident in the third person perspective, ensuring that the summary is neutral and accurate and avoids extremely high evaluation. \n\n"
        f"The conflicting parties：{comm_break}\n"
        f"Character 1 Description：{report1}\n"
        f"Character 2 Description: {report2_text}\n"
        f"synopsis：{synopsis}\n\n"
        "Please generate a concise and clear summary of the incident. If a solution is proposed in context, include that in the summary as well."
    )
    summary = call_siliconflow_api(prompt, max_tokens=3000, temperature=0.7)
    return summary

def main():
    # Read a CSV file from given location
    data = pd.read_csv("../docs/attandent_data.csv")
    
    required_columns = ["Person 1.8_Communication Breakdown", "Report 1_Narrative", "Report 2_Narrative", "Report 1.2_Synopsis"] 

    df_selected = df[required_columns]
    objective_summaries = []

    print("Generating activity summary...")
    for idx, row in df_selected.iterrows():
        summary_text = generate_objective_summary(row)
        objective_summaries.append(summary_text)
        # Control the call rate to avoid triggering API current limiting
        time.sleep(1)
        print(idx + 1, summary_text)
    
    # Add the generated summary to the data, and name the new column objective_summary
    print(objective_summaries)
    data["objective_summary"] = objective_summaries
    output_file = "../docs/event_objective_summary.csv"
    data.to_csv(output_file, index=False)
    print(f"All events are summarized and generated, and the results are saved to {output_file}")

if __name__ == "__main__":
    main()


Generating activity summary...
1 **Incident Summary:**

A flight attendant discovered an open biohazard bag containing soiled linens, wipes, and a dustpan behind the last row of economy seats after the plane had pushed back from the gate. The attendant, wearing gloves, investigated the contents and reported the issue to the captain. The crew decided to seal the bag and continue the flight, though they expressed discomfort with having the biohazard on board. The situation could have been prevented with more time for thorough cleaning and pre-boarding checks.
2 The incident arose from a security breach involving a gate-checked bag containing spare lithium-ion batteries. The flight attendant cited unclear company policies on lithium-ion batteries and excessive enforcement of carry-on policies by ground personnel as contributing factors. The flight attendant suggested that such bags should not be gate-checked, underscoring the importance of improved guidelines and staff training to prevent

In [81]:
API_KEY = "sk-pldpwpjtlbbtuxlglgilhntboxjwvvcmpyyeluishxqnfxvk" # Use your own API key, dont use mine.
API_URL = "https://api.siliconflow.cn/v1/chat/completions" # Use your own server provider, dont use mine.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" # Use your own selected model.

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

def call_siliconflow_api(prompt, max_tokens=5000, temperature=0.7):
    """
    Calling the Silicon Mobility API to generate text
    prompt: Input prompt word content
    max_tokens: Maximum number of tokens returned
    temperature: The randomness of generated text, to be controlled at around 0.7 by the recommendation from documents.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a professional text analysis expert who is good at extracting labels from large amounts of descriptions."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
        if response.status_code == 200:
            result = response.json()
            generated_text = result["choices"][0]["message"]["content"].strip()
            return generated_text
        else:
            print(f"HTTP Error: {response.status_code} - {response.text}")
            return ""
    except Exception as e:
        print(f"API request exception：{e}")
        return ""

def generate_candidate_labels_in_batches(event_texts, batch_size=20):
    """
    Process the records in batches, generate a portion of candidate tags in each batch
    Finally merge the results of all batches to remove duplicates.
    """
    all_candidate_labels = []
    total = len(event_texts)
    num_batches = (total + batch_size - 1) // batch_size
    
    for i in range(num_batches):
        end_idx = min((i+1)*batch_size, total)
        batch = event_texts[i*batch_size:end_idx]
        batch_text = "\n\n".join(batch)
        prompt = (
            "Provided below are several third-person descriptions of aviation incidents. Please analyze these incidents and summarize a series of representative labels that can summarize the characteristics of the incidents.\n"
            "For example: emergencies, delays, accidents, safety hazards, abnormal weather, relationship between roles etc.\n"
            "Please return a comma-separated list of tags (be careful not to give any extra text).\n\n"
            f"contents: \n{batch_text}\n"
        )
        print(f"Batch{i+1}/{num_batches}...")
        candidate_text = call_siliconflow_api(prompt, max_tokens=5000, temperature=0.7)
        # Separate by commas and remove spaces
        batch_labels = [label.strip() for label in candidate_text.split(",") if label.strip()]
        all_candidate_labels.extend(batch_labels)
        # Control the call rate to avoid triggering API current limiting
        time.sleep(1)
    
    # Sort and remove duplicates
    unique_labels = sorted(set(all_candidate_labels))
    return unique_labels

def main():
    # Read a CSV file from given location
    data = pd.read_csv("../docs/event_objective_summary.csv")

    # Read the event description of each record into a list
    event_texts = data["objective_summary"].tolist()
    print("Generate candidate labels in batches...")
    candidate_labels = generate_candidate_labels_in_batches(event_texts, batch_size=20)
    print("Final candidate labels:", candidate_labels)

if __name__ == "__main__":
    main()


Generate candidate labels in batches...
Batch1/22...
Batch2/22...
Batch3/22...
Batch4/22...
Batch5/22...
Batch6/22...
Batch7/22...
Batch8/22...
Batch9/22...
Batch10/22...
Batch11/22...
Batch12/22...
Batch13/22...
Batch14/22...
Batch15/22...
Batch16/22...
Batch17/22...
Batch18/22...
Batch19/22...
Batch20/22...
Batch21/22...
Batch22/22...


In [103]:
candidate_labels = ['**Final Labels:**\n\nbiohazard', '- physical injury', 'Alcohol violations', 'Attendant communication', 'Attendant efforts', 'Attendant reaction', 'Attendant vs FO conflict', 'COVID-19', 'CRM', 'CRM incident', 'Captain in command', 'Captain-FA interaction', 'Carry-on', 'Child safety', 'Communication barriers', 'Communication misunderstanding', 'Communication protocols', 'Compliance issues', 'Crew Conflict', 'Crew Coordination', 'Crew Coordination; Safety Issue Reporting', 'Crew Coordination; Scheduling Conflict', 'Crew Handling', 'Crew Misconduct', 'Crew Professionalism', 'Crew Response', 'Crew Response; Crew Communication', 'Crew Response; Fuel Odor Detection', 'Crew Response; Scheduling Conflict', 'Crew conflict', 'Cross-checking', 'Deadheading pilots', 'Ejection', 'FFDO communication', 'FO behavior', 'Face mask policy violation', 'Family passengers', 'Flight Attendant Behavior', 'Flight Attendant Error', 'Flight Attendant Handling', 'Fuel Odor Detection', 'Ground staff intervention', 'Hard Landing', 'Hazmat Response', 'Hoverboard', 'IT issues', 'Internal policy', 'Internal procedures', 'Long duty hours', 'MEL issue', 'Manual discrepancies', 'Mask issuance', 'Mask misuse', 'Mask policy enforcement', 'Mask policy misunderstandings', 'Masks', 'Medical exception', 'Medical exemptions', 'Movement', 'No resolution', 'No-fly list', 'Non-compliance', 'PIC', 'Passenger Behavior', 'Passenger Reactions', 'Passenger accommodations', 'Passenger behavior', 'Passenger compliance', 'Passenger confusion', 'Passenger denial', 'Passenger dissatisfaction', 'Passenger disturbance', 'Passenger non-compliance', 'Passenger resistance', 'Passenger warning', 'Physical consequence', 'Physical threats', 'Piper safety.', 'Police Intervention; Device Misuse', 'Policy adherence', 'Policy clarification', 'Policy enforcement', 'Professional Misconduct; E-Cigarette Use', 'Professional Misconduct; Medical Emergency', 'Proper mask', 'Resolution', 'Resolution; Banned Device Use', 'Resolution; Crew Conflict', 'Resolution; E-Cigarette Use', 'Resolution; Flight Attendant Professionalism', 'Resolution; Maintenance Delays', 'Resolution; Medical Emergency', 'Resolution; Safety Issue Reporting', 'Resolution; Safety Protocol Misunderstanding', 'Resolution; Scheduling Conflict', 'Safety Concerns', 'Safety Concerns; Medical Condition', 'Safety concern', 'Safety concerns', 'Safety hazard', 'Scheduling Error', 'Seat assignment', 'Seat reassignment', 'Security', 'Smoking Ban', 'TCAS Resolution Advisory', 'Technical issues', 'abnormal weather', 'access control', 'administrative error', 'aircraft performance', 'alcohol effects on compliance', 'approach safety', 'approved mask usage', 'assistance role', 'attendant error\n- passenger miscount', "attendant's reminders", 'attendants', 'aviation incident', 'aviation safety', 'aviation safety incident', 'aviation safety protocols', 'aviational protocols', 'aviational training', 'boarding', 'boarding procedures', 'boarding process disruption', 'boarding_ chaos', 'cabin cleaning', 'cabin door closure', 'cabin environment concerns', 'cabin environment management', 'cabin reseating', 'cabin safety', 'cabin security', "captain's failure to adhere", 'cargo hold placement', 'chaos', 'checked baggage', 'checklist omission', 'civil aviation safety', 'cleaning protocols', 'cleaning time', 'communication', 'communication breakdown', 'communication delay', 'communication discrepancies', 'communication failure', 'communication gap', 'communication issue', 'communication issues', 'communication lapse\n- boarding process', 'communication protocols', 'communication tools', 'communique_breakdown', 'company policies', 'compliance issues', 'conflict', 'conflict between roles\n- service cart dislocation', 'conflict between roles\n- uninvolved passenger', 'conflict resolution', 'container policy', 'crew Training', 'crew behavior', 'crew communication', 'crew conflict', 'crew conflict/role_discrepancy', 'crew coordination', 'crew coordination.', 'crew deployment', 'crew health', 'crew interaction', 'crew intervention', 'crew judgment', 'crew oversight', 'crew positioning', 'crew procedures', 'crew resource management', 'crew training', 'crew workload', 'customer compliance', 'customer concern', 'customer deception', 'customer interaction', 'customer service', 'deadhead crew', 'debriefing delay', 'delay incident', 'delayed_communication', 'delays', 'deplaning safety', 'device confiscation', 'dispatch inefficiency', 'dispatch team miscommunication', 'disruption', 'disruptive behavior', 'disruptive passenger', 'diverted flights\n- procedural failure', 'diverting flight\n- crew fatigue', 'door closure', 'door operations', 'drug use', 'drum set lithium batteries', 'duty performance', 'e-cigarette', 'e-cigarettes', 'early V1 call', 'early boarding', 'electronic cigarettes', 'electronic device use', 'emergence', 'emergency', 'emergency communication', 'emergency deployment', 'emergency landing', 'emergency management', 'emergency procedures', 'emergency protocols', 'emergency response', 'enforcement', 'engine shutdown', 'ensuring no duplicates and covering all the necessary aspects of each incident.', 'equipment malfunction', 'face mask policy', 'face mask policy enforcement', 'face mask policy enforcement inconsistency', 'face mask policy enforcement inconsistency.', 'face mask policy violation', 'face_mask_policy', 'failure to issue critical command', 'fair treatment', 'fatigue', 'fire extinguisher', 'flight attendant interventions', 'flight attendant training', 'flight attendants', 'flight safety', 'frustration', 'fume/smoke incident', 'functional test', 'gate operations', 'gate-checked bag', 'generator failure', 'ground hold\n- boarding process', 'ground operations', 'ground personnel', 'ground personnel intervention', 'ground-personnel conflicts', 'grounding', 'group_behavior', 'hard landing', 'headset use', 'health communication', 'health communication breakdown', 'health communication breakdowns', 'health communication errors', 'health guidelines', 'health guidelines adherence', 'health hazards', 'health issues', 'health policy enforcement', 'health policy enforcement challenges', 'health policy enforcement problems', 'health scares', 'health training', 'hostile behavior', 'hostile work environment\n- ground hold', 'hostile work environment\n- injury to passenger', 'ill passengers', 'improper cleaning', 'improper communication', 'improper communication\n- turbulence', 'improper door securing', 'improper enforcement', 'improper exit', 'improper handling', 'improper procedure', 'improper protocol', 'improper resource allocation', 'improper securing', 'improper service', 'improper_communication_during_boarding', 'improper_communication_leading_to_safety_hazards', 'improper_crew_concerns', 'improper_door_arthing', 'improper_handling_of_maintenance_issues', 'improper_protocol_adherence', 'improper_safety_protocols', 'in-flight policies', 'incident investigation', 'incident management', 'incident/incident', 'incomplete solutions', 'internal discipline issues', 'internal process inefficiency', 'intervention attempts', 'lack of coordination', 'lack of supervision', 'lavatory use', 'law enforcement', 'leadership conflict', 'lithium batteries', 'lithium-ion batteries', 'logistics', 'maintenance', 'maintenance error', 'maintenance issues', 'maintenance problem', 'manifests discrepancies', 'manifests issues', 'mask compliance', 'mask enforcement challenges', 'mask policy adherence', 'mask policy adherence issues', 'mask policy communication', 'mask policy communication challenges', 'mask policy communication errors', 'mask policy enforcement', 'mask policy enforcement failure', 'mask policy non-compliance', 'mask policy retraining needs', 'mask policy violations', 'mask requirements', 'mask violations', 'mask_incompliance', 'mask_non_compliance', 'mask_noncompliance', 'mask_noncompliance_and_passenger_behavior', 'mask_noncompliance_incident', 'mask_policies', 'mask_policy_enforcement', 'mask_policy_violation', 'mask_policy_violation_and_passenger_behavior', 'mask_policy_violation_incident', 'mask_rejected', 'mask_rejection', 'mask_required', 'mask_usage', 'mask_violation', 'medical assistance', 'medical conditions', 'medical emergency', 'medical equipment', 'medical exemption', 'medical exemption denial', 'medical incident', 'medical incidents', 'medical response', 'memory/compliance failure', 'miscommunication', 'near-miss', 'non-compliance', 'non-compliance resolution', 'non-compliance with policies', 'odor', 'off-duty assistance', 'operated', 'operation delay', 'operational challenges', 'operational communication', 'operational delay', "operational delay\n- captain's instructions", 'operational efficiency', 'operational oversight', 'operations', 'outlet usage', 'overhead bin', 'oversight', 'overweight landing', 'partial compliance', 'passenger behavior', 'passenger behavior management', 'passenger compliance', 'passenger compliance - refuse', 'passenger concern\n- special needs documentation', 'passenger concern\n- unaccounted passenger', 'passenger count', 'passenger deplaning', 'passenger discomfort', 'passenger disturbance', 'passenger health', 'passenger manifests', 'passenger mask compliance', 'passenger mask issues', 'passenger mask policy', 'passenger mask policy issues', 'passenger mask policy violation', 'passenger medical exemption denial', 'passenger non-compliance', 'passenger non-compliance despite exemption', 'passenger non-compliance with policy', 'passenger non-compliance with policy.', 'passenger resistance', 'passenger resistance management', 'passenger safety', 'passenger safety\n- attendant error', 'passenger safety.', 'passenger screening', 'passenger_behavior', 'passenger_resistance', 'penalties', 'performance issues', 'physical injury\n- passenger left aircraft', 'pilot training', 'policy enforcement', 'policy violations', 'poor communication', 'post-landing procedures', 'procedural error', 'procedural violations', 'procedure mistake', 'procedure violations', 'process failure', 'process improvement', 'rapid turnaround', 'regulatory issues', 'repeated non-compliance', 'repeated requests', 'repeated violations', 'reporting issues', 'resolution', 'rush', 'safe deplaning', 'safety', 'safety awareness', 'safety breach', 'safety breakdown', 'safety breakdowns', 'safety communication', 'safety concerns', 'safety demonstration', 'safety deviations', 'safety enforcement', 'safety equipment', 'safety hazard', 'safety hazard\n- emergency situation', 'safety hazard mask-wearing compliance seat changes passenger behavior communication breakdown mask policy safety demonstration weight and balance issue safety equipment mask policy non-compliance seat rearrangement safety equipment mask-wearing violation seat changes communication breakdown safety hazard mask policy policy non-compliance seat changes safety demonstration mask-wearing compliance safety equipment mask policy non-compliance safety demonstration mask-wearing violation seat changes communication breakdown weight and balance issue safety hazard mask policy non-compliance seat changes communication breakdown safety demonstration mask-wearing compliance safety equipment', 'safety hazard.', 'safety hazards', 'safety hazards.', 'safety incident', 'safety issues', 'safety management', 'safety procedure', 'safety procedures', 'safety protocols', 'safety protocols.', 'safety protocols.\n\nThis list is comprehensive and includes all the key tags from each incident', 'safety risk', 'safety risks', 'safety standards', 'safety system defect', 'safety training', 'safety training gaps', 'safety training gaps.', 'safety training needs', 'safety_hazard', 'safety_oversight', 'safety_violations', 'seat changes', 'seat management', 'seat swapping', 'seat usage', 'seatbelt compliance', 'seatbelt system', 'seating issues', 'security', 'security assistance required\n- miscommunication', 'security breach', 'security incident', 'security protocols', 'service cart dislocation\n- passenger safety', 'service conflicts', 'slide deployment', 'smart bag', 'special exemptions', 'staff training', 'system failure', 'systems functionality', 'tail strike', 'takeoff procedures', 'takeoff safety concern\n- hostile work environment', 'tension', 'training', 'turbulence', 'unequal treatment', 'unsafe_communication', 'wake turbulence', 'weather delay', 'weather-related delays', 'workload overload']

API_KEY = "sk-pldpwpjtlbbtuxlglgilhntboxjwvvcmpyyeluishxqnfxvk" # Use your own API key, dont use mine.
API_URL = "https://api.siliconflow.cn/v1/chat/completions" # Use your own server provider, dont use mine.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" # Use your own selected model.

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

prompt = (
    "Please merge those similar labels. For some specific items, such as batteries, etc., you should keep them.\n\n"
    f"labels: \n{candidate_labels}\n"
)
payload = {
    "model": MODEL_NAME,
    "messages": [
        {"role": "system", "content": "You are an expert in text classification and are good at assigning appropriate labels to events from given candidate labels."},
        {"role": "user", "content": prompt}
    ],
    "max_tokens": 5000,
    "temperature": 0.7
}
try:
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    if response.status_code == 200:
        result = response.json()
        generated_text = result["choices"][0]["message"]["content"].strip()
        print(generated_text)
    else:
        print(f"HTTP Error: {response.status_code} - {response.text}")
except Exception as e:
    print(f"API request exception：{e}")


It seems like you've provided a list of labels for text classification, and you want me to help merge similar labels while keeping specific items like "batteries" intact. Let me carefully analyze the list and suggest a merged version that covers all necessary aspects while reducing redundancy.

---

### **Final Labels (Merged and Streamlined):**

1. **General Categories:**
   - Aviation incident
   - Aviation safety
   - Aviation safety incident
   - Aviation safety protocols
   - Aviation training
   - Safety concerns
   - Safety hazards
   - Safety incidents
   - Safety issues
   - Safety management
   - Safety protocols
   - Safety risks
   - Safety standards
   - Safety system defect
   - Safety training
   - Safety training gaps
   - Safety violations

2. **Crew-Related Issues:**
   - Crew coordination
   - Crew conflict
   - Crew misconduct
   - Crew professionalism
   - Crew response
   - Crew training
   - Crew workload
   - Crew deployment
   - Crew interaction
   - Crew healt

In [109]:
from collections import Counter

API_KEY = "sk-pldpwpjtlbbtuxlglgilhntboxjwvvcmpyyeluishxqnfxvk" # Use your own API key, dont use mine.
API_URL = "https://api.siliconflow.cn/v1/chat/completions" # Use your own server provider, dont use mine.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" # Use your own selected model.

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

def call_siliconflow_api(prompt, max_tokens=500, temperature=0.7):
    """
    Calling the Silicon Mobility API to generate text
    prompt: Input prompt word content
    max_tokens: Maximum number of tokens returned
    temperature: The randomness of generated text, to be controlled at around 0.7 by the recommendation from documents.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are an expert in text classification and are good at assigning appropriate labels to events from given candidate labels."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
        if response.status_code == 200:
            result = response.json()
            generated_text = result["choices"][0]["message"]["content"].strip()
            return generated_text
        else:
            print(f"HTTP Error: {response.status_code} - {response.text}")
            return ""
    except Exception as e:
        print(f"API request exception：{e}")
        return ""

def assign_labels_to_event(event_text, candidate_labels):
    """
    Assigning a label to a single event description prompts the LLM to select suitable labels from the candidate labels.
    Returns a list of labels (comma separated labels)
    """
    # Organize candidate labels into a comma-separated string
    label_options = ", ".join(candidate_labels)
    prompt = (
        "Below is a third-person description of an aviation incident. Based on the description, please select one or more tags that best describe the characteristics of the incident."
        "The candidate tag list is as follows:\n"
        f"{label_options}\n\n"
        "Please return the selected tags directly separated by commas without appending other text.\n\n"
        "You must assigne at least one of these labels [between attendant and crew, between attendant and passengers, between attendant and other people] to each record by analysis the relationship between the people in the incident."
        f"Event Description:\n{event_text}\n"
    )
    labels_text = call_siliconflow_api(prompt, max_tokens=1000, temperature=0.7)
    assigned_labels = [label.strip() for label in labels_text.split(",") if label.strip()]
    return assigned_labels

def main():
    # Read a CSV file from given location
    data = pd.read_csv("../docs/event_objective_summary.csv")
    
    # The candidate labels generated from previous requests
    candidate_labels = ["safety related", "crew-related issues", "passenger-related issues", "emergency", "operational issues",
                        "equipment malfunction", "mask policy", "training", "communication issues", "disruptive", "weather Related",
                        "health and medical issues", "fume/smoke incident", "lithium batteries", "electronic cigarettes",
                        "between attendant and crew", "between attendant and passengers", "between attendant and other people"]
    
    assigned_labels_all = []  # Used to store the labels assigned to each record (in list form)
    
    # Traverse each event description and assign tags to it
    print("Start assigning labels one by one...")
    for idx, row in data.iterrows():
        event_text = row["objective_summary"]
        labels = assign_labels_to_event(event_text, candidate_labels)
        # Run several times to make sure labels are assigned.
        while labels == []:
            labels = assign_labels_to_event(event_text, candidate_labels)
        assigned_labels_all.append(labels)
        print(f"Record {idx+1} with labels：{labels}")
        # Control the call rate to avoid triggering API current limiting
        time.sleep(1)
    
    # Save the list of labels for each record into a new column in CSV, converted to a comma-delimited string
    data["assigned_labels"] = [", ".join(labels) for labels in assigned_labels_all]
    output_file = "../docs/event_objective_summary.csv"
    data.to_csv(output_file, index=False)
    print(f"All events are summarized and generated, and the results are saved to {output_file}")

    # Count the number of times each label is assigned
    label_counter = Counter()
    for labels in assigned_labels_all:
        label_counter.update(labels)
    
    print("\nThe number of times each label is assigned:")
    for label, count in label_counter.items():
        print(f"{label}: {count}")

if __name__ == "__main__":
    main()


Start assigning labels one by one...
Record 1 with labels：['Safety related', 'Crew-Related Issues', 'Operational Issues', 'Health and Medical Issues', 'between attendant and crew']
Record 2 with labels：['Lithium batteries', 'Training', 'Communication Issues', 'between attendant and crew']
Record 3 with labels：['Safety related', 'Crew-Related Issues', 'Operational Issues', 'between attendant and crew']
Record 4 with labels：['between attendant and passengers', 'Crew-Related Issues', 'Equipment malfunction', 'Training', 'Operational Issues', 'Cargo']
Record 5 with labels：['Lithium batteries', 'between attendant and crew', 'between attendant and passengers', 'Operational Issues']
Record 6 with labels：['Crew-Related Issues', 'between attendant and passengers', 'Operational Issues', 'Lithium batteries']
Record 7 with labels：['Operational Issues', 'Communication Issues', 'Safety related', 'between attendant and crew']
Record 8 with labels：['Lithium batteries', 'Training', 'Communication Issue

In [141]:
# Read a CSV file from given location
data = pd.read_csv("../docs/event_objective_summary.csv")

all_labels = []
row_labels = []
for labels in data["assigned_labels"]:
    split_labels = [label.strip().lower() for label in str(labels).split(",")]
    all_labels.extend(split_labels)
    row_labels.extend([split_labels])
label_counts = Counter(all_labels)

for label, count in label_counts.most_common():
    print(f"{label}: {count} ")

data["assigned_labels"] = [", ".join(labels) for labels in row_labels]
output_file = "../docs/event_objective_summary.csv"
data.to_csv(output_file, index=False)

safety related: 264 
crew-related issues: 243 
operational issues: 241 
between attendant and crew: 220 
between attendant and passengers: 219 
mask policy: 156 
communication issues: 154 
disruptive: 85 
passenger-related issues: 70 
equipment malfunction: 49 
health and medical issues: 48 
between attendant and other people: 41 
training: 40 
emergency: 28 
fume/smoke incident: 20 
lithium batteries: 10 
electronic cigarettes: 9 
weather related: 8 
