# vision text data NLP 
Tony 08/12/2023

Input:
- raw data from my detect_road module
- ChatGPT-generated analysis result

Output:
- bbox JSON for a specific video
- risk JSON for a specific video

In [1]:
# Read the raw detect_road output for this video to inspect the key-frame information
file_path = "raw_data/Info_Video_0194.txt"

# Explicit encoding so the result does not depend on the platform's default code page
with open(file_path, 'r', encoding='utf-8') as file:
    key_frame_data = file.read()

# Display the first 1000 characters to get an overview of the content
key_frame_data[:1000]


'INFO of 0001:\nroad 0 is at middle_down\nsidewalk 1 is at right_down\nperson 0 is at middle_down\nThe [distance,angle] from person 0 to our dashcam is: [very close,89.97567850454345]\nperson 1 is at middle_down\nThe [distance,angle] from person 1 to our dashcam is: [very close,89.97128033898481]\nperson 2 is at middle_down\nThe [distance,angle] from person 2 to our dashcam is: [very close,-89.98346900030981]\nperson 3 is at left_down\nThe [distance,angle] from person 3 to our dashcam is: [very close,-89.98502153235941]\nperson 4 is at middle_down\nThe [distance,angle] from person 4 to our dashcam is: [very close,-89.98260645390403]\nperson 5 is at left_down\nThe [distance,angle] from person 5 to our dashcam is: [very close,-89.99403606265135]\nPerson 0 is on the road 0, sidewalk 1,his/her bbox is [1148.4463   625.3968  1183.7595   663.28143]\nPerson 1 is on the road 0, sidewalk 1,his/her bbox is [1120.0143  636.9002 1149.0658  672.4008]\nPerson 2 is on the road 0, sidewalk 1,his/her b

In [2]:
# Read the LLM-generated analysis (markdown) for the same video
file_path = "LLM_data/Result_Video_0194.md"

# Explicit encoding: generated markdown may contain non-ASCII characters, and the
# platform default code page would otherwise decide how they are decoded
with open(file_path, 'r', encoding='utf-8') as file:
    result_data = file.read()

# Display the first 1000 characters to get an overview of the content
result_data[:1000]


'### Frame 0001 Analysis\n\n**Summary of Key Frame Information:**\n- **Road Position:** The road (road 0) is located in the middle-down region of the frame.\n- **Sidewalk Position:** The sidewalk (sidewalk 1) is located in the right-down region of the frame.\n- **People Positions and Distances:**\n  - **Person 0:** Located at middle-down, very close to the dashcam, with an angle of 89.98 degrees. On road 0, sidewalk 1.\n  - **Person 1:** Located at middle-down, very close to the dashcam, with an angle of 89.97 degrees. On road 0, sidewalk 1.\n  - **Person 2:** Located at middle-down, very close to the dashcam, with an angle of -89.98 degrees. On road 0, sidewalk 1.\n  - **Person 3:** Located at left-down, very close to the dashcam, with an angle of -89.98 degrees. Not on any detected surface.\n  - **Person 4:** Located at middle-down, very close to the dashcam, with an angle of -89.98 degrees. Not on any detected surface.\n  - **Person 5:** Located at left-down, very close to the dashc

In [3]:
def is_small_bbox(bbox, width_threshold=5.0, height_threshold=5.0):
    """Tell whether a bounding box is narrower or shorter than the given thresholds.

    Args:
        bbox: Sequence of four numbers ``[x1, y1, x2, y2]``.
        width_threshold: Minimum acceptable ``x2 - x1`` (same units as the
            coordinates). Defaults to 5.0.
        height_threshold: Minimum acceptable ``y2 - y1``. Defaults to 5.0.

    Returns:
        True if the width is below ``width_threshold`` or the height is below
        ``height_threshold``; False otherwise.

    Raises:
        ValueError: If ``bbox`` does not contain exactly four values.
    """
    x1, y1, x2, y2 = bbox
    width = x2 - x1
    height = y2 - y1

    # A box failing either dimension is considered too small.
    return width < width_threshold or height < height_threshold


In [4]:
from collections import defaultdict
import re
# Function to parse the raw data and extract the relevant information

# Person 0 is on the road 0, sidewalk 1,his/her bbox is [1148.4463   625.3968  1183.7595   663.28143]
def parse_raw_data(data):
    """Extract per-frame pedestrian bounding boxes from the raw text.

    Recognised lines:
      * ``INFO of <frame>:`` starts a new frame; ``<frame>`` is kept as a string.
      * ``Person <id> ... bbox is [x1 y1 x2 y2]`` gives a bounding box.
      * ``Person <id> is not ... detected ...`` marks a person without a bbox.

    A person whose bbox is missing, malformed, or rejected by ``is_small_bbox``
    is recorded as ``[None, None, None, None]``. Person lines that do not fit
    either pattern, or that appear before the first ``INFO of`` header, are
    skipped instead of raising.

    Args:
        data: Full text of the raw data file.

    Returns:
        A ``defaultdict`` mapping frame id (str) to ``{"bbox": {person_id: bbox}}``,
        where ``person_id`` is an int and ``bbox`` is a list of four floats or
        four ``None`` values.
    """
    frames = defaultdict(lambda: {"bbox": {}})
    current_frame = None

    for line in data.split("\n"):
        if line.startswith("INFO of"):
            current_frame = line.split(":")[0].split()[-1]
            continue

        # Without a frame header there is no frame to attach the person to.
        if current_frame is None or "Person" not in line:
            continue

        if "bbox" in line:
            match = re.match(r"Person (\d+) .* bbox is (\[.*?\])", line)
            if match is None:
                continue
            person_id, bbox_str = match.groups()
            try:
                bbox = [float(value) for value in bbox_str.strip('[]').split()]
            except ValueError:
                bbox = []
            # The length check runs first so is_small_bbox only sees 4 values.
            if len(bbox) != 4 or is_small_bbox(bbox):
                bbox = [None, None, None, None]

            frames[current_frame]['bbox'][int(person_id)] = bbox

        elif "detected" in line:
            match = re.match(r"Person (\d+) is not", line)
            if match is None:
                continue

            frames[current_frame]['bbox'][int(match.group(1))] = [None, None, None, None]

    return frames

# Reading the raw data file
file_path = "raw_data/Info_Video_0194.txt"

# Explicit encoding so the result does not depend on the platform's default code page
with open(file_path, 'r', encoding='utf-8') as file:
    raw_data_content = file.read()
# Parsing the raw data to extract the per-frame bounding boxes
parsed_data = parse_raw_data(raw_data_content)
parsed_data


defaultdict(<function __main__.parse_raw_data.<locals>.<lambda>()>,
            {'0001': {'bbox': {0: [1148.4463, 625.3968, 1183.7595, 663.28143],
               1: [1120.0143, 636.9002, 1149.0658, 672.4008],
               2: [648.042, 667.07153, 665.49365, 719.2749],
               3: [None, None, None, None],
               4: [None, None, None, None],
               5: [None, None, None, None]}},
             '0002': {'bbox': {0: [1198.5688, 652.10834, 1241.2085, 755.20197],
               1: [621.4365, 695.20935, 631.2711, 725.8157],
               2: [1164.8732, 663.21277, 1198.9618, 754.44324],
               3: [272.70007, 638.92316, 292.74884, 687.86066],
               4: [640.6342, 698.09467, 656.5699, 752.0599],
               5: [None, None, None, None]}},
             '0003': {'bbox': {0: [633.53595, 712.7891, 653.9832, 770.7911],
               1: [1223.1569, 683.0388, 1265.8732, 785.2406],
               2: [623.22614, 708.7739, 634.92694, 739.9016]}},
             '000

In [5]:
import json
import os

def save_to_json(parsed_data, filename, folder='JSON_data'):
    """Write ``parsed_data`` as indented JSON to ``<folder>/<filename>``.

    Note that JSON object keys are always strings, so integer dictionary keys
    (for example person ids) come back as strings when the file is reloaded.

    Args:
        parsed_data: JSON-serialisable object (dicts, lists, numbers, None, ...).
        filename: Name of the file to create inside ``folder``.
        folder: Output directory, created if it does not exist.
            Defaults to ``'JSON_data'``.

    Returns:
        None. The destination path is printed.
    """
    # Create the output directory (and any parents) if it doesn't exist
    os.makedirs(folder, exist_ok=True)

    path = os.path.join(folder, filename)

    # Explicit encoding keeps the file identical across platforms
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(parsed_data, file, indent=4)

    print(f"Data has been saved to {path}")



# Usage example: parse the raw text loaded earlier and store the bounding boxes as JSON.
# JSON object keys are always strings, so the int person ids are written as strings.
parsed_data = parse_raw_data(raw_data_content)
filename = 'Bbox_Video_0194_.json'
save_to_json(parsed_data, filename)


Data has been saved to JSON_data\Bbox_Video_0194_.json


In [6]:
import os
def preprocess_text(text):
    """Mark up the LLM markdown so per-person entries are easy to locate.

    Every ``**`` becomes ``||``. A ``<PERSON_INFO>`` line is inserted before
    each line containing both ``Person`` and ``:``, and a ``</PERSON_INFO>``
    line is inserted after each line whose stripped text ends with ``)``.

    Args:
        text: Markdown text to preprocess.

    Returns:
        The transformed text, lines joined with ``\\n``.
    """
    def _with_markers(line):
        # Yield the line itself, wrapped by whichever markers apply to it.
        if 'Person' in line and ':' in line:
            yield '<PERSON_INFO>'
        yield line
        if line.strip().endswith(')'):
            yield '</PERSON_INFO>'

    unbolded = text.replace('**', '||')
    return '\n'.join(
        piece
        for source_line in unbolded.split('\n')
        for piece in _with_markers(source_line)
    )


def store_preprocessed_text(filename, preprocessed_text, folder='prep_data'):
    """Write preprocessed text to ``<folder>/Prep_<name>``.

    The output name is the base name of ``filename`` prefixed with ``Prep_``.
    A trailing ``.txt`` extension is swapped for ``.md``; any other name is
    kept as is.

    Args:
        filename: Source file name or path; only its base name is used.
        preprocessed_text: Text to write (UTF-8).
        folder: Output directory, created if missing. Defaults to ``'prep_data'``.

    Returns:
        None. The destination path is printed.
    """
    # exist_ok avoids the check-then-create race; an empty folder means the cwd
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Only the final extension is rewritten, not every '.txt' inside the name
    base_name = os.path.basename(filename)
    stem, ext = os.path.splitext(base_name)
    new_name = f"Prep_{stem}.md" if ext == '.txt' else f"Prep_{base_name}"
    file_path = os.path.join(folder, new_name)

    # Write the preprocessed text to the new file
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(preprocessed_text)

    print(f"Preprocessed text saved to: {file_path}")

# Example usage: preprocess the LLM markdown (result_data) and store it.
# The filename below only determines the output name (Prep_Info_Video_0194.md).
filename = "Info_Video_0194.txt"
preprocessed_text = preprocess_text(result_data)
store_preprocessed_text(filename, preprocessed_text)


Preprocessed text saved to: prep_data\Prep_Info_Video_0194.md


In [7]:

def parse_text(text, bbox_data):
    """Combine the preprocessed LLM analysis with bounding boxes, frame by frame.

    The text is split on ``###``. Only sections containing ``Risk Evaluation``
    and a ``Frame <number>`` reference are used. Within such a section, each
    ``<PERSON_INFO>`` block matching ``||Person <id>:|| <Level>`` produces one
    pedestrian entry.

    Args:
        text: Output of ``preprocess_text``.
        bbox_data: Mapping ``frame id (4-digit str) -> {"bbox": {person_id: bbox}}``.
            Person ids may be ints or strings (as after a JSON round trip).
            The mapping is only read, never modified.

    Returns:
        Dict keyed by ``"Frame<4-digit number>"``. Each value has:
          * ``"Time"``: the frame number times 0.5, formatted as ``"<value>s"``;
          * ``"Text"``: the last blank-line-separated paragraph of the section;
          * ``"Pedestrian"``: ``{person_id (str): (risk_level, bbox)}``, where
            ``bbox`` is ``[None, None, None, None]`` if no box is known for
            that person in that frame.
    """
    result = {}

    # Splitting the text into sections based on headers
    for section in text.split('###'):
        if "Risk Evaluation" not in section:
            continue

        frame_number_match = re.search(r"Frame (\d+)", section)
        if not frame_number_match:
            continue

        frame_number = frame_number_match.group(1)
        frame_id = frame_number.zfill(4)

        # .get() rather than indexing: a defaultdict would otherwise grow an
        # empty entry for every frame that only the LLM text mentions.
        frame_bboxes = (bbox_data.get(frame_id) or {}).get('bbox') or {}

        pedestrians = {}
        for person_info_block in section.split('<PERSON_INFO>')[1:]:
            person_info = person_info_block.split('</PERSON_INFO>')[0]
            person_match = re.search(r"\|\|Person (\d+):\|\| ([A-Za-z]+)", person_info)
            if not person_match:
                continue

            person, risk_level = person_match.groups()

            # Try the int key first, then the str key, then fall back to "no bbox".
            bbox = frame_bboxes.get(int(person))
            if bbox is None:
                bbox = frame_bboxes.get(person)
            if bbox is None:
                bbox = [None, None, None, None]

            pedestrians[person] = (risk_level, bbox)

        result[f"Frame{frame_id}"] = {
            "Time": f"{float(frame_number) * 0.5}s",
            # The last paragraph of the section serves as the text summary
            "Text": section.strip().split("\n\n")[-1],
            "Pedestrian": pedestrians,
        }

    return result



# Preprocess the LLM markdown so each person's entry is wrapped in <PERSON_INFO> markers
preprocessed_text = preprocess_text(result_data)

# Combine the marked-up text with the parsed bounding boxes into one dict per frame
json_data = parse_text(preprocessed_text, parsed_data)


json_data

{'Frame0001': {'Time': '0.5s',
  'Text': 'The high-risk evaluation for Persons 0, 1, and 2 is due to their position on the road, very close proximity to the dashcam, and angles near the extremes of the field of view. The medium-risk evaluation for Persons 3, 4, and 5 is attributed to their undetected surface and close proximity to the dashcam. Immediate attention to the road situation is recommended.',
  'Pedestrian': {'0': ('High', [1148.4463, 625.3968, 1183.7595, 663.28143]),
   '1': ('High', [1120.0143, 636.9002, 1149.0658, 672.4008]),
   '2': ('High', [648.042, 667.07153, 665.49365, 719.2749]),
   '3': ('Medium', [None, None, None, None]),
   '4': ('Medium', [None, None, None, None]),
   '5': ('Medium', [None, None, None, None])}},
 'Frame0002': {'Time': '1.0s',
  'Text': 'The situation in Frame 0002 is more critical compared to the previous frame, as more persons are now detected on the road. The high-risk evaluation for Persons 0 through 4 is due to their positions on the road or

In [8]:
# Spot-check one entry: each pedestrian value is a (risk level, bbox) pair
risk, bbox = json_data["Frame0005"]["Pedestrian"]["0"]
print(risk, bbox, sep="\n")

High
[1464.9961, 632.0057, 1528.1875, 727.5109]


In [9]:
# Persist the per-frame risk evaluation; save_to_json writes it under JSON_data/
save_to_json(json_data,"Risk_evaluation.json")

Data has been saved to JSON_data\Risk_evaluation.json


I wrote code to parse the information from the raw data, but there are two bugs in it:
1. The program does not show a person's bbox when the raw data does not contain it; I want it recorded as [None, None, None, None] instead.
2. The keys of bbox are strings; I want them to be ints, because that may be more convenient to search.

Debug now

