# In [None]:
#!/usr/bin/env python
# coding: utf-8

import os
import time
import json
import cv2
import base64
import numpy as np
import pandas as pd
import concurrent.futures

from openai import OpenAI
from google.cloud import storage
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
# Use the key provided through the environment (including values loaded from
# .env above). The literal "API_KEY" placeholder is kept only as a fallback so
# the script still behaves as before when OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "API_KEY"))

# Convert image to base64 string
def convert_image_to_base64(image_path):
    """Load an image with OpenCV and return it as a base64-encoded JPEG string.

    The image is re-encoded with the ".jpg" encoder whatever its original
    format, so the returned payload is always JPEG data.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The encoded JPEG bytes as a base64 string.

    Raises:
        ValueError: If the file cannot be read as an image, or if JPEG
            encoding reports failure.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, which would otherwise surface later as an obscure error.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    encoded_ok, buffer = cv2.imencode(".jpg", image)
    if not encoded_ok:
        raise ValueError(f"Could not encode image as JPEG: {image_path}")

    return base64.b64encode(buffer).decode("utf-8")

# Analyze a single image using GPT-4 API
def analyze_image(client, image_path):
    """Ask the chat model to describe and classify the wound in one image.

    Args:
        client: Client object; ``client.chat.completions.create`` is called
            with the request parameters built here.
        image_path: Path of the image to send.

    Returns:
        A ``(text, None)`` tuple where ``text`` is the content of the first
        choice of the reply, or ``(None, None)`` if encoding the image or the
        request fails (the error is printed). The second element is always
        ``None``.
    """
    # The reply is later parsed as JSON, so the template shown to the model is
    # kept well-formed: comma after every entry and plain double quotes
    # around the "Type" key.
    prompt_message = (
        "You are a medical expert specializing in wound. You will be shown an image from a patient. Your task is to provide a prediction of the wound type in the image from the following seven categories: “Stab Wound”,  “Laceration”, “Ingrown Nails”, “Cut”, “Burns”, “Bruises”, “Abrasions”.\n"
        "Please respond using the format below:\n"
        "{\n"
        '  "description": "Provide a concise description of the image, including details such as size, shape, color, depth, exudate, tissue type (e.g., necrotic, granulating), surrounding skin condition, and any other relevant observations. (max 200 words)",\n'
        ' "reasoning": "Detailed reasoning for why this wound corresponds to the predicted category based on features. (max 100 words)",\n'
        ' "Type": One of the following: “Stab Wound”,  “Laceration”, “Ingrown Nails”, “Cut”, “Burns”, “Bruises”, “Abrasions”\n'
        "}\n"
    )

    try:
        # Encoding happens inside the try block so that one unreadable image
        # yields (None, None) like any other failure instead of aborting the
        # whole run.
        base64_image = convert_image_to_base64(image_path)

        # NOTE(review): this content shape (bare string plus
        # {"image", "resize"}) is not the documented "type": "image_url"
        # form - confirm the API still accepts it for this model.
        prompt_messages = [
            {
                "role": "user",
                "content": [
                    prompt_message,
                    {"image": base64_image, "resize": 768},
                ],
            }
        ]

        params = {
            # "model": "gpt-4o",
            "model": "gpt-4o-mini-2024-07-18",
            "messages": prompt_messages,
            "max_tokens": 4096,
            "temperature": 0.0,
        }

        result = client.chat.completions.create(**params)
        text = result.choices[0].message.content
        return text, None
    except Exception as e:
        print(f"Error analyzing image: {e}")
        return None, None

# List images in the local directory and subdirectories
def list_local_images(directory):
    """Collect the image files found anywhere below ``directory``.

    A file counts as an image when its lower-cased name ends with ``.jpg``,
    ``.jpeg`` or ``.png``.

    Args:
        directory: Root folder to walk.

    Returns:
        A list of paths relative to ``directory``, using ``/`` as separator,
        in the order ``os.walk`` yields them.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    return [
        os.path.relpath(os.path.join(parent, name), directory).replace(os.sep, '/')
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(image_suffixes)
    ]

# Process images in batches
def batch_process_images(client, images, directory, batch_size, output_filename):
    """Analyze images concurrently, one batch at a time, logging each result.

    Every image of a batch is submitted to a thread pool; the next batch is
    only submitted once all results of the current one have been collected.
    Each result is appended to ``output_filename`` as one JSON line
    (``{image: response_text}``) as soon as it is collected.

    Args:
        client: Client object forwarded to ``analyze_image``.
        images: Image paths relative to ``directory``.
        directory: Folder the relative paths are joined to.
        batch_size: Number of images submitted per batch.
        output_filename: JSONL file that results are appended to.

    Returns:
        A ``(responses, processing_times)`` tuple: a list of
        ``{image: response_text}`` dicts and a list of
        ``(image, seconds)`` tuples, both in completion order.
    """
    responses = []
    processing_times = []

    def process_image(image):
        # Runs in a worker thread: only calls the model and measures time.
        image_path = os.path.join(directory, image)
        start_time = time.time()
        response_text, processing_time = analyze_image(client, image_path)
        end_time = time.time()

        if processing_time is None:
            processing_time = end_time - start_time

        return image, response_text, processing_time

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i in range(0, len(images), batch_size):
            print("batch:", i)
            batch = images[i:i + batch_size]
            futures = [executor.submit(process_image, image) for image in batch]

            for future in concurrent.futures.as_completed(futures):
                image_name, response_text, processing_time = future.result()
                response = {image_name: response_text}

                # The log file is written from this thread only, so lines
                # produced by different workers can never interleave.
                with open(output_filename, "a") as f:
                    f.write(json.dumps(response) + "\n")

                responses.append(response)
                processing_times.append((image_name, processing_time))
    return responses, processing_times

def save_responses_to_file(responses, filename):
    """Serialize ``responses`` as JSON (4-space indent) into ``filename``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(filename, mode='w') as out_handle:
        json.dump(obj=responses, fp=out_handle, indent=4)

def main_part(directory, model_type, batch_size):
    """Run the image analysis over ``directory`` and store the results.

    Uses the module-level ``client``. Results go to the ``response`` folder
    (created if missing): a JSONL file appended to during processing and a
    JSON file written at the end, both named after ``model_type``.

    Args:
        directory: Folder that is searched for images.
        model_type: Label used only to build the output file names.
        batch_size: Number of images processed per batch.

    Returns:
        A ``(jsonl_filename, df_time)`` tuple, where ``df_time`` is a
        DataFrame with the columns 'Image Name' (file name without folder
        or extension) and 'Processing Time'.
    """
    image_list = list_local_images(directory)
    print(f"Found {len(image_list)} images in {directory} and subdirectories.")

    results_dir = "response"
    os.makedirs(results_dir, exist_ok=True)

    base_name = f'responses_{model_type}_images_hyp_1'
    jsonl_filename = os.path.join(results_dir, base_name + '.jsonl')
    json_filename = os.path.join(results_dir, base_name + '.json')

    responses, processing_times = batch_process_images(
        client, image_list, directory, batch_size, output_filename=jsonl_filename
    )

    for entry in responses:
        print(entry)

    save_responses_to_file(responses, filename=json_filename)
    print("Responses saved to json")

    def bare_name(path):
        # Drop the folder part and the extension
        return os.path.splitext(os.path.basename(path))[0]

    df_time = pd.DataFrame(processing_times, columns=['Image Name', 'Processing Time'])
    df_time['Image Name'] = df_time['Image Name'].apply(bare_name)

    return jsonl_filename, df_time

if __name__ == "__main__":
    # Run settings: label used in the output file names and images per batch
    model_type = 'gpt4o-mini'
    batch_size = 10

    # Images are read from the "dataset" folder of the current working directory
    current_dir = os.getcwd()
    directory = os.path.join(current_dir, "dataset")  # Folder with images

    filename, df_time = main_part(
        directory=directory,
        model_type=model_type,
        batch_size=batch_size,
    )


# In [None]:
import os
import time
import json
import cv2
import base64
import numpy as np
import pandas as pd
import concurrent.futures

from openai import OpenAI
from google.cloud import storage
from dotenv import load_dotenv

# Load environment variables from .env file
#load_dotenv()

def _parse_model_reply(response_text):
    """Return the JSON object embedded in a model reply, or None.

    The object is taken as the span from the first '{' to the last '}', so
    Markdown code fences (with or without a language tag) and any text
    around the object are ignored.
    """
    if not isinstance(response_text, str):
        return None

    start = response_text.find('{')
    end = response_text.rfind('}')
    if start == -1 or end < start:
        return None

    # Blank lines are collapsed to a space, as a raw blank line inside a
    # string value would make the payload invalid JSON.
    candidate = response_text[start:end + 1].replace('\n\n', ' ')
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def extract_video_anomaly_results(filename):
    """Read a JSONL results file and extract the fields of every model reply.

    Each non-empty line is expected to hold a JSON object mapping an item
    name to the raw reply text of the model. Lines that are not valid JSON
    objects are reported with ``print`` and skipped.

    Args:
        filename: Path of the JSONL file to read.

    Returns:
        A dict mapping each item name to a dict with the keys
        "description", "reasoning" and "Type". Missing fields, and replies
        that are ``None`` or contain no parsable JSON object, yield the
        defaults "NAN", "NAN" and 0.
    """
    anomaly_results = {}

    # Read the JSON file line by line
    with open(filename, 'r') as file:
        for line in file:
            stripped_line = line.strip()
            if not stripped_line:
                continue

            try:
                # Parse each line as a dictionary
                video_response = json.loads(stripped_line)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON from line: {stripped_line} - {e}")
                continue

            if not isinstance(video_response, dict):
                print(f"Skipping line that is not a JSON object: {stripped_line}")
                continue

            for video, response_text in video_response.items():
                # An unusable reply is treated like an object without fields
                response_json = _parse_model_reply(response_text) or {}
                anomaly_results[video] = {
                    "description": response_json.get('description', 'NAN'),
                    "reasoning": response_json.get('reasoning', 'NAN'),
                    "Type": response_json.get('Type', 0),
                }

    return anomaly_results
def load_and_format_rules(json_file_path):
    """Read the "rules" list of a JSON file and join it into one string.

    Args:
        json_file_path: Path of a JSON file holding an object.

    Returns:
        The entries of the "rules" key joined with newlines; an empty
        string when the key is absent.
    """
    with open(json_file_path, 'r') as handle:
        rule_list = json.load(handle).get('rules', [])

    # One rule per line
    return "\n".join(rule_list)
    
def justify_anomaly_detection(anomaly_result, formatted_rules, client):
    """Ask the chat model to review one wound prediction against fixed examples.

    Args:
        anomaly_result: Dict with the keys 'description', 'reasoning' and
            'Type'; a missing key is rendered as 'NAN' in the prompt.
        formatted_rules: Accepted but not used by this function; the
            reference examples are written directly into the prompt.
        client: Client object; ``client.chat.completions.create`` is called.

    Returns:
        The content of the first choice of the reply. If anything raises,
        the error is printed and its message is returned as the string.
    """
    described, rationale, wound_type = (
        anomaly_result.get(field, 'NAN')
        for field in ('description', 'reasoning', 'Type')
    )

    prompt = f"""
You are a medical expert specializing in wound. You are provided with the results of a wound analysis, including description, reasoning, and a wound type. Additionally, you have a set of reference examples for wound prediction. Some examples may be easily confused, e.g., abrasions vs. laceration, cut vs. laceration, and laceration vs. bruises.
Your task is to review the provided examples. If the content matches any of the examples, apply the example and update the wound prediction result. If no example applies, retain the original prediction.

The wound prediction result is:
{{
  "description": "{described}",
  "reasoning": "{rationale}",
  "Type": "{wound_type}"
}}

# Reference Examples:
1. Example 1:
{{
  "description": "The image shows a wound on the skin with irregular, jagged edges. The wound is red and appears to have some depth, with possible minor bleeding. The surrounding skin is slightly red and inflamed, indicating irritation or trauma. There is no significant exudate visible, and the tissue appears raw but not necrotic.",
  "reasoning": "The irregular, jagged edges and the appearance of the wound suggest it is an abrasion. Abrasions are characterized by jagged skin and minor bleeding. The absence of a puncture or burn pattern supports this classification.",
  "Type": "Abrasions"
}}

2. Example 2:
{{
  "description": "The image shows a linear wound with clean, sharp edges, approximately several centimeters in length. The wound is open and appears to be of moderate depth, with visible red tissue and some bleeding. The surrounding skin is slightly reddened but otherwise intact. There is no significant swelling or bruising around the wound. The tissue within the wound appears fresh, with no signs of necrosis or granulation.",
  "reasoning": "The characteristics of the wound, including its linear shape, clean edges, and moderate depth, are indicative of a Cut. Cuts are typically caused by sharp objects and result in clean cuts with bleeding, as seen in the image.",
  "Type": "Cut"
}}

3. Example 3:
{{
  "description": "The image shows a wound with a reddish, irregular shape, surrounded by a purplish discoloration. The wound appears shallow with no significant depth and has a moist appearance. The surrounding skin shows signs of red and purple hues, indicating possible trauma. There is no visible necrotic tissue, and the area does not appear to have significant exudate. The skin around the wound is slightly swollen, suggesting inflammation.",
  "reasoning": "The presence of a reddish, irregular wound with surrounding purplish discoloration and swelling suggests trauma. Blood vessels are damaged under the skin.",
  "Type": "Laceration"
}}

Please think step-by-step and respond using the format below:
{{
  "Reasoning": "If the wound matches a reference example, provide reasoning based on the specific example. If no example applies, state 'No applicable rule; retaining the original result.'",
  "updated_prediction": "One of the following: Stab Wound, Laceration, Ingrown Nails, Cut, Burns, Bruises, Abrasions"
}}
"""

    conversation = [
        {"role": "system", "content": "You are a medical expert specializing in wound"},
        {"role": "user", "content": prompt},
    ]
    request = dict(
        model="gpt-4o-mini-2024-07-18",  # alternative: "gpt-4o"
        messages=conversation,
        max_tokens=4096,
        temperature=0.0,
    )

    try:
        # Call the OpenAI API and hand back the text of the first choice
        reply = client.chat.completions.create(**request)
        return reply.choices[0].message.content
    except Exception as e:
        print(f"Error analyzing video description: {e}")
        return str(e)

# Process videos in batches
def batch_process_videos(videos_dict, batch_size, output_filename, client, formatted_rules):
    """Review the entries of ``videos_dict`` in concurrently running batches.

    The entries are split into batches of ``batch_size``; every batch is
    submitted to a thread pool and its entries are reviewed one after the
    other by ``justify_anomaly_detection``. As each batch finishes, its
    results are appended to ``output_filename`` as JSON lines
    (``{video_id: result}``).

    Args:
        videos_dict: Dict mapping an id to the prediction dict to review.
        batch_size: Number of entries per batch.
        output_filename: JSONL file that results are appended to.
        client: Client object forwarded to ``justify_anomaly_detection``.
        formatted_rules: Value forwarded to ``justify_anomaly_detection``.

    Returns:
        A ``(responses, processing_times)`` tuple: a list with one
        ``{video_id: result}`` dict per batch and a list with the seconds
        each batch took, both in completion order.
    """
    responses = []
    processing_times = []

    def process_batch(batch):
        # Runs in a worker thread: only calls the model and measures time.
        start_time = time.time()
        batch_responses = {
            video_id: justify_anomaly_detection(description, formatted_rules, client)
            for video_id, description in batch.items()
        }
        processing_time = time.time() - start_time
        return batch_responses, processing_time

    # Process videos in batches
    with concurrent.futures.ThreadPoolExecutor() as executor:
        video_items = list(videos_dict.items())  # Convert dictionary to list of tuples (video_id, description)
        futures = []

        for i in range(0, len(video_items), batch_size):
            print("Processing batch:", i // batch_size + 1)
            batch = dict(video_items[i:i + batch_size])
            futures.append(executor.submit(process_batch, batch))

        for future in concurrent.futures.as_completed(futures):
            result, processing_time = future.result()

            # The log file is written from this thread only, so lines of
            # batches running in parallel can never interleave.
            with open(output_filename, "a") as f:
                for video_id, justification in result.items():
                    f.write(json.dumps({video_id: justification}) + "\n")

            responses.append(result)
            processing_times.append(processing_time)

    return responses, processing_times
    
# Save responses to a JSON file
def save_responses_to_file(responses, filename):
    """Serialize ``responses`` as JSON (4-space indent) into ``filename``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(filename, mode='w') as out_handle:
        json.dump(obj=responses, fp=out_handle, indent=4)

def main_part(videos_dict, formatted_rules, model_type, batch_size, client):
    """Review all predictions in ``videos_dict`` and store the results.

    Results go to the ``ref`` folder (created if missing): a JSONL file
    appended to during processing and a JSON file written at the end, both
    named after ``model_type``.

    Args:
        videos_dict: Dict mapping an id to the prediction dict to review.
        formatted_rules: Value forwarded to ``batch_process_videos``.
        model_type: Label used only to build the output file names.
        batch_size: Number of entries per batch.
        client: Client object forwarded to ``batch_process_videos``.

    Returns:
        A ``(jsonl_filename, df_time)`` tuple, where ``df_time`` is a
        DataFrame with the columns 'Batch' (position in the returned list
        of timings) and 'Processing Time'.
    """
    print(f"Processing {len(videos_dict)} video descriptions.")

    # Output folder and file names derived from the model label
    results_dir = "ref"
    os.makedirs(results_dir, exist_ok=True)
    base_name = f'rule10_{model_type}_1'
    jsonl_filename = os.path.join(results_dir, base_name + '.jsonl')
    json_filename = os.path.join(results_dir, base_name + '.json')

    # The JSONL file is filled while the batches are processed
    responses, processing_times = batch_process_videos(
        videos_dict, batch_size, jsonl_filename, client, formatted_rules
    )

    # Merge the per-batch dicts into one mapping for the JSON file
    flat_responses = {}
    for batch_result in responses:
        flat_responses.update(batch_result)
    save_responses_to_file(flat_responses, filename=json_filename)
    print("Responses saved to JSON")

    timing_rows = list(enumerate(processing_times))
    df_time = pd.DataFrame(timing_rows, columns=['Batch', 'Processing Time'])

    return jsonl_filename, df_time
# Example usage
if __name__ == "__main__":
    # Set up your OpenAI API key: the OPENAI_API_KEY environment variable is
    # used when it is set; the "API_KEY" placeholder remains as the fallback.
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "API_KEY"))
    # Get the video description first:
    model_type = 'gpt4o-mini'
    video_descriptions = extract_video_anomaly_results(f'response/responses_{model_type}_images_hyp_1.jsonl')
    # test if the "nan" value is the same with the json
    # ('Predicted Label' holds the whole extracted dict of each image)
    df = pd.DataFrame({
            'Image Name': list(video_descriptions.keys()),
            'Predicted Label': list(video_descriptions.values())
        })
    df.to_csv(f'test_rawreason_{model_type}.csv', index=False)
    # load anomaly rules:
    formatted_rules = load_and_format_rules('response/rule.json')
    batch_size = 10  # Adjust batch size as needed
    jsonl_filename, df_time = main_part(video_descriptions, formatted_rules, model_type, batch_size, client)
