In [None]:
import os
import pandas as pd
import requests
from tqdm import tqdm
import cv2
import time
import base64

from openai import OpenAI

# Dataset locations inside the Kaggle input mount
BASE_PATH = "/kaggle/input/tammathon-task-3"
TEST_IMAGE_PATH = os.path.join(BASE_PATH, "test/test")
TEST_CSV_PATH = os.path.join(BASE_PATH, "test.csv")
VAL_IMAGE_PATH = os.path.join(BASE_PATH, "val/val")  # Validation image folder
VAL_CSV_PATH = os.path.join(BASE_PATH, "val.csv")    # Validation CSV

# Delay between API calls (in seconds)
API_CALL_DELAY = 1.0  # Adjust as needed based on API rate limits


# Initialize OpenAI client (configured for Core42 API or OpenAI API).
# The key is read from the CORE42_API_KEY environment variable so that no
# secret has to be pasted into (and saved with) the notebook. When the
# variable is unset the key is the empty string, exactly as before.
client = OpenAI(
    api_key=os.environ.get("CORE42_API_KEY", ""),
    base_url="https://api.core42.ai/openai/deployments/gpt-4o"  # Core42 API base URL
)


In [2]:
import os
import pandas as pd
import base64
from openai import OpenAI
from tqdm import tqdm
import time
import json
from tenacity import retry, stop_after_attempt, wait_exponential

# ---- Run settings ----
API_CALL_DELAY = 1.5  # Pause between API calls
MAX_RETRIES = 3  # Upper bound on retry attempts
TEST_SAMPLE_SIZE = 5  # How many images to test
LOG_FILE = "/kaggle/working/api_logs.jsonl"  # Where log lines are written

# No client is built in this cell; an object named `client` must already exist.

# ---- Vocabulary the model is restricted to ----
ALLOWED_DAMAGE_TERMS = "scratch dent lamp broken glass shattered crack tire flat".split()

# ---- System prompt enforcing the strict answer format ----
# The comma-separated vocabulary is substituted into the {terms} placeholder.
SYSTEM_PROMPT = """
You are a professional car damage assessor. Analyze the image and:

**Strict Requirements:**
1. Only use these damage terms: {terms}
2. Respond EXACTLY in format: "Car image with [damage1], [damage2]..."
3. Separate multiple damages with commas
4. Use ONLY the specified terms - no variations
5. If no damage: "Car image with no visible damage"

**Examples:**
- "Car image with scratch, dent"
- "Car image with broken glass"
- "Car image with flat tire"
- "Car image with no visible damage"

**Important:**
- Do not add locations or severity
- Do not use other words besides the specified terms
- Keep it to one short sentence
""".format(terms=", ".join(ALLOWED_DAMAGE_TERMS))

def encode_image(image_path):
    """Read a file and return its contents as a base64 text string.

    Any failure (missing file, unreadable path, ...) is reported with a
    printed message and signalled to the caller by returning None.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded_text = base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as e:
        print(f"Image encoding failed for {image_path}: {str(e)}")
        return None
    return encoded_text

def _normalize_description(raw_text):
    """Coerce a raw model reply into the "Car image with ..." sentence form.

    Surrounding whitespace and wrapping quote characters are removed before
    the prefix check, so a reply the model wrapped in quotes is recognised
    as already carrying the prefix instead of getting it prepended twice.
    """
    cleaned = raw_text.strip().strip("\"'").strip()
    if not cleaned.startswith("Car image with"):
        cleaned = "Car image with " + cleaned
    return cleaned


def _append_log(log_file, entry):
    """Append `entry` plus a current ISO timestamp to `log_file` as one JSON line."""
    record = dict(entry, timestamp=pd.Timestamp.now().isoformat())
    with open(log_file, "a") as f:
        f.write(json.dumps(record) + "\n")


@retry(stop=stop_after_attempt(MAX_RETRIES),
       wait=wait_exponential(multiplier=1, min=4, max=10))
def get_damage_description(image_path, client, log_file=None,
                           model="gpt-4-vision-preview"):
    """Get damage description with strict formatting.

    Args:
        image_path: Path of the image file to send.
        client: Object exposing ``chat.completions.create(...)`` whose result
            has ``choices[0].message.content``.
        log_file: Optional path; when given, one JSON line is appended per
            successful reply and per failed attempt.
        model: Model name passed to the API. Defaults to the value that was
            previously hard-coded.

    Returns:
        The normalised description string, or None when the image could not
        be encoded (no API call is made in that case).

    Raises:
        Any exception from the API call or reply handling is logged and
        re-raised, which lets the ``@retry`` decorator schedule another
        attempt (at most MAX_RETRIES attempts in total).
    """
    try:
        base64_image = encode_image(image_path)
        if not base64_image:
            # Unreadable or empty image: retrying would not help.
            return None

        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": SYSTEM_PROMPT
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Analyze this car image for damages."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                                "detail": "high"
                            }
                        }
                    ]
                }
            ],
            temperature=0.0,  # Minimize creativity for strict format
            max_tokens=100,
        )

        content = response.choices[0].message.content
        if content is None:
            # Fail with a clear message instead of an AttributeError on .strip()
            raise ValueError("Model returned no text content")

        # Validate response format
        description = _normalize_description(content)

        # Log the response
        if log_file:
            _append_log(log_file, {
                "image": os.path.basename(image_path),
                "response": description,
            })

        return description

    except Exception as e:
        print(f"API call failed for {os.path.basename(image_path)}: {str(e)}")
        if log_file:
            _append_log(log_file, {
                "image": os.path.basename(image_path),
                "error": str(e),
            })
        raise

# Input table and image folder (note: these VAL_* names point at the test split)
VAL_CSV_PATH = "/kaggle/input/tammathon-task-3/test.csv"
VAL_IMAGE_PATH = "/kaggle/input/tammathon-task-3/test/test"
val_df = pd.read_csv(VAL_CSV_PATH)

# Collect one prediction string per row, in row order
predictions = []
row_iter = tqdm(val_df.iterrows(), total=len(val_df), desc="Processing")
for idx, row in row_iter:
    try:
        img_path = os.path.join(VAL_IMAGE_PATH, row['file_name'])
        description = get_damage_description(img_path, client, LOG_FILE)
    except Exception as e:
        # Keep going: the failure text becomes this row's prediction
        outcome = f"Error: {str(e)}"
    else:
        # An empty/None description is recorded as a format error
        outcome = description if description else "Format error"
    predictions.append(outcome)
    time.sleep(API_CALL_DELAY)

# Pair every id with its prediction
results_df = pd.DataFrame({'id': val_df['id'], 'prediction': predictions})

# Write the submission file and show the first rows
results_df.to_csv("/kaggle/working/submission.csv", index=False)
print("\nResults saved. Sample predictions:")
print(results_df.loc[:, ['id', 'prediction']].head())

Processing: 100%|██████████| 374/374 [31:36<00:00,  5.07s/it]


Results saved. Sample predictions:
   id                     prediction
0  12      Car image with flat tire.
1  15        Car image with scratch.
2  23  Car image with dent, scratch.
3  33  Car image with dent, scratch.
4  40  Car image with dent, scratch.





In [3]:
import pandas as pd

# Read your CSV file
file_path = "/kaggle/working/submission.csv"  # Replace with your file path
df = pd.read_csv(file_path)

# Remove all speechmarks from string cells; non-string cells pass through
# unchanged. Series.map is applied column by column because
# DataFrame.applymap is deprecated in recent pandas and emits a warning.
df = df.apply(
    lambda col: col.map(lambda x: x.replace('"', '') if isinstance(x, str) else x)
)

# Save the cleaned CSV
cleaned_file_path = "/kaggle/working/submission_final.csv"
# quoting=1 is csv.QUOTE_ALL: every field in the output file is wrapped in
# double quotes (needed anyway for predictions that contain commas).
df.to_csv(cleaned_file_path, index=False, quoting=1)

print(f"Speechmarks removed. Cleaned file saved to: {cleaned_file_path}")

Speechmarks removed. Cleaned file saved to: /kaggle/working/submission_final.csv


  df = df.applymap(lambda x: x.replace('"', '') if isinstance(x, str) else x)
