In [None]:
!pip install ultralytics gdown

In [1]:
import os
import pandas as pd
import nltk
nltk.download(['punkt', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng', 'wordnet', 'omw-1.4'])
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.stem import WordNetLemmatizer
from ultralytics import YOLO
import gdown
import numpy as np
drive_link = "https://drive.google.com/drive/folders/1liEguiv6LtT_pHlqdiAs5h1EjnOp5qN_?usp=sharing"
prompt_dataset = "https://docs.google.com/document/d/1VJHTKDMPapyLbACAIOWvzRdrUhV70y03Mbyr_y--AN0/edit?usp=sharing"

In [2]:
def download_yolo_file():
    from ultralytics import YOLO
    model = YOLO('yolov8n.pt')
    return 'yolov8n.pt'

In [3]:
yolo_model = download_yolo_file()

In [4]:
def parse_results_from_drive(drive_link):
    gdown.download_folder(drive_link, output='kaggle_resources', quiet=False, use_cookies=False)
    csv_data = pd.read_csv('kaggle_resources/results.csv')
    return csv_data[['run_id', 'prompt', 'filenames']].rename(columns={'filenames': 'generated_images'})

In [5]:
generated_data = parse_results_from_drive(drive_link)

Retrieving folder contents


Processing file 1auXz8KoCQV91xeNFktof_hC0JDg-5eKG DreamLayer_StabilityAI_00177_.png
Processing file 1d53hBVPoVsX0SEuHXPxD5GFpyizn9oWX DreamLayer_StabilityAI_00178_.png
Processing file 1Yv7Te8shqYoyu9JWu47ndD2iBkK3hBFj DreamLayer_StabilityAI_00179_.png
Processing file 1Bqyq-xQVSIXdgWswlP6zSZ2p9gCgvD3B DreamLayer_StabilityAI_00180_.png
Processing file 1ukSC8-xXGkeRiHJjAvRX-goi95IA0AVm results.csv


Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1auXz8KoCQV91xeNFktof_hC0JDg-5eKG
To: /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00177_.png
100%|████████████████████████████████████████████████████████████████████| 1.25M/1.25M [00:00<00:00, 40.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1d53hBVPoVsX0SEuHXPxD5GFpyizn9oWX
To: /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00178_.png
100%|████████████████████████████████████████████████████████████████████| 1.48M/1.48M [00:00<00:00, 10.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Yv7Te8shqYoyu9JWu47ndD2iBkK3hBFj
To: /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00179_.png
100%|██████████████████████████████████████

In [6]:
print(generated_data)

                                 run_id  \
0  9e5ae05b-0ba2-4f98-ad8d-06032d868b9b   
1  2bec46fa-e4e7-4903-9a76-4ffdcfb3d382   
2  6b31efd8-a516-45e1-a59f-13a17d8d1508   
3  32e3792a-b16c-4b54-abaa-82d29ff2e38a   
4  8e3b34ad-34ea-4dea-a164-f3a7f30d8daf   

                                              prompt  \
0  A white and blue truck parked in the middle of...   
1  An airplane in route with a cloudy sky behind it.   
2  A person skiing down a  mountain kicking his l...   
3       A zebra chews a flower in a fenced in field.   
4  A man baking and preparing donuts to sell at s...   

                    generated_images  
0  DreamLayer_StabilityAI_00181_.png  
1  DreamLayer_StabilityAI_00180_.png  
2  DreamLayer_StabilityAI_00179_.png  
3  DreamLayer_StabilityAI_00178_.png  
4  DreamLayer_StabilityAI_00177_.png  


In [7]:
def apply_object_detection(yolo_file, generated_df):
    model = YOLO(yolo_file)
    def detect_objects(img):
        try:
            detections = model(os.path.join('kaggle_resources', img))[0]
            return [model.names[int(cls)] for cls in detections.boxes.cls] if detections.boxes is not None else []
        except:
            return []
    generated_df['predicted_objects'] = [detect_objects(img) for img in generated_df['generated_images']]
    return generated_df


In [8]:
submissions_df = apply_object_detection(yolo_model, generated_data)



image 1/1 /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00180_.png: 640x640 1 airplane, 63.0ms
Speed: 2.0ms preprocess, 63.0ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00179_.png: 640x640 1 person, 1 skis, 53.1ms
Speed: 1.5ms preprocess, 53.1ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00178_.png: 640x640 1 zebra, 51.9ms
Speed: 1.4ms preprocess, 51.9ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/kaggle_resources/DreamLayer_StabilityAI_00177_.png: 640x640 1 person, 19 donuts, 53.6ms
Speed: 1.4ms preprocess, 53.6ms inference, 0.4ms postprocess

In [9]:
def extract_ground_truth(drive_link):
    import nltk
    from nltk.tokenize import word_tokenize
    from nltk.tag import pos_tag
    from nltk.stem import WordNetLemmatizer
    
    lemmatizer = WordNetLemmatizer()
    doc_id = drive_link.split('/d/')[1].split('/')[0]
    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=txt"
    gdown.download(export_url, 'temp_prompts.txt', quiet=False)
    with open('temp_prompts.txt', 'r') as f:
        prompts = f.readlines()
    
    results = []
    for i, prompt in enumerate(prompts):
        tokens = word_tokenize(prompt.lower().strip())
        pos_tags = pos_tag(tokens)
        nouns = [lemmatizer.lemmatize(word) for word, pos in pos_tags if pos.startswith('NN')]
        results.append({'ID': i+1, 'prompt_id': i, 'prompt': prompt.strip(), 'ground_truth': nouns, 'Usage': 'Public'})
    
    return pd.DataFrame(results)

In [10]:
solutions_df = extract_ground_truth(prompt_dataset)

Downloading...
From: https://docs.google.com/document/d/1VJHTKDMPapyLbACAIOWvzRdrUhV70y03Mbyr_y--AN0/export?format=txt
To: /Users/abhijeetbajaj/Coding/ Side_Projects/DreamLayerProd/DreamLayer/temp_prompts.txt
2.75kB [00:00, 3.11MB/s]


In [12]:
def add_id_to_submission(submission_df, solution_df):
    merged = solution_df[['ID', 'prompt']].merge(submission_df, on='prompt', how='left')
    merged['predicted_objects'] = merged['predicted_objects'].fillna('').apply(lambda x: x if isinstance(x, list) else [])
    merged[['run_id', 'generated_images']] = merged[['run_id', 'generated_images']].fillna("missing_information")
    return merged

In [13]:
submissions_df = add_id_to_submission(submissions_df, solutions_df)

In [14]:
print(submissions_df)

    ID                                             prompt  \
0    1  ﻿A man baking and preparing donuts to sell at ...   
1    2       A zebra chews a flower in a fenced in field.   
2    3  A person skiing down a  mountain kicking his l...   
3    4  An airplane in route with a cloudy sky behind it.   
4    5  A white and blue truck parked in the middle of...   
5    6     PIcked peach flowers sit in a vase with water.   
6    7  Sheep are on a grassy field and one of them is...   
7    8  A blue rusted train engine sitting on top of r...   
8    9  An open laptop computer sitting on top of a wo...   
9   10   A woman twirling an umbrella with flowers on it.   
10  11  A woman sitting in a restaurant with Mexican f...   
11  12           A woman holding a purse and a cellphone.   
12  13  A cat reaching for a knife that has it's blade...   
13  14  The man smiles with a slice of pizza while nex...   
14  15  Night picture of a car parked and some parking...   
15  16  A professional p

In [15]:
def score(solution: pd.DataFrame, submission: pd.DataFrame) -> float:
    """
    Calculate the average F1 score between predicted and ground truth objects using proper precision/recall.
    
    Args:
        solution: DataFrame with 'ID' and 'ground_truth' columns
        submission: DataFrame with 'ID' and 'predicted_objects' columns
    
    Returns:
        float: Average F1 score across all prompts using TP, FP, FN calculations
    """
    import ast
    merged = solution.merge(submission, on='ID', how='left')
    merged['predicted_objects'] = merged['predicted_objects'].fillna('[]').apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    merged['ground_truth'] = merged['ground_truth'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    
    f1_scores = []
    for pred, truth in zip(merged['predicted_objects'], merged['ground_truth']):
        pred_set, truth_set = set(pred), set(truth)
        tp = len(pred_set & truth_set)
        fp = len(pred_set - truth_set)
        fn = len(truth_set - pred_set)
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        f1_scores.append(f1)
    
    return np.mean(f1_scores)

In [17]:
f1_score = score(solutions_df, submissions_df)
print(f1_score)

0.02653061224489796


In [None]:
submissions_df.to_csv("submission.csv", index=False)