## Test of ollama llava:7b output with one image

In [None]:
import ollama
import os
import csv
import time
import multiprocessing

# Work from the project folder so the relative image path below resolves.
os.chdir('/Folder_path')

# Single-image smoke test: send one image to llava:7b and print the raw reply.
res = ollama.chat(
    model='llava:7b',
    messages=[
        {
            'role': 'user',
            # Spelling fixed ("separated") so this test sends exactly the same
            # prompt text as the batch run further down in this file.
            'content': 'Provide ten general tags separated by commas to describe this image.',
            'images': ['./single_image_path'],
        }
    ],
    # temperature 0 is requested, presumably to keep the output repeatable.
    options={"temperature": 0},
)

print(res)

## Full sample code

In [None]:
import time
import os
import csv
import threading
import signal
import ollama

# Work from the project folder so the relative paths below resolve.
os.chdir('/Folder_path')
image_folder = "./image_folder"

# Every JPEG/PNG in the folder (extension matched case-insensitively),
# as joined paths in sorted order.
image_files = sorted(
    os.path.join(image_folder, entry)
    for entry in os.listdir(image_folder)
    if entry.lower().endswith((".jpg", ".jpeg", ".png"))
)

# Output file plus the progress state shared with the timer thread.
csv_filename = "./scene_tags.csv"
total_images = len(image_files)
processed_count = 0
lock = threading.Lock()

class TimeoutException(Exception):
    """Raised by the SIGALRM handler when an image takes too long to process."""

def timeout_handler(signum, frame):
    """Signal handler that aborts the interrupted code with TimeoutException.

    Both arguments are the standard signal-handler parameters and are unused.
    """
    raise TimeoutException

def process_image(image_path):
    """Ask llava:7b to tag one image and return exactly ten tag strings.

    Args:
        image_path: Path of the image handed to ``ollama.chat``.

    Returns:
        A list of ten strings: the reply split on commas, each piece
        stripped, truncated to ten entries and right-padded with ``""``.
        Ten empty strings are returned if the call times out or raises.
    """
    # SIGALRM interrupts the blocking chat call after 30 seconds.
    # NOTE(review): signal.signal/SIGALRM require Unix and the main thread.
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(30)
    try:
        res = ollama.chat(
            model='llava:7b',
            messages=[
                {'role': 'user',
                 'content': 'Provide ten general tags separated by commas to describe this image.',
                 'images': [image_path]
                }
            ],
            options={"temperature": 0}
        )
        tags = res['message']['content'].split(",") if 'message' in res and 'content' in res['message'] else []
        cleaned = [tag.strip() for tag in tags][:10]
        return cleaned + [""] * (10 - len(cleaned))
    except TimeoutException:
        print(f"\nSkipping {os.path.basename(image_path)} due to timeout.")
        return [""] * 10
    except Exception as e:
        print(f"\nError processing {os.path.basename(image_path)}: {e}")
        return [""] * 10
    finally:
        # Always disarm the alarm. Previously it stayed armed after a
        # non-timeout error and could fire later outside any try block,
        # killing the run with an uncaught TimeoutException.
        signal.alarm(0)

def run_timer(stop_event):
    """Print a progress line about once per second until ``stop_event`` is set.

    Args:
        stop_event: ``threading.Event`` set by the caller when the current
            image is finished.

    Reads the module-level ``processed_count`` and ``total_images`` and
    prints under the shared ``lock``. Prints a summary line on exit.
    """
    start_time = time.time()
    # Bound up front so the summary below cannot hit an unbound name when
    # the event is already set before the first loop pass.
    elapsed_time = 0
    while not stop_event.is_set():
        elapsed_time = int(time.time() - start_time)
        with lock:
            print(f"\rSeconds passed: {elapsed_time}s | Processing: {processed_count+1}/{total_images}", end='', flush=True)
        # wait() returns as soon as the event is set, so the caller's join()
        # no longer blocks for the remainder of a fixed one-second sleep.
        stop_event.wait(1)
    print(f"\nTimer stopped. Time: {elapsed_time}s | Processed: {processed_count}/{total_images}")

# Tag every image and write one CSV row per image: file name + ten tags.
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["ID"] + [f"Tag {i}" for i in range(1, 11)])
    for image_path in image_files:
        print(f"\nProcessing: {os.path.basename(image_path)}...")
        # A fresh timer thread per image shows elapsed seconds while waiting.
        stop_event = threading.Event()
        timer_thread = threading.Thread(target=run_timer, args=(stop_event,))
        timer_thread.start()
        try:
            tags = process_image(image_path)
            writer.writerow([os.path.basename(image_path)] + tags)
            with lock:
                processed_count += 1
        finally:
            # Stop the timer even on an error or Ctrl-C; otherwise the
            # non-daemon thread keeps printing and prevents the process
            # from exiting.
            stop_event.set()
            timer_thread.join()

print(f"\nTags saved to {csv_filename}. Processing complete.")


## Post-processing

In [None]:
import pandas as pd
import re
import os

# Work from the project folder so the relative CSV paths resolve.
os.chdir("/Folder_path")
inputfile_name = "./scene_tags.csv"
# The tagging step writes this file with csv.writer's default comma
# delimiter, so it must be read with sep=",". With sep=";" the whole row
# lands in a single column and the tag columns 1..10 do not exist.
# dtype=str keeps every cell as text (same as the second read of the
# intermediate file), so empty tag columns are not parsed as floats.
df = pd.read_csv(inputfile_name, sep=",", dtype=str)

def fix_tags(row):
    """Re-split the ten tag cells of one row into individual tags.

    For each of positions 1..10 the cell is taken as text (missing -> "").
    Anything up to and including the first "1." is discarded, and the rest
    is split on numbered-list markers ("2.", "3.", ...) and on hyphens.
    The non-empty pieces of all cells are concatenated, cut to ten entries
    and padded with "" to length ten.

    Returns:
        A ``pd.Series`` of exactly ten strings.
    """
    collected = []
    for position in range(1, 11):
        cell = row.iloc[position]
        text = "" if pd.isna(cell) else str(cell)

        # Keep only what follows the first "1." marker, if there is one.
        _, marker, remainder = text.partition("1.")
        if marker:
            text = remainder.strip()

        for piece in re.split(r'\s*\d+\.\s*|[-]', text):
            piece = piece.strip()
            if piece:
                collected.append(piece)

    padding = [""] * (10 - len(collected))
    return pd.Series(collected[:10] + padding)

# Re-split the tag cells row by row and overwrite columns 1..10 of a copy.
fixed_tag_columns = df.apply(fix_tags, axis=1)
df_fixed = df.copy()
df_fixed.iloc[:, 1:11] = fixed_tag_columns
df_fixed.to_csv("scene_tags_1.csv", sep=";", index=False)

# Read the intermediate file back with every column as text.
inputfile_name = "./scene_tags_1.csv"
df = pd.read_csv(inputfile_name, sep=";", dtype=str)

def clean_tag(tag):
    """Strip double quotes from a tag; reject missing or over-long tags.

    Returns ``None`` when the tag is missing or, after removing double
    quotes, contains four or more whitespace-separated words. Otherwise
    returns the tag without double quotes.
    """
    if pd.isna(tag):
        return None
    unquoted = tag.replace('"', '')
    word_count = len(unquoted.split())
    return unquoted if word_count < 4 else None

# Clean every tag cell, then drop rows whose ten tag columns are all missing.
cleaned_cells = df.iloc[:, 1:11].map(clean_tag)
df.iloc[:, 1:11] = cleaned_cells
df.dropna(how='all', subset=df.columns[1:11], inplace=True)

def shift_tags(row):
    """Move the non-missing tags of a row to the front, padding with None.

    Returns a list of the present values in their original order followed
    by enough ``None`` entries to reach length ten (no padding is added if
    there are already ten or more values).
    """
    present = list(filter(pd.notna, row))
    missing_slots = 10 - len(present)
    return present + [None] * missing_slots

# Left-align the remaining tags in each row, then write the final table.
shifted_cells = df.iloc[:, 1:11].apply(shift_tags, axis=1, result_type="broadcast")
df.iloc[:, 1:11] = shifted_cells
df.to_csv("scene_tags_clean.csv", index=False, sep=";")
