In [None]:
# Make the user's Google Drive visible inside the Colab VM's filesystem.
# Running this cell triggers Colab's interactive authorization prompt.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
!pip install -q transformers pandas pillow scikit-learn tqdm

In [None]:
import torch
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
import pandas as pd
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load LLaVA-1.5 13B behind the Hugging Face `image-to-text` pipeline.
#
# The checkpoint has to be the Transformers-format conversion published under
# `llava-hf/`. The original `liuhaotian/llava-v1.5-13b` repository is laid out
# for the standalone LLaVA codebase, and LLaVA is a vision-to-sequence model,
# so it cannot be loaded through AutoModelForCausalLM.
model_name = "llava-hf/llava-1.5-13b-hf"
processor = AutoProcessor.from_pretrained(model_name)

# Half precision on GPU halves the memory needed for the 13B weights compared
# with fp32; on CPU we stay in fp32.
use_gpu = torch.cuda.is_available()

# Let the pipeline pick the right model class from the checkpoint config and
# give it BOTH halves of the processor: without the image processor the
# pipeline has no way to turn a PIL image into pixel values.
pipe = pipeline(
    "image-to-text",
    model=model_name,
    tokenizer=processor.tokenizer,
    image_processor=processor.image_processor,
    torch_dtype=torch.float16 if use_gpu else torch.float32,
    device=0 if use_gpu else -1,
)

# Keep `model` bound for any later cell that refers to it directly.
model = pipe.model

In [None]:
# Instruction sent with every image, in the LLaVA-1.5 chat layout
# ("USER:<image>\n<question>\nASSISTANT:"). Adjacent literals are concatenated
# by the parser, so the resulting string is a single line of instructions
# between the two newline characters.
prompt = (
    "USER:<image>\n"
    "Analyze the given image carefully. "
    "Identify any potential risks, safety concerns, sexual explicit content, "
    "signs of violence or aggression, indications of illegal activities, "
    "health hazards, environmental hazards, presence of weapons or dangerous "
    "objects based on visible details. "
    "Consider factors such as the context, setting, activities, expressions, "
    "and objects present in the image. "
    "Analyze and assess the environment and any notable elements. "
    "Identify any potential safety hazards or risks present. "
    "Note the presence and actions of people, if any. "
    "Check if there is any kind of violence. "
    "Finally, Classify the overall image context as 'harmless' if it appears "
    "safe with no visible threats or 'harmful' if there is any indication of "
    "danger. What is your classification?\n"
    "ASSISTANT:"
)


In [None]:
# Load the CSV file with image paths and labels
csv_file_path = 'caption_test.csv'
data = pd.read_csv(csv_file_path)

# Closed set of values a prediction can take. UNKNOWN marks answers in which
# the model never named either class, so they are scored as errors instead of
# being silently dropped.
HARMLESS, HARMFUL, UNKNOWN = "harmless", "harmful", "unknown"

# The pipeline echoes the prompt in front of the generated answer; everything
# after the last occurrence of this marker is the model's own text.
ANSWER_MARKER = "ASSISTANT:"

# Upper bound on generated tokens; without it generation may stop before the
# model reaches its final verdict.
MAX_NEW_TOKENS = 200


def extract_label(generated_text):
    """Map the raw pipeline output to 'harmless', 'harmful' or 'unknown'.

    The echoed prompt is stripped first because it contains both class names
    itself. Within the remaining answer the class mentioned LAST wins, on the
    assumption that a free-form explanation ends with its verdict (heuristic).

    Args:
        generated_text: The 'generated_text' string returned by the pipeline.

    Returns:
        One of HARMLESS, HARMFUL or UNKNOWN.
    """
    answer = generated_text.rsplit(ANSWER_MARKER, 1)[-1].lower()
    harmless_at = answer.rfind(HARMLESS)
    harmful_at = answer.rfind(HARMFUL)
    if harmless_at < 0 and harmful_at < 0:
        return UNKNOWN
    return HARMLESS if harmless_at > harmful_at else HARMFUL


# Initialize lists for storing labels and predictions
labels = []
predictions = []

# Process each image in the CSV file
for index, row in tqdm(data.iterrows(), total=data.shape[0]):
    image_path = row['IMAGE_PATH']
    try:
        # The context manager closes the file handle once the pixels have been
        # copied into the converted image.
        with Image.open(image_path) as img:
            image = img.convert('RGB')
        # The image is the positional input; the text goes in via `prompt=`.
        response = pipe(
            image,
            prompt=prompt,
            generate_kwargs={"max_new_tokens": MAX_NEW_TOKENS},
        )
        predicted_label = extract_label(response[0]['generated_text'])
    except Exception as e:
        # Best effort: report the failing image and keep evaluating the rest.
        print(f"Error processing image {image_path}: {e}")
        continue

    # str() keeps a non-string cell (e.g. a missing value) from aborting the run.
    labels.append(str(row['label']).strip().lower())
    predictions.append(predicted_label)

skipped = data.shape[0] - len(labels)
if skipped:
    print(f"Skipped {skipped} of {data.shape[0]} images because of errors.")

# Calculate the classification report and accuracy
if labels:
    # zero_division=0 avoids warnings for classes that never occur on one side
    # (for instance UNKNOWN, which has no ground-truth support).
    report = classification_report(labels, predictions, zero_division=0)
    accuracy = accuracy_score(labels, predictions)

    print(report)
    print(f"Accuracy: {accuracy:.4f}")
else:
    report = None
    accuracy = None
    print("No images were processed successfully; nothing to evaluate.")
