In [3]:
from google.colab import drive
# Run this cell first: it mounts Google Drive at /content/drive, which is
# where the later cell reads its input CSV and writes its results file.
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
import io
import re

import pandas as pd
import requests
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Map unit spellings to the canonical singular unit name used in predictions.
# Canonical names use the British "-metre" / "-litre" spelling. Besides the
# plural forms, the American singular ("centimeter") and the British plural
# ("centimetres") are listed for every metre/litre unit so that all spellings
# of one unit collapse to a single output string.
plural_to_singular = {
    # --- mass ---
    'grams': 'gram',
    'kilograms': 'kilogram',
    'micrograms': 'microgram',
    'milligrams': 'milligram',
    'ounces': 'ounce',
    'pounds': 'pound',
    'tons': 'ton',
    # --- length ---
    'centimeters': 'centimetre',
    'centimetres': 'centimetre',
    'centimeter': 'centimetre',
    'meters': 'metre',
    'metres': 'metre',
    'meter': 'metre',
    'millimeters': 'millimetre',
    'millimetres': 'millimetre',
    'millimeter': 'millimetre',
    'feet': 'foot',
    'inches': 'inch',
    'yards': 'yard',
    # --- voltage ---
    'kilovolts': 'kilovolt',
    'millivolts': 'millivolt',
    'volts': 'volt',
    # --- power ---
    'kilowatts': 'kilowatt',
    'watts': 'watt',
    # --- volume ---
    'centiliters': 'centilitre',
    'centilitres': 'centilitre',
    'centiliter': 'centilitre',
    'cubic feet': 'cubic foot',
    'cubic inches': 'cubic inch',
    'cups': 'cup',
    'deciliters': 'decilitre',
    'decilitres': 'decilitre',
    'deciliter': 'decilitre',
    'fluid ounces': 'fluid ounce',
    'gallons': 'gallon',
    'imperial gallons': 'imperial gallon',
    'liters': 'litre',
    'litres': 'litre',
    'liter': 'litre',
    'microliters': 'microlitre',
    'microlitres': 'microlitre',
    'microliter': 'microlitre',
    'milliliters': 'millilitre',
    'millilitres': 'millilitre',
    'milliliter': 'millilitre',
    'pints': 'pint',
    'quarts': 'quart'
}

# Questions table on the mounted Drive -- edit the file name here to point the
# notebook at a different CSV. Later code reads its 'index', 'image_link' and
# 'question' columns.
file_path = '/content/drive/MyDrive/test_questions.csv'
df = pd.read_csv(file_path)

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoint to load, pinned to a dated revision so that the tokenizer and the
# model are fetched from the same snapshot of the repository.
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
)

# Place the model's weights on the selected device.
model.to(device)

# Helpers to process each image link and its corresponding question
def _format_answer(answer):
    """Extract the first "<number> <unit>" pair from a free-text model answer.

    The number is the first run of digits (optionally with a decimal part)
    that is followed by whitespace and a word. The unit is that word, or that
    word plus the next one when the two together spell a unit known to
    ``plural_to_singular`` (for example "fluid ounces" or "cubic feet").
    The unit is then normalised through ``plural_to_singular``; units that are
    not in the map are kept exactly as the model wrote them.

    Args:
        answer: Text returned by the model.

    Returns:
        "<number> <unit>", or "" when no number followed by a word is found.
    """
    # The optional third group peeks at the following word so that two-word
    # units can be recognised; it never changes which number is matched.
    match = re.search(r'(\d+\.?\d*)\s+(\w+)(?:\s+(\w+))?', answer)
    if not match:
        return ""

    number, first_word, second_word = match.groups()
    unit = first_word
    if second_word is not None:
        # Accept both map keys ("cubic feet") and canonical names
        # ("cubic foot") as valid two-word units.
        known_units = set(plural_to_singular) | set(plural_to_singular.values())
        two_word_unit = f"{first_word} {second_word}".lower()
        if two_word_unit in known_units:
            unit = two_word_unit

    unit = plural_to_singular.get(unit.lower(), unit)
    return f"{number} {unit}"


def ask_question_from_link(image_link, question):
    """Download an image, ask the model a question about it, and format the reply.

    The image is fetched over HTTP, converted to RGB, encoded with
    ``model.encode_image`` and passed to ``model.answer_question`` together
    with ``question``. The reply is reduced to "<number> <unit>" by
    ``_format_answer``. The URL, question and formatted answer are printed.

    Args:
        image_link: URL of the image to download.
        question: Question to ask about the image.

    Returns:
        The formatted answer ("" if the reply contains no number/unit pair),
        or the string "Error" if any step raises. The exception message is
        printed in that case; nothing is re-raised.
    """
    try:
        # A timeout keeps one stalled download from blocking the whole run;
        # raise_for_status turns HTTP errors into a clear message instead of
        # letting PIL fail on an error page.
        response = requests.get(image_link, timeout=30)
        response.raise_for_status()
        # response.content is the fully downloaded, content-decoded body;
        # BytesIO gives PIL the seekable file object it expects.
        image = Image.open(io.BytesIO(response.content)).convert('RGB')

        enc_image = model.encode_image(image).to(device)
        answer = model.answer_question(enc_image, question, tokenizer)

        formatted_answer = _format_answer(answer)

        print(f"Image URL: {image_link}")
        print(f"Question: {question}")
        print(f"Answer: {formatted_answer}")
        print('-' * 50)

        return formatted_answer
    except Exception as e:
        # Best effort by design: one bad row must not abort the batch.
        print(f"Error processing image: {str(e)}")
        return "Error"


def _predict_row(row):
    """Answer the question held in one row of the questions table."""
    return ask_question_from_link(row['image_link'], row['question'])


# Query the model once per row and store the formatted answers on the frame.
df['prediction'] = df.apply(_predict_row, axis=1)

# The results file carries only the row identifier and its prediction.
output_file_path = '/content/drive/MyDrive/updated_with_answers.csv'
output_df = df.loc[:, ['index', 'prediction']]
output_df.to_csv(output_file_path, index=False)




Image URL: https://m.media-amazon.com/images/I/110EibNyclL.jpg
Question: What is the height?
Answer: 57.7 inch
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11TU2clswzL.jpg
Question: What is the width?
Answer: 42.5 inch
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11TU2clswzL.jpg
Question: What is the height?
Answer: 200 centimetre
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11TU2clswzL.jpg
Question: What is the depth?
Answer: 
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11gHj8dhhrL.jpg
Question: What is the depth?
Answer: 
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11gHj8dhhrL.jpg
Question: What is the height?
Answer: 
--------------------------------------------------
Image URL: https://m.media-amazon.com/images/I/11gHj8

Unnamed: 0,index,prediction
0,0,57.7 inch
1,1,42.5 inch
2,2,200 centimetre
3,3,
4,4,
