In [9]:
import pandas as pd
import re
import requests
import time
import os
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

from urllib3.exceptions import InsecureRequestWarning
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

from dotenv import load_dotenv

import vertexai

In [10]:
# load_dotenv() runs first so that VERTEXAI_PROJECT_ID can be read from the
# environment; the Vertex AI SDK is then initialised for that project in us-central1.
load_dotenv()
project_id = os.environ.get('VERTEXAI_PROJECT_ID')
vertexai.init(location="us-central1", project=project_id)


In [11]:
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Image,
    Part,
)

In [12]:
# Module-level handle for the "gemini-1.5-pro" generative model.
# Note: analyze_single_id() further down builds its own model through
# init_vertexai() and does not read this global.
text_model = GenerativeModel("gemini-1.5-pro")

In [13]:
import http.client
import typing
import urllib.request

import IPython.display
from PIL import Image as PIL_Image
from PIL import ImageOps as PIL_ImageOps


def display_images(
    images: typing.Iterable[Image],
    max_width: int = 600,
    max_height: int = 350,
) -> None:
    """Render each image inline in the notebook.

    Args:
        images: Image objects exposing a ``_pil_image`` attribute.
        max_width: Widest allowed rendering, in pixels.
        max_height: Tallest allowed rendering, in pixels.

    Images that are not in RGB mode are converted to RGB, and images that
    exceed either bound are scaled down with ``ImageOps.contain`` before
    being displayed.
    """
    bounds = (max_width, max_height)
    for item in images:
        rendered = typing.cast(PIL_Image.Image, item._pil_image)

        # RGB is supported by all Jupyter environments (e.g. RGBA is not yet)
        if rendered.mode != "RGB":
            rendered = rendered.convert("RGB")

        width, height = rendered.size
        exceeds_bounds = width > max_width or height > max_height
        if exceeds_bounds:
            # Resize to display a smaller notebook image
            rendered = PIL_ImageOps.contain(rendered, bounds)

        IPython.display.display(rendered)

def get_image_bytes_from_url(image_url: str, timeout: float = 30.0) -> bytes:
    """Download an image and return its raw bytes.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a host that
            never answers, which would stall the whole notebook run.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # A browser-like User-Agent is sent; presumably some image hosts reject
    # the default client agent -- TODO confirm.
    response = requests.get(
        image_url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    response.raise_for_status()  # Raise an error for bad status codes
    return response.content

def load_image_from_url(image_url: str) -> Image:
    """Fetch an image over HTTP and wrap it as a Vertex AI ``Image``.

    The previous return annotation named the PIL image class, but the value
    is built with ``Image.from_bytes`` where ``Image`` is the class imported
    from ``vertexai.generative_models``; the annotation now says so.

    Args:
        image_url: URL of the image to fetch.

    Returns:
        The ``Image`` built from the downloaded bytes.

    Raises:
        Whatever ``get_image_bytes_from_url`` raises for a failed download.
    """
    image_bytes = get_image_bytes_from_url(image_url)
    return Image.from_bytes(image_bytes)


def display_content_as_image(content: str | Image | Part) -> bool:
    """Display ``content`` inline when it is an ``Image``.

    Returns:
        True if the content was an ``Image`` and was displayed, False otherwise.
    """
    if isinstance(content, Image):
        display_images([content])
        return True
    return False


def display_content_as_video(content: str | Image | Part) -> bool:
    """Display ``content`` as an inline video when it is a ``Part``.

    The part's ``file_data.file_uri`` has its ``gs://`` prefix removed and is
    appended to ``https://storage.googleapis.com/`` to form the video URL.

    Returns:
        True if the content was a ``Part`` and a video widget was displayed,
        False otherwise.
    """
    if isinstance(content, Part):
        media_part = typing.cast(Part, content)
        object_path = media_part.file_data.file_uri.removeprefix("gs://")
        player = IPython.display.Video(
            f"https://storage.googleapis.com/{object_path}",
            width=600,
        )
        IPython.display.display(player)
        return True
    return False


def print_multimodal_prompt(contents: list[str | Image | Part]):
    """
    Show every element of a Gemini prompt in a readable form.

    Each element is tried as an image first, then as a video; anything that
    is neither is printed as-is.
    """
    for item in contents:
        # `or` short-circuits, so the video renderer only runs when the
        # image renderer declined the item.
        rendered = display_content_as_image(item) or display_content_as_video(item)
        if not rendered:
            print(item)

In [14]:
# Load the combined LLM-response CSV from the Gemini data folder.
# The path is relative, so it resolves against the current working directory.
df = pd.read_csv('Data/Gemini/llm_responses_combined.csv')

In [15]:
# Show the column labels of the loaded table.
df.columns

Index(['Id', 'Title', 'Body', 'Title_Body', 'ImageURLs', 'llm_zero_shot_title',
       'llm_zero_shot_body', 'llm_zero_shot_combined', 'llm_few_shot_title',
       'llm_few_shot_body', 'llm_few_shot_combined', 'llm_cot_title',
       'llm_cot_body', 'llm_cot_combined'],
      dtype='object')

In [38]:
import pandas as pd
import re
import requests
import time
import os
from tqdm import tqdm
import warnings
from urllib3.exceptions import InsecureRequestWarning
from dotenv import load_dotenv
import vertexai
from vertexai.generative_models import GenerationConfig, GenerativeModel, Image, Part
from PIL import Image as PIL_Image

# Suppress every warning for the rest of the session. The blanket "ignore"
# filter already covers InsecureRequestWarning, so the category-specific
# call below is redundant (but harmless) while the first filter is active.
warnings.filterwarnings("ignore")
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

def init_vertexai():
    """Set up the Vertex AI SDK and hand back a Gemini model.

    Calls ``load_dotenv()``, reads ``VERTEXAI_PROJECT_ID`` from the
    environment, initialises Vertex AI for that project in ``us-central1``,
    and returns a ``GenerativeModel`` for ``gemini-1.5-pro``.
    """
    load_dotenv()
    vertexai.init(
        project=os.getenv('VERTEXAI_PROJECT_ID'),
        location="us-central1",
    )
    gemini = GenerativeModel("gemini-1.5-pro")
    return gemini

def load_image_from_url(image_url: str, timeout: float = 30.0) -> Image:
    """Load image from URL and convert to Vertex AI Image format.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a host that
            never answers, which would stall a whole analysis run.

    Returns:
        An ``Image`` built from the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        image_url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    response.raise_for_status()
    return Image.from_bytes(response.content)

def analyze_relationship(model, image_url, so_title, so_body, llm_title, llm_body, prompt_type, model_name):
    """Ask ``model`` to rate how an image, a Stack Overflow question, and an
    LLM-generated question relate to one another.

    Args:
        model: Object whose ``generate_content`` method is called with the prompt.
        image_url: URL of the image to download and attach to the prompt.
        so_title: Title of the original Stack Overflow question.
        so_body: Body of the original Stack Overflow question.
        llm_title: Title generated by the LLM under evaluation.
        llm_body: Body generated by the LLM under evaluation.
        prompt_type: Prompting-strategy label interpolated into the prompt.
        model_name: Name of the LLM under evaluation, interpolated into the prompt.

    Returns:
        The text of the model's response, or an empty string if anything in
        the download / generation path raised (the error is printed).
    """
    try:
        post_image = load_image_from_url(image_url)

        prompt = f'''
Analyze the following Stack Overflow post and provide ratings for three aspects:

1. Image-Question Relationship: To what extent is the original Stack Overflow question related to the image posted? 
   Explain your reasoning and provide a rating between 1-10. In this question you look at the question title and body and see how much of it looks related to the image? Is the whole question revolving around the image or the image only talks about a part of the question. DO NOT focus on anything else besides the image and the Stack Overflow question.

2. LLM-Image Relationship: To what extent does the {model_name} model's {prompt_type} generated question relate to the image?
   Explain your reasoning and provide a rating between 1-10. In this question, answer how much of the response from LLM matched the image in a way that image might be related to a Stack Overflow question. DO NOT focus on anything else besides the image and the LLM generated response.

3. LLM-SO Question Relationship: To what extent does the {model_name} model's {prompt_type} generated question relate to the original Stack Overflow question?
   Explain your reasoning and provide a rating between 1-10. In this question, compare the LLM response with the original Stack Overflow question and give a similarity score kind of. DO NOT focus on anything else besides the Stack Overflow question and the LLM generated response.

Original Stack Overflow:
Title: {so_title}
Body: {so_body}

{model_name} {prompt_type} Generated:
Title: {llm_title}
Body: {llm_body}

Please format your response exactly as follows:
ANALYSIS:
Image-Question Rating: [1-10]
Reasoning: [Your explanation]

LLM-Image Rating: [1-10]
Reasoning: [Your explanation]

LLM-SO Rating: [1-10]
Reasoning: [Your explanation]
'''

        sampling = GenerationConfig(
            max_output_tokens=2048,
            candidate_count=1,
            top_k=40,
            top_p=0.8,
            temperature=0,
        )

        reply = model.generate_content(
            [prompt, post_image],
            generation_config=sampling,
            stream=False,
        )

        # Reading .text stays inside the try so that a failure while
        # extracting the text is reported the same way as any other error.
        return reply.text

    except Exception as e:
        print(f"Error processing image {image_url}: {str(e)}")
        return ""

def extract_ratings(analysis_text):
    """Extract numerical ratings and reasoning from analysis text.

    The text is expected to contain three sections headed
    ``Image-Question Rating:``, ``LLM-Image Rating:`` and ``LLM-SO Rating:``,
    each followed by a ``Reasoning:`` paragraph.

    The rating line may carry extra text after the number (the model output
    captured in this notebook writes ``8/10``). The previous patterns required
    a newline immediately after the digits, so the reasoning was silently
    lost for such responses. A reasoning paragraph now runs until the next
    rating header or the end of the text.

    Args:
        analysis_text: Raw analysis string returned by the model.

    Returns:
        A dict with keys ``image_question_rating``, ``llm_image_rating``,
        ``llm_so_rating`` (int or None), ``image_question_reasoning``,
        ``llm_image_reasoning``, ``llm_so_reasoning`` (str, "" when absent)
        and ``full_analysis`` (the input, unchanged). If parsing raises
        (e.g. the input is not a string), the error is printed and every
        rating is None / every reasoning is "".
    """
    sections = (
        ('Image-Question', 'image_question'),
        ('LLM-Image', 'llm_image'),
        ('LLM-SO', 'llm_so'),
    )
    # Defaults double as the failure result; key order matches the original
    # so downstream DataFrame column order is unchanged.
    parsed = {
        'image_question_rating': None,
        'llm_image_rating': None,
        'llm_so_rating': None,
        'image_question_reasoning': "",
        'llm_image_reasoning': "",
        'llm_so_reasoning': "",
        'full_analysis': analysis_text,
    }
    # A reasoning paragraph ends where any of the three headers starts.
    next_header = r'(?:Image-Question|LLM-Image|LLM-SO) Rating:'

    try:
        found = {}
        for label, key in sections:
            header = re.escape(label) + r' Rating:'
            rating = re.search(header + r'[ \t]*(\d+)', analysis_text)
            reasoning = re.search(
                # [^\n]* skips whatever follows the number on the rating
                # line ("/10", a trailing "\r", ...).
                header + r'[^\n]*\n\s*Reasoning:\s*(.*?)(?=\n\s*' + next_header + r'|\Z)',
                analysis_text,
                re.DOTALL,
            )
            found[f'{key}_rating'] = int(rating.group(1)) if rating else None
            found[f'{key}_reasoning'] = reasoning.group(1).strip() if reasoning else ""
    except Exception as e:
        print(f"Error extracting ratings: {str(e)}")
        return parsed

    # Merge only after every section parsed, so a failure never yields a
    # half-filled result.
    parsed.update(found)
    return parsed

def _print_rating_summaries(comparison_df):
    """Print mean ratings grouped by model, by strategy, and by (model, strategy)."""
    rating_cols = ['image_question_rating', 'llm_image_rating', 'llm_so_rating']

    print("\nAnalysis Summary:")
    print("=" * 80)

    # Summary by model
    print("\nAverage Ratings by Model:")
    print(comparison_df.groupby('model')[rating_cols].mean())

    # Summary by strategy
    print("\nAverage Ratings by Prompting Strategy:")
    print(comparison_df.groupby('strategy')[rating_cols].mean())

    # Combined model-strategy summary
    print("\nDetailed Ratings by Model and Strategy:")
    print(comparison_df.pivot_table(
        index=['model', 'strategy'],
        values=rating_cols,
        aggfunc='mean'
    ))


def analyze_single_id(post_id):
    """Analyze a single Stack Overflow post ID across all models and prompting strategies.

    For each model folder under ``Data/`` the post's row is looked up in
    ``llm_responses_combined.csv``; for each prompting strategy the first
    image URL of the post is sent, together with the original and generated
    question, to ``analyze_relationship``. Parsed ratings are collected,
    written to ``Data/analysis_id_<post_id>_detailed.csv`` and summarized
    on stdout.

    Args:
        post_id: Value matched against the ``Id`` column of each CSV.

    Returns:
        A DataFrame with one row per (model, strategy) analysis. It is empty
        (and no CSV is written) when no analysis could be produced, e.g. the
        post ID is absent from every dataset.
    """
    # Local import keeps this block self-contained; ast is standard library.
    import ast

    # Initialize Vertex AI and model
    text_model = init_vertexai()

    # Define models and prompting strategies
    models = ['Gemini', 'GPT-4o', 'llama-3.2']
    strategies = ['zero_shot', 'few_shot', 'cot']
    rating_cols = ['image_question_rating', 'llm_image_rating', 'llm_so_rating']
    results = []

    print(f"\nAnalyzing Stack Overflow post ID: {post_id}")
    print("=" * 80)

    for model_name in models:
        try:
            # Load data for the specific model
            file_path = f'Data/{model_name}/llm_responses_combined.csv'
            df = pd.read_csv(file_path)

            # Get the specific post; report a missing ID explicitly instead of
            # letting .iloc[0] fail with an opaque out-of-bounds message.
            matches = df[df['Id'] == post_id]
            if matches.empty:
                print(f"Post ID {post_id} not found in {model_name} dataset")
                continue
            post = matches.iloc[0]

            # The column stores the string form of a Python list. It was
            # previously parsed with eval(), which would execute arbitrary
            # code found in the CSV; literal_eval only accepts literals.
            # A non-string cell (e.g. NaN for a missing value) means no images.
            raw_urls = post['ImageURLs']
            image_urls = ast.literal_eval(raw_urls) if isinstance(raw_urls, str) else []
            image_url = image_urls[0] if image_urls else None

            if not image_url:
                print(f"No image found for post ID {post_id} in {model_name} dataset")
                continue

            # Process each prompting strategy
            for strategy in strategies:
                print(f"\nProcessing {model_name} model with {strategy} prompting...")

                # Get LLM responses for this strategy
                llm_title = post[f'llm_{strategy}_title']
                llm_body = post[f'llm_{strategy}_body']

                # Analyze relationships
                analysis = analyze_relationship(
                    text_model,
                    image_url,
                    post['Title'],
                    post['Body'],
                    llm_title,
                    llm_body,
                    strategy.replace('_', ' '),
                    model_name
                )

                # Extract ratings
                ratings = extract_ratings(analysis)

                # Store results
                results.append({
                    'Id': post_id,
                    'model': model_name,
                    'strategy': strategy,
                    **ratings
                })

                # Print analysis
                print(f"\n{model_name} {strategy} Analysis:")
                print("-" * 40)
                print(analysis)
                print("-" * 40)

                # Add delay between analyses to avoid rate limiting
                # (skipped after the very last model/strategy combination).
                if not (model_name == models[-1] and strategy == strategies[-1]):
                    print("\nWaiting 10 seconds before next analysis...")
                    time.sleep(10)

        except Exception as e:
            # Best-effort: a failure for one model must not abort the others.
            print(f"Error processing {model_name} for post ID {post_id}: {str(e)}")
            continue

    # Create comparison DataFrame
    comparison_df = pd.DataFrame(results)

    # With no rows there are no 'model'/'strategy' columns, so the groupby
    # calls below would raise KeyError; report and return the empty frame.
    if comparison_df.empty:
        print(f"\nNo analyses were produced for post ID {post_id}; nothing to summarize.")
        return comparison_df

    # When every rating of a column is None the column is object-dtype, which
    # can make mean() fail; coerce to numeric so missing ratings become NaN.
    comparison_df[rating_cols] = comparison_df[rating_cols].apply(pd.to_numeric, errors='coerce')

    # Save detailed results
    output_file = f'Data/analysis_id_{post_id}_detailed.csv'
    comparison_df.to_csv(output_file, index=False)

    # Create and display summary tables
    _print_rating_summaries(comparison_df)

    print(f"\nDetailed analysis saved to: {output_file}")

    return comparison_df

# Interactive interface: repeatedly prompt for a post ID and analyze it until
# the user enters -1 or declines to continue.
if __name__ == "__main__":
    while True:
        # Only the integer parse is guarded by the ValueError handler, so a
        # ValueError raised inside analyze_single_id is no longer misreported
        # as an invalid post ID.
        try:
            post_id = int(input("\nEnter Stack Overflow post ID to analyze (or -1 to exit): "))
        except ValueError:
            print("Please enter a valid post ID")
        else:
            if post_id == -1:
                break
            try:
                results = analyze_single_id(post_id)
            except Exception as e:
                # Keep the session alive; report the failure and offer another try.
                print(f"Error: {str(e)}")

        proceed = input("\nWould you like to analyze another post? (y/n): ")
        if proceed.lower() != 'y':
            break

    print("\nAnalysis complete. Thank you!")


Analyzing Stack Overflow post ID: 79124400

Processing Gemini model with zero_shot prompting...

Gemini zero_shot Analysis:
----------------------------------------
ANALYSIS:
Image-Question Rating: 8/10
Reasoning: The image shows the VS Code Source Control tab with the "Commit" button available. The Stack Overflow question is about customizing the behavior of this button to always include the "and Push" option. The image directly illustrates the area of the VS Code interface the question is about.

LLM-Image Rating: 9/10
Reasoning: The Gemini response focuses on a scenario where the VS Code Source Control tab is empty and the user cannot commit changes. The image clearly shows an empty Source Control tab, making the generated question highly relevant to the image.

LLM-SO Rating: 2/10
Reasoning: The Gemini response and the original Stack Overflow question both deal with the VS Code Source Control tab and committing changes. However, the specific issues and desired outcomes are differe