In [5]:
import os
import sys
from typing import List, Dict

# Make the project root importable before pulling in project-local modules.
sys.path.append(os.path.abspath('..'))

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import psycopg2

from config.db import DB_CONFIG

In [3]:
def fetch_feedback_data():
    """Fetch all feedback data from the database.

    Opens a connection using ``DB_CONFIG``, reads every row of the
    ``user_feedback`` table ordered by ``timestamp``, and returns the rows
    as a DataFrame.

    Returns:
        pandas.DataFrame: one row per ``user_feedback`` record with columns
        ``session_id``, ``query_text``, ``query_image``, ``feedback`` and
        ``timestamp``.

    Raises:
        Any exception raised by the database driver while connecting or
        querying is propagated unchanged; the cursor and connection are
        closed before it leaves this function.
    """
    columns = ['session_id', 'query_text', 'query_image', 'feedback', 'timestamp']

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()
        try:
            # Query to get all feedback data
            cur.execute("""
        SELECT session_id, query_text, query_image, feedback, timestamp
        FROM user_feedback
        ORDER BY timestamp
    """)
            results = cur.fetchall()
        finally:
            # Release the cursor even if the query or fetch failed.
            cur.close()
    finally:
        # Always hand the connection back, success or failure, so a failed
        # query cannot leak an open database session.
        conn.close()

    # Convert to DataFrame
    return pd.DataFrame(results, columns=columns)

def calculate_precision(feedback_list: List[Dict]) -> float:
    """Return the share of a session's feedback entries that are positive.

    Args:
        feedback_list: Feedback entries for a single session. Each entry is a
            mapping that must contain a 'feedback' key (entries are also
            expected to carry a 'pid' key, which is not read here).

    Returns:
        float: Number of entries whose 'feedback' is exactly ``True`` divided
        by the total number of entries; 0.0 when there are no entries.
    """
    # Nothing to score: an empty list or a missing (falsy) value.
    if not feedback_list:
        return 0.0

    # Only the literal boolean True counts as a positive vote; truthy
    # stand-ins such as 1 or "true" are deliberately not counted.
    hits = [entry for entry in feedback_list if entry['feedback'] is True]
    n_hits = len(hits)
    n_entries = len(feedback_list)

    if n_entries > 0:
        return n_hits / n_entries
    return 0.0

In [8]:
# Fetch data: one DataFrame row per record returned by fetch_feedback_data()
df = fetch_feedback_data()

# Calculate precision for each row from its 'feedback' entries
df['precision'] = df['feedback'].apply(calculate_precision)

# Basic statistics
print("Feedback Analysis Summary:")
print(f"Total number of sessions: {len(df)}")
print(f"Average precision across all sessions: {df['precision'].mean():.3f}")
print(f"Median precision across all sessions: {df['precision'].median():.3f}")
print("\nPrecision Distribution:")
print(df['precision'].describe())

# Group by query type (text, image, or both).
# The masks are evaluated in priority order by np.select, so 'both' is only
# assigned when BOTH inputs are present; a row with neither input gets its own
# 'neither' label instead of being silently counted as 'both'.
has_text = df['query_text'].notna()
has_image = df['query_image'].notna()
df['query_type'] = np.select(
    [has_text & has_image, has_text, has_image],
    ['both', 'text_only', 'image_only'],
    default='neither',
)

# Calculate average precision by query type
print("\nAverage Precision by Query Type:")
print(df.groupby('query_type')['precision'].agg(['mean', 'count']))

Feedback Analysis Summary:
Total number of sessions: 1
Average precision across all sessions: 0.600
Median precision across all sessions: 0.600

Precision Distribution:
count    1.0
mean     0.6
std      NaN
min      0.6
25%      0.6
50%      0.6
75%      0.6
max      0.6
Name: precision, dtype: float64

Average Precision by Query Type:
            mean  count
query_type             
both         0.6      1
