# In [4]:  (Jupyter notebook cell marker)
import pandas as pd
from openai import OpenAI
import time
from concurrent.futures import ThreadPoolExecutor
import logging
import re


# Shared OpenAI SDK client, pointed at the local Ollama endpoint.
client = OpenAI(
    api_key="ollama",  # the SDK requires a value here, but it is not used
    base_url="http://localhost:11434/v1",
)

def create_sample_data(limit=6):
    """Build the sample customer-feedback DataFrame.

    The full sample set holds 32 rows. Rows 7-32 used to live inside a
    discarded string literal and were unreachable; they are now real data,
    selected through ``limit``.

    Args:
        limit: Maximum number of rows to return, counted from the first row.
            Defaults to 6, which reproduces the previous output exactly.
            Pass ``None`` to get all 32 rows.

    Returns:
        A DataFrame with the columns ``Row_ID``, ``ProblemDescription``,
        ``Resolution`` and ``CustomerFeedback``.

    Raises:
        ValueError: If ``limit`` is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative or None, got {limit}")

    columns = ["Row_ID", "ProblemDescription", "Resolution", "CustomerFeedback"]

    # One tuple per row, in the same order as `columns`.
    records = [
        (
            1,
            "I want to know how much my flight bonus will be for next year.",
            "Here’s a link to the bonus calculation guidelines.",
            "I wasn’t contacted and got a generic response with no specifics.",
        ),
        (
            2,
            "Why is my ticket cost higher than usual?",
            "The price reflects new seasonal rates.",
            "I don’t understand why the cost increased. No detailed explanation.",
        ),
        (
            3,
            "When will my refund for the canceled flight be processed?",
            "Refunds typically take 4-6 weeks to process.",
            "I received a generic timeline. No specific details for my case.",
        ),
        (
            4,
            "I can’t access my flight booking details.",
            "You can access them directly on the website by logging in.",
            "The website is not working, and the advice didn’t help.",
        ),
        (
            5,
            "How do I reset my password for my booking account?",
            "Follow the steps in the attached guide to reset your password.",
            "The guide wasn’t clear, I couldn’t reset my password.",
        ),
        (
            6,
            "Why hasn’t my refund been processed yet?",
            "Refunds are processed at the end of each month.",
            "I wasn’t informed why my refund was delayed.",
        ),
        # Rows below are excluded by the default limit of 6.
        (
            7,
            "Can you explain the process for claiming a flight delay compensation?",
            "Please refer to the compensation policy document linked here.",
            "The document is too complicated, and I didn’t find any clear answers.",
        ),
        (
            8,
            "I need help with making a group booking for my department.",
            "Here’s a guide for making group bookings on our website.",
            "The guide missed some important steps for large group bookings.",
        ),
        (
            9,
            "Why hasn’t my booking for an extra seat been processed?",
            "It takes up to 48 hours to confirm extra seat requests.",
            "Agent didn’t check my specific request, just gave a standard response.",
        ),
        (
            10,
            "I need help with changing my flight date.",
            "Here’s the link to change your booking online.",
            "The website process was unclear and I couldn’t change my flight.",
        ),
        (
            11,
            "Can I get an update on my refund request?",
            "There’s no update yet, but we’ll notify you soon.",
            "No real update, just delays in communication.",
        ),
        (
            12,
            "How do I submit my request for special assistance during the flight?",
            "Refer to the assistance request page on our website.",
            "The webpage wasn’t user-friendly, and I couldn’t submit my request.",
        ),
        (
            13,
            "Why was my request for priority boarding denied?",
            "Priority boarding is only available to premium passengers.",
            "I wasn’t told about the eligibility criteria before.",
        ),
        (
            14,
            "Can you explain why my checked bag was delayed?",
            "Bag delays are often caused by high traffic during peak hours.",
            "Agent didn’t explain the specific cause for my delayed luggage.",
        ),
        (
            15,
            "How do I get a copy of my flight receipt?",
            "You can download the receipt from the booking portal.",
            "The receipt wasn’t available on the portal as mentioned.",
        ),
        (
            16,
            "Why is my seat upgrade request taking longer than expected?",
            "Upgrades are subject to availability and processing time.",
            "Agent didn’t explain the exact reason for the delay.",
        ),
        (
            17,
            "How can I get help with using the airline’s mobile app?",
            "Please refer to the app user guide or attend our mobile app training session.",
            "No immediate support, had to wait for a session.",
        ),
        (
            18,
            "Why was my frequent flyer status downgraded?",
            "Your status is based on recent travel activity, which may have decreased.",
            "No explanation of the specific travel activity that caused the downgrade.",
        ),
        (
            19,
            "I need assistance with claiming my frequent flyer miles.",
            "Check the attached guide on how to claim miles from your recent flights.",
            "The guide was unclear and didn’t help with my claim.",
        ),
        (
            20,
            "Why is my baggage fee higher than usual?",
            "Baggage fees are adjusted depending on flight routes and seasons.",
            "Agent didn’t explain why my fee was higher for this specific flight.",
        ),
        (
            21,
            "How do I update my credit card details for booking?",
            "Update your card information through the booking portal.",
            "The portal didn’t allow me to update my details, poor service.",
        ),
        (
            22,
            "I need help understanding my flight charge breakdown.",
            "Check the breakdown guide attached to your booking confirmation.",
            "The breakdown didn’t explain the charges I was concerned about.",
        ),
        (
            23,
            "Why is it taking so long to process my flight change request?",
            "Flight changes depend on availability and schedule adjustments.",
            "No solution was offered to speed up the processing time.",
        ),
        (
            24,
            "Can I get assistance with canceling my flight?",
            "Follow the cancellation steps outlined on our website.",
            "The website instructions were confusing and didn’t work.",
        ),
        (
            25,
            "How do I register for the airline's loyalty program?",
            "Please visit our loyalty program registration page on the website.",
            "The page was hard to find and took too long to navigate.",
        ),
        (
            26,
            "Can I change my seat selection after booking?",
            "You can modify seat selection through the booking portal.",
            "The portal was not responsive, and I couldn’t change my seat.",
        ),
        (
            27,
            "Why does it take so long to get customer support for flight queries?",
            "Support is provided based on queue order and agent availability.",
            "Unacceptable wait time and poor customer service.",
        ),
        (
            28,
            "How can I escalate my complaint about flight delays?",
            "Escalations are possible after 5 business days if not resolved.",
            "I wasn’t given immediate escalation options, felt ignored.",
        ),
        (
            29,
            "Why do I need to resubmit my claim for flight damage compensation?",
            "Your previous claim was incomplete, please provide additional details.",
            "I wasn’t told what was missing, causing unnecessary delays.",
        ),
        (
            30,
            "I need clarification on the airline's policy for pet travel.",
            "Please refer to the attached policy document for pet travel guidelines.",
            "The policy document was too vague and didn’t answer my questions.",
        ),
        (
            31,
            "I need help getting my delayed baggage back.",
            "Your baggage is being tracked, and you will receive an update soon.",
            "No one followed up with me and the agent was very rude about it.",
        ),
        (
            32,
            "Why is my seat not assigned despite checking in?",
            "Seats are assigned based on availability at check-in.",
            "Agent was dismissive and said it’s ‘just the way it is.’",
        ),
    ]

    selected = records if limit is None else records[:limit]
    # Passing `columns` explicitly keeps the schema even when no rows are selected.
    return pd.DataFrame(selected, columns=columns)


# Maps each feedback theme code to the plain-English definition that is
# interpolated into the classification prompt. The wording is runtime prompt
# text and is reproduced exactly.
theme_descriptions = {
    "TIME_WAITING": (
        "Feedback related to long call waiting times, call queue lengths, "
        "or delays in receiving responses. This is specifically about the "
        "time taken to access customer support."
    ),
    "POLICY": (
        "Feedback related to company policies, rules, or standard procedures. "
        "This includes cases where customers feel policies are unclear, "
        "seem unfair, or limit service options."
    ),
    "SERVICE_OR_PROCESS": (
        "Feedback related to difficult processes or complicated workflows."
    ),
    "RESOLUTION_DID_NOT_ANSWER_QUESTION": (
        "Feedback related to customer's problem not being resolved, it can be "
        "resolutions that are generic or incomplete or do not answer the "
        "initial question asked.."
    ),
    "SELF_HELP_RESOURCES": (
        "Feedback related to QRG, websites, documentation, user guides, "
        "manuals, or other self-service materials. This includes unclear "
        "instructions, missing information, or difficult-to-use resources."
    ),
    "AGENT_MANNERS": (
        "Feedback related to agent's attitudes towards customers. "
        "This includes lack of empathy, being rude, abusive or abrupt."
    ),
    "AGENT_KNOWLEDGE": (
        "Feedback related to customers being critical of an agent's expertise "
        "or understanding. This includes incorrect information, inability to "
        "explain clearly, or lack of technical knowledge."
    ),
    "TECHNOLOGY": (
        "Feedback related to systems, software, or technical infrastructure. "
        "This includes system errors, software bugs, or lack of ease of use "
        "with digital tools."
    ),
    "REPEATED_FOLLOW_UP": (
        "Feedback related to updates on existing requests logged with customer "
        "service teams. This means lack of updates, or the need for the "
        "customer to have enquired multiple times to get a response.."
    ),
}

def parse_model_response(response_content, model):
    """Split a raw model reply into ``(think, answer)``.

    For models whose name starts with ``"deepseek"``, the text inside the
    first ``<think>...</think>`` pair becomes the think part and everything
    after the closing tag becomes the answer. In every other case (a different
    model, or no think tags found) the think part is empty and the whole
    reply is the answer. Both parts are stripped of surrounding whitespace.
    """
    if model.startswith("deepseek"):
        found = re.search(r'<think>(.*?)</think>(.*)', response_content, re.DOTALL)
        if found:
            reasoning, verdict = found.groups()
            return reasoning.strip(), verdict.strip()

    # No reasoning section to separate: the reply is the answer as-is.
    return "", response_content.strip()

def classify_single_theme(row, theme, model="deepseek-r1:8b"):
    """Ask the model whether one feedback row relates to one theme.

    Args:
        row: Mapping-like record providing ``Row_ID``, ``ProblemDescription``,
            ``Resolution`` and ``CustomerFeedback``.
        theme: Theme code to test for; its definition is looked up in
            ``theme_descriptions`` when present.
        model: Model name passed to the chat-completions call.

    Returns:
        A ``(row_id, result)`` tuple where ``result`` is a dict with the keys
        ``'think'`` and ``'answer'``. If the request or parsing fails, the
        error is logged and ``result`` is
        ``{'think': "Error", 'answer': "Cannot classify"}``.
    """
    text = f"Problem: {row['ProblemDescription']} Resolution: {row['Resolution']} Feedback: {row['CustomerFeedback']}"

    prompt = (
        f"Does the following text mention issues related to '{theme}'? "
        "ONLY respond with 'Yes' or 'No'.\n\n"
        f"Text: {text}"
    )
    # Give the model the definition of the theme being asked about, rather
    # than the repr of the whole theme_descriptions dict.
    definition = theme_descriptions.get(theme)
    if definition:
        prompt += (
            f"\n\nIn deciding you MUST use this definition of '{theme}' "
            f"to guide your answer: {definition}"
        )

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response_content = response.choices[0].message.content
        think_part, answer_part = parse_model_response(response_content, model)
    except Exception as e:
        # Per-row boundary: one failed request must not abort the whole theme.
        logging.error("Error processing row %s: %s", row['Row_ID'], e)
        return row['Row_ID'], {
            'think': "Error",
            'answer': "Cannot classify"
        }

    return row['Row_ID'], {
        'think': think_part,
        'answer': answer_part
    }

def process_theme(df, theme, model="deepseek-r1:8b", max_workers=5):
    """Classify every row of ``df`` for one theme using a thread pool.

    Results are kept in the same order as the rows of ``df``, so each answer
    is written next to the row it belongs to regardless of how ``Row_ID`` is
    ordered (or whether it is unique).

    Args:
        df: DataFrame with the columns read by ``classify_single_theme``.
        theme: Theme code; also used as the name of the answer column.
        model: Model name forwarded to ``classify_single_theme``.
        max_workers: Size of the thread pool.

    Returns:
        The same ``df`` object, modified in place: a ``<theme>`` column holds
        the answers and, for models whose name starts with ``"deepseek"``, a
        ``<theme>_think`` column holds the reasoning text.
    """
    outcomes = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit in row order and remember the Row_ID for error reporting.
        submitted = [
            (row['Row_ID'], executor.submit(classify_single_theme, row, theme, model))
            for _, row in df.iterrows()
        ]

        # Collect in submission order (not completion order) so position i in
        # `outcomes` always corresponds to row i of `df`.
        for row_id, future in submitted:
            try:
                _, outcome = future.result()
            except Exception as e:
                logging.error("Failed to process row %s: %s", row_id, e)
                outcome = {'think': "Error", 'answer': "Cannot classify"}
            outcomes.append(outcome)

    df[str(theme)] = [outcome['answer'] for outcome in outcomes]

    # Only add think columns for Deepseek model
    if model.startswith("deepseek"):
        df[f'{theme}_think'] = [outcome['think'] for outcome in outcomes]

    return df

def main():
    """Classify the sample feedback against every theme and save the result.

    Builds the sample DataFrame, runs ``process_theme`` once per theme with
    the configured model, writes the augmented frame to a CSV file, and
    prints the first rows of a few columns.
    """
    # Without this, the logging.info calls below are dropped: the root logger
    # defaults to WARNING. basicConfig does nothing if handlers already exist.
    logging.basicConfig(level=logging.INFO)

    # Create sample data
    df = create_sample_data()

    # Define model
    MODEL = "mistral:latest"  # Change this to use different models

    # Single source of truth for the output file, so the log message below
    # cannot drift from the path actually written.
    output_path = "feedback_analysis_results_latest_mistral.csv"

    # Define themes
    themes = [
        "TIME_WAITING",
        "POLICY",
        "SERVICE_OR_PROCESS",
        "RESOLUTION_DID_NOT_ANSWER_QUESTION",
        "SELF_HELP_RESOURCES",
        "AGENT_MANNERS",
        "AGENT_KNOWLEDGE",
        "TECHNOLOGY",
        "REPEATED_FOLLOW_UP"
    ]

    # Process each theme
    for theme in themes:
        logging.info("Processing theme: %s", theme)
        df = process_theme(df, theme, model=MODEL)

    # Save results
    df.to_csv(output_path, index=False)
    logging.info("Analysis complete. Results saved to %s", output_path)

    # Display sample results
    print("\nSample of results:")
    # Display a few columns for better readability
    sample_columns = ['Row_ID', 'ProblemDescription', themes[0]]
    if MODEL.startswith("deepseek"):
        sample_columns.append(f'{themes[0]}_think')
    print(df[sample_columns].head())


if __name__ == "__main__":
    main()


# Captured notebook output from a previous run:
#
# Sample of results:
#    Row_ID                                 ProblemDescription TIME_WAITING
# 0       1  I want to know how much my flight bonus will b...           No
# 1       2           Why is my ticket cost higher than usual?           No
# 2       3  When will my refund for the canceled flight be...           No
# 3       4          I can’t access my flight booking details.           No
# 4       5  How do I reset my password for my booking acco...           No
