In [47]:
import pandas as pd
import ollama
from ollama import chat
from pydantic import BaseModel
#from openai import OpenAI

# Set Display Settings
pd.set_option('display.max_colwidth', 70)

In [48]:
data=pd.read_excel("/Users/santhanakrishnans/Downloads/ford_service_review_processed.xlsx")

data=data.dropna(subset=['review'])
data["review_original"]=data["review"]
data=data.reset_index()


In [49]:
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from ollama import chat  # Import the correct method from the ollama package
from tqdm import tqdm  # For the progress bar

def analyze_single_review(review, predefined_topics, allowed_sentiments, index):
    """
    Analyze a single review using LLaMA 3.2 from Ollama.

    Args:
        review (str): The review text to analyze.
        predefined_topics (list): List of predefined topics.
        allowed_sentiments (list): List of allowed sentiments.
        index (int): Index of the review (for debugging purposes).

    Returns:
        dict: A dictionary containing the index and analysis result.
    """
    prompt_template = """
    Analyze the following review and do the following:
    1. Identify the key topics talked about in the review only out of the following comma-separated list: {topics}.
    2. Identify the sentiments expressed in the review from the following comma-separated list: {sentiments}.

    Here are a few examples of topic identification:
    Example 1:
    Review: "The music feature stopped working again. I'm really frustrated with this issue."
    - Chain of Thought:
    1. The review mentions "music feature", which suggests reviewer talks about Music System. 
    2.Stopped working again suggest a Product here being music system failure hence Product Quality
    - Key topics: Music System,Product Quality
    
    
    Example 2:
    Review: "The staff at the dealership were amazing! Tomasina provided great service, and I had a smooth experience."
    - Chain of Thought:
    1. The review highlights positive experiences with the staff or service person hence "Staff"
    - Key topics: Staff 
    
    
    Example 3:
    Review: "Came to change brake pad. The product doesn't meet the standards, and I'm disappointed."
    - Chain of Thought:
    1. The review mentions "brake pad", indicating the issue is with a specific part.
    2. The phrase "doesn't meet the standards" suggests a problem with the product’s quality.
    - Key topics: Brake,Product Quality
    
    
    Example 4:
    Review: "I had a quick and easy service appointment. The staff were courteous, and the mechanic did a great job in quick time."
    - Chain of Thought:
    1. The review mentions a "quick and easy service appointment", which suggests that the service time was favorable.
    2. The staff were "courteous", indicating a positive review of staff or service person behavior.
    3. The mechanic did a "great job", suggesting that service quality was also good and broadly classify it into staff.
    - Key topics: Staff,Service Time
    
    
    Example 5:
    Review: "While the service was good, the facilities were basic and not ideal. I wish there were more amenities."
    - Chain of Thought:
    1. The review mentions a service was good, which suggest overall experience was good
    2. however they also mention facilities were basic and not ideal and they would like more aminities hence indicating negative sentiment due to lack of Facilities
    - Key topics: Service quality, Facilities
    
    
    Example 6:
    Review: "I found only one screw on my left front wheel after recall by this service center. That mistake totally ruined my wheel and cost hundreds of dollars to fix."
    - Chain of Thought:
    1. The review mentions only one screw was there,which means overall Service quality was bad
    2. Customer also mentions a specific part as wheel hence identify Wheel/Tires,also this was a recall service hence labelling recall
    - Key topics: Wheel/Tires, Recall,Service Quality
    
    
    Example 7:
    Review: "I went to get an inspection and they told me to fix a bulb for $400. When I did it outside, they did it for $49. They were ripping people off. I waited 3 hours and they still couldn’t figure out what was wrong with the car."
    - Chain of Thought:
    1. The review mentions feeling ripped off which indicates high Service Cost
    2. Review also takes about waiting for 3 hours hence Service Time
    3. It also mentions that they could not figure out whats wrong with thw car hence Service Diagnostics
    - Key topics: Service Cost,Service Diagnostics,Service Time
    
    
    Example 8:
    Review: "Worst customer service ever. Took my F150 for a recall. From the start, she was so negative about the issue, told me it would take months to fix, and that I would have to leave the truck there. She is not even a technician and gave a diagnostic before inspecting the car. I was in disbelief. I suggest taking your Ford to a better service department."
    - Chain of Thought:
    1. The Customer talks about a service person being negative and expresses disappointment also they have mentioned it was  for a recall
    2.Customer has also expressed that Staff mentioned it would take months to fix hence a Topic Service Time and also alleges wrong diagnosis hence Service Diagnostics
    - Key topics: Staff ,Recall,Service Time,Service Diagnostics

    Example 9:
    - Chain of Thought:
    Review: "Allen was cool"
    1. The Customer talks about a service person called "Allen" hence Staff
    - Key topics: Staff 

     Example 10:
    - Chain of Thought:
    Review: "5 star for quality of work and professionalism."
    1. The Customer talks about quality of work ,which is service hence Service Quality
    2.They also mention professionalism which could be attributed to Staff behaviour hence Staff
    - Key topics: Service Quality,Staff 
    
    Return the output in this format:
    Key topics: [comma-separated list of topics]
    Sentiment: [sentiment]
    """
    prompt = prompt_template.format(
        topics=", ".join(predefined_topics),
        sentiments=", ".join(allowed_sentiments),
    ) + f"\n\nReview {index + 1}: {review}"

    # Call the model
   response = chat(
    model="llama3.2",
    temperature=0.4,  # Ideal temperature for predefined topics
    messages=[
        {"role": "system", "content": "You are a helpful assistant for analyzing reviews."},
        {"role": "user", "content": prompt},
    ],
)

    if hasattr(response, "message") and hasattr(response.message, "content"):
        return {"index": index, "analysis": response.message.content.strip()}
    else:
        raise ValueError(f"Unexpected response format for review {index}:", response)


def analyze_reviews_in_parallel(dataframe, predefined_topics, allowed_sentiments, max_workers=4):
    """
    Analyze reviews in parallel and add analysis back to the DataFrame.

    Args:
        dataframe (pd.DataFrame): DataFrame containing a column "review" with reviews to analyze.
        predefined_topics (list): List of predefined topics.
        allowed_sentiments (list): List of allowed sentiments.
        max_workers (int): Maximum number of threads for parallel processing.

    Returns:
        pd.DataFrame: Updated DataFrame with an "analysis" column containing the results.
    """
    # Prepare inputs for parallel processing
    reviews = dataframe["review"].tolist()
    results = []

    # Use ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Wrap the futures in a progress bar
        with tqdm(total=len(reviews), desc="Processing Reviews") as progress_bar:
            future_to_review = {
                executor.submit(
                    analyze_single_review, review, predefined_topics, allowed_sentiments, idx
                ): idx
                for idx, review in enumerate(reviews)
            }

            for future in as_completed(future_to_review):
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    print(f"Error processing review {future_to_review[future]}: {e}")
                finally:
                    progress_bar.update(1)

    # Sort results by index to ensure correct order
    results.sort(key=lambda x: x["index"])

    # Add the analysis results to the DataFrame
    dataframe["analysis"] = [result["analysis"] for result in results]

    return dataframe


# Example usage
if __name__ == "__main__":
    # Create a sample DataFrame
   

    predefined_topics = [
        'Product Quality', 'Staff', 'Service Time',
        'Music System', 'Recall', 'Brake',
        'Oil Change/Refill', 'Wheel/Tires',
        'Product Quality', 'Service Cost', 'Service Diagnostics',
        'Facilities', 'Windshield', 'Door', 'Battery',
        'Engine', 'Product Availability', 'Not Classified'
    ]
    allowed_sentiments = ["Positive", "Negative", "Neutral"]

    # Analyze the reviews with parallel processing
    updated_data = analyze_reviews_in_parallel(data, predefined_topics, allowed_sentiments, max_workers=4)

    # Display the results
    print(updated_data)


Processing Reviews: 100%|███████████████████| 1181/1181 [32:15<00:00,  1.64s/it]

      index                  serviceCentre  \
0         0  Ford of Queens Service Center   
1         1  Ford of Queens Service Center   
2         2  Ford of Queens Service Center   
3         3  Ford of Queens Service Center   
4         4  Ford of Queens Service Center   
...     ...                            ...   
1176   1775                          Tesla   
1177   1776                          Tesla   
1178   1777                          Tesla   
1179   1779                          Tesla   
1180   1781                          Tesla   

                                   dummy  overall rating  dummy1  \
0                    Car Service Station             1.0     624   
1                    Car Service Station             3.5     624   
2                    Car Service Station             3.5     624   
3                    Car Service Station             3.5     624   
4                    Car Service Station             3.5     624   
...                                  ..




In [50]:
updated_data["Key Topics"] = updated_data["analysis"].str.extract(r"Key topics:\s*([^\n]*)")
updated_data["Sentiment"] = data["analysis"].str.extract(r"Sentiment:\s*([^\n]*)")

# Drop the analysis column (optional)
#updated_data = updated_data.drop(columns=["analysis"])

# Display the result
print(updated_data)


      index                  serviceCentre  \
0         0  Ford of Queens Service Center   
1         1  Ford of Queens Service Center   
2         2  Ford of Queens Service Center   
3         3  Ford of Queens Service Center   
4         4  Ford of Queens Service Center   
...     ...                            ...   
1176   1775                          Tesla   
1177   1776                          Tesla   
1178   1777                          Tesla   
1179   1779                          Tesla   
1180   1781                          Tesla   

                                   dummy  overall rating  dummy1  \
0                    Car Service Station             1.0     624   
1                    Car Service Station             3.5     624   
2                    Car Service Station             3.5     624   
3                    Car Service Station             3.5     624   
4                    Car Service Station             3.5     624   
...                                  ..

In [51]:
updated_data.to_csv("/Users/santhanakrishnans/Downloads/llama3.2_few_shot_v1.csv")

In [52]:
# Predefined topics
predefined_topics = [
    'Product Quality', 'Staff', 'Service Time', 'Music System', 'Recall',
    'Brake', 'Oil Change/Refill', 'Wheel/Tires', 'Service Cost',
    'Service Diagnostics', 'Facilities', 'Windshield', 'Door', 'Battery',
    'Engine', 'Product Availability', 'Not Classified', 'Customer Service',
    'Service Quality'
]



# Create a column for each predefined topic
for topic in predefined_topics:
    updated_data[topic] = updated_data.apply(
        lambda row: 1 if isinstance(row['Key Topics'], str) and topic.lower() in row['Key Topics'].lower()
        else (1 if isinstance(row['analysis'], str) and topic.lower() in row['analysis'].lower() else 0),
        axis=1
    )
# Display the updated DataFrame
print(updated_data)

      index                  serviceCentre  \
0         0  Ford of Queens Service Center   
1         1  Ford of Queens Service Center   
2         2  Ford of Queens Service Center   
3         3  Ford of Queens Service Center   
4         4  Ford of Queens Service Center   
...     ...                            ...   
1176   1775                          Tesla   
1177   1776                          Tesla   
1178   1777                          Tesla   
1179   1779                          Tesla   
1180   1781                          Tesla   

                                   dummy  overall rating  dummy1  \
0                    Car Service Station             1.0     624   
1                    Car Service Station             3.5     624   
2                    Car Service Station             3.5     624   
3                    Car Service Station             3.5     624   
4                    Car Service Station             3.5     624   
...                                  ..

In [53]:
updated_data.to_csv("/Users/santhanakrishnans/Downloads/llama3.2_few_shot_topics_v_1.csv")

In [54]:
# Calculate the number of rows with no topics (excluding 'Not Classified')
topics_to_consider = [topic for topic in predefined_topics if topic != 'Not Classified']
no_topics_count = updated_data[topics_to_consider].sum(axis=1).eq(0).sum()

# Calculate the percentage of rows with no topics
no_topics_percentage = (no_topics_count / len(updated_data)) * 100

print(f"{no_topics_percentage:.2f}% of rows do not contain any topic from the predefined list (excluding 'Not Classified').")

2.79% of rows do not contain any topic from the predefined list (excluding 'Not Classified').


In [55]:
# Get the topic occurrence counts
topic_counts = updated_data[predefined_topics].sum().sort_values(ascending=False)

# Print the topic counts
print(topic_counts)

Staff                   840
Service Quality         447
Service Time            437
Product Quality         372
Facilities              251
Service Cost            170
Product Availability    164
Not Classified           87
Music System             58
Recall                   57
Customer Service         34
Oil Change/Refill        26
Battery                  24
Brake                    23
Wheel/Tires              21
Engine                   20
Door                     18
Service Diagnostics      17
Windshield               16
dtype: int64


In [62]:
topic_counts.to_csv("Topic_Distribution_fewshot.csv")

In [57]:
sentiment_data=pd.read_csv("/Users/santhanakrishnans/Downloads/llama3.2_few_shot_topics_v_1.csv")

In [58]:
import pandas as pd
import re

# Remove the string "star" and any other special characters, and convert to int
sentiment_data['rating'] = sentiment_data['rating'].apply(lambda x: int(re.sub('[^0-9]', '', x)))

# Label the sentiment based on the rating
sentiment_data['sentiment_original'] = sentiment_data['rating'].apply(lambda x: 'Positive' if x > 3 else 'Neutral' if x == 3 else 'Negative')

In [59]:
sentiment_summary = sentiment_data['sentiment_original'].value_counts()
print(sentiment_summary).groupby()

sentiment_original
Positive    727
Negative    396
Neutral      58
Name: count, dtype: int64


AttributeError: 'NoneType' object has no attribute 'groupby'

In [60]:
sentiment_summary_AI = sentiment_data['Sentiment'].value_counts()
print(sentiment_summary_AI).groupby()

Sentiment
Positive             685
Negative             386
Neutral               54
** Negative           21
** Positive           19
Positive               4
Negative               3
**                     1
** Not Classified      1
** Neutral             1
Name: count, dtype: int64


AttributeError: 'NoneType' object has no attribute 'groupby'

In [63]:
model

NameError: name 'model' is not defined