In [34]:
import pandas as pd
import re
import nltk


In [35]:
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [36]:
nltk.download('punkt')
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [37]:
# Data Preprocessing
# 1. Load the Dataset


df = 'Evaluation-dataset.csv' 
data = pd.read_csv(df)


In [38]:
# determine the correct column name


print("Columns dataset:", data.columns)


Columns dataset: Index(['Tires where delivered to the garage of my choice,the garage notified me when they had been delivered. A day and time was arranged with the garage and I went and had them fitted,a Hassel free experience.',
       'garage service positive', 'ease of booking positive', 'Unnamed: 3',
       'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8',
       'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
       'Unnamed: 13', 'Unnamed: 14'],
      dtype='object')


In [39]:
# Assuming the first column contains the review text



review_column = data.columns[0]

print(f"Using '{review_column}' as the review column.")

Using 'Tires where delivered to the garage of my choice,the garage notified me when they had been delivered. A day and time was arranged with the garage and I went and had them fitted,a Hassel free experience.' as the review column.


In [40]:
# Data Preprocessing
# 2. Text Cleaning



def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

In [41]:
# Data Preprocessing
# 3. Applying Text Cleaning


data['cleaned_text'] = data[review_column].apply(clean_text)
data['tokens'] = data['cleaned_text'].apply(word_tokenize)



In [42]:
# Subtheme Identification -  We identify subthemes by checking for the presence of predefined keywords (aspects and problems) in the tokenized text.
# Define subthemes



aspects = ['garage service', 'wait time']
problems = ['incorrect tyres sent']

In [43]:
 # identify subthemes


def identify_subthemes(tokens):
    subthemes = {'aspects': [], 'problems': []}
    text = ' '.join(tokens)
    for aspect in aspects:
        if aspect in text:
            subthemes['aspects'].append(aspect)
    for problem in problems:
        if problem in text:
            subthemes['problems'].append(problem)
    return subthemes

# Apply subtheme identification

data['subthemes'] = data['tokens'].apply(identify_subthemes)

In [44]:
# Sentiment Analysis: We use VADER to analyze the sentiment of each subtheme identified in the text.
# Initialize SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()

In [45]:

# analyze sentiment   [ nltk.download('vader_lexicon')]


def analyze_sentiment(subthemes, text):
    sentiments = {}
    for category, themes in subthemes.items():
        for theme in themes:
            if theme in text:
                sentiment_score = sia.polarity_scores(theme + ' ' + text)['compound']
                sentiment = 'positive' if sentiment_score > 0 else 'negative'
                sentiments[theme] = sentiment
    return sentiments

In [46]:
# Apply sentiment analysis


data['sentiments'] = data.apply(lambda row: analyze_sentiment(row['subthemes'], row['cleaned_text']), axis=1)

In [47]:
# Print Result


result = data[[review_column, 'subthemes', 'sentiments']]
print(result.head())

  Tires where delivered to the garage of my choice,the garage notified me when they had been delivered. A day and time was arranged with the garage and I went and had them fitted,a Hassel free experience.  \
0  Easy Tyre Selection Process, Competitive Prici...                                                                                                                                                           
1         Very easy to use and good value for money.                                                                                                                                                           
2              Really easy and convenient to arrange                                                                                                                                                           
3  It was so easy to select tyre sizes and arrang...                                                                                                                    

In [48]:
# Final result in  a CSV file


result.to_csv('Final_Solutions.csv', index=False)
print("Results saved to 'Final_Solutions.csv'")


Results saved to 'Final_Solutions.csv'
