In [8]:
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Fetch the NLTK resources used below: tokenizer models for word_tokenize,
# the English stopword list, and WordNet for the lemmatizer.
# 'punkt_tab' is the tokenizer resource that word_tokenize loads on
# NLTK 3.9 and later; 'punkt' is kept for older NLTK releases.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Name of the column that holds the raw review text.
review_column = 'review'

# Fifteen column names applied positionally to the CSV: the review text, two
# labelled subtheme columns, then placeholder names for columns 3 to 14.
column_names = ['review', 'subtheme_1', 'subtheme_2']
column_names += [f'Unnamed: {position}' for position in range(3, 15)]

# With names= given and no header= argument, pandas reads the first line of
# the file as data rather than as a header row.
data = pd.read_csv('Evaluation-dataset.csv', names=column_names)
print(data.columns)

# Shared NLP resources used by preprocess_text.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Return a normalised copy of a review for keyword matching.

    The text is lowercased, tokenised with ``word_tokenize``, stripped of
    tokens found in ``stop_words``, and each remaining token is passed
    through ``lemmatizer.lemmatize``. The surviving tokens are joined with
    single spaces.

    Args:
        text: The raw review. Anything that is not a ``str`` (for example
            the NaN that pandas uses for an empty CSV cell, or ``None``)
            is treated as an empty review.

    Returns:
        The cleaned text, or ``''`` when ``text`` is not a string.
    """
    # Empty CSV cells arrive as float NaN, which has no .lower(); treat any
    # non-string value as "no text" instead of raising AttributeError.
    if not isinstance(text, str):
        return ''

    tokens = word_tokenize(text.lower())
    kept = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    ]
    return ' '.join(kept)

# Store a normalised copy of every review next to the original text; the
# subtheme keyword matching runs on this column.
data['cleaned_review'] = data[review_column].map(preprocess_text)

# Ordered (label, type, keywords) rules. The order of this table is the order
# in which matching subthemes are returned.
_SUBTHEME_RULES = (
    ('Incorrect tyres sent', 'problem', ('incorrect tyre', 'missing tyre')),
    ('Garage service', 'aspect', ('garage', 'service')),
    ('Wait time', 'aspect', ('delay', 'wait time')),
    ('Ease of booking', 'aspect', ('booking', 'book')),
    ('Price', 'aspect', ('price', 'cost', 'value')),
    ('Selection process', 'aspect', ('selection', 'choose', 'choice')),
    ('Fitting process', 'aspect', ('fitting', 'install', 'fit')),
)


def _keyword_pattern(keywords):
    """Compile a regex matching any of ``keywords`` at the start of a word."""
    alternatives = '|'.join(
        # Words of a multi-word keyword may be separated by any whitespace.
        r'\s+'.join(re.escape(part) for part in keyword.split())
        for keyword in keywords
    )
    # \b anchors the match to a word start so 'fit' no longer fires inside
    # 'benefit' or 'book' inside 'facebook'. The optional 're'/'pre' prefix
    # keeps forms such as 'refit', 'rebook' and 'pre-booked' matching.
    # There is no trailing boundary, so inflections ('booked', 'fitted',
    # 'prices') still match.
    return re.compile(r'\b(?:re|pre)?-?(?:' + alternatives + r')')


_SUBTHEME_PATTERNS = tuple(
    (label, sub_type, _keyword_pattern(keywords))
    for label, sub_type, keywords in _SUBTHEME_RULES
)


def identify_subthemes(text):
    """Return the subthemes whose keywords occur in ``text``.

    Matching is case-sensitive and anchored to the start of a word, so a
    keyword matches the word itself and its inflected forms but not an
    unrelated word that merely contains it. Every match produced here would
    also have matched a plain substring test; only mid-word hits are dropped.

    Args:
        text: Review text to scan. The keywords are lowercase, so the text
            is expected to be lowercased already. Non-string values are
            treated as containing no subthemes.

    Returns:
        A list of ``(subtheme, type)`` tuples in rule-table order, with at
        most one entry per subtheme. The list is empty when nothing matches.
    """
    if not isinstance(text, str):
        return []
    return [
        (label, sub_type)
        for label, sub_type, pattern in _SUBTHEME_PATTERNS
        if pattern.search(text)
    ]

# Single VADER analyzer instance shared by every get_sentiment call.
analyzer = SentimentIntensityAnalyzer()

# Tag each cleaned review with its list of (subtheme, type) pairs.
data['subthemes'] = data['cleaned_review'].map(identify_subthemes)

def get_sentiment(text):
    """Classify ``text`` as 'positive', 'negative' or 'neutral'.

    The label is derived from the 'compound' value returned by
    ``analyzer.polarity_scores``: at or above 0.05 is positive, at or below
    -0.05 is negative, and anything strictly in between is neutral.
    """
    compound = analyzer.polarity_scores(text)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

def analyze_subthemes(row):
    """Attach the review's sentiment to each of its subthemes.

    The sentiment comes from the whole review text in ``row[review_column]``,
    not from the part of the text that mentions each subtheme, so every
    subtheme of a review receives the same label.

    Args:
        row: A mapping-like row providing 'subthemes' (an iterable of
            ``(subtheme, type)`` pairs) and the raw review text under
            ``review_column``.

    Returns:
        A list of ``(subtheme, type, sentiment)`` tuples, one per input
        pair and in the same order. The list is empty when the row has no
        subthemes.
    """
    pairs = list(row['subthemes'])
    if not pairs:
        # Nothing to label, so skip scoring the review altogether.
        return []

    # The sentiment does not depend on the subtheme, so score the review
    # once rather than once per pair.
    sentiment = get_sentiment(row[review_column])
    return [(subtheme, sub_type, sentiment) for subtheme, sub_type in pairs]

# For every review, build its list of (subtheme, type, sentiment) triples.
data['subtheme_sentiments'] = data.apply(analyze_subthemes, axis=1)

# One output row per triple. A review with an empty list keeps a single row
# whose 'subtheme_sentiments' value is NaN after explode.
exploded_data = data.explode('subtheme_sentiments')

# Split each triple into three columns. Missing entries are replaced with an
# all-None triple first: if the first element handed to pd.DataFrame were NaN
# instead of a tuple, pandas would build a single-column frame and the
# three-column assignment below would raise.
sentiment_columns = ['Subtheme', 'Type', 'Sentiment']
sentiment_rows = [
    entry if isinstance(entry, tuple) else (None, None, None)
    for entry in exploded_data['subtheme_sentiments']
]
exploded_data[sentiment_columns] = pd.DataFrame(
    sentiment_rows, index=exploded_data.index, columns=sentiment_columns
)

# The helper columns are not part of the exported dataset.
exploded_data = exploded_data.drop(columns=['subtheme_sentiments', 'cleaned_review'])

print(exploded_data[[review_column, 'Subtheme', 'Type', 'Sentiment']])

exploded_data.to_csv('review_subtheme_sentiments_power_bi.csv', index=False)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sahil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sahil\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sahil\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Index(['review', 'subtheme_1', 'subtheme_2', 'Unnamed: 3', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13',
       'Unnamed: 14'],
      dtype='object')
                                                  review           Subtheme  \
0      Tires where delivered to the garage of my choi...     Garage service   
0      Tires where delivered to the garage of my choi...  Selection process   
0      Tires where delivered to the garage of my choi...    Fitting process   
1      Easy Tyre Selection Process, Competitive Prici...     Garage service   
1      Easy Tyre Selection Process, Competitive Prici...  Selection process   
...                                                  ...                ...   
10129  I ordered the tyre I needed on line, booked a ...              Price   
10129  I ordered the tyre I needed on line, booked a ...    Fitting process   
10130  Excellent service fr