<a href="https://colab.research.google.com/github/Adhini99/Subtheme-Sentimental-Analysis/blob/main/subtheme%20sentimental%20analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the dataset path used later in this notebook resolves.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import spacy
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import pandas as pd

# spaCy English pipeline; its dependency labels and POS tags drive aspect extraction
nlp = spacy.load('en_core_web_sm')

# Fetch the NLTK resources used by the tokenizer, stopword filter, lemmatizer and VADER
for _resource in ('vader_lexicon', 'punkt', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# Shared module-level helpers reused by the functions defined in later cells
sid = SentimentIntensityAnalyzer()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Function to preprocess text
def preprocess(text):
    """Normalise a review into a space-separated string of lemmatised content words.

    Steps: tokenise, keep alphabetic tokens only, lowercase them, drop English
    stopwords, then lemmatise what remains with WordNet.

    Args:
        text: Raw review text. Anything that is not a ``str`` (for example the
            float NaN pandas produces for an empty CSV cell) is treated as
            having no content.

    Returns:
        The surviving lemmas joined by single spaces; ``''`` when nothing
        survives or ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ''

    # Lowercase *before* the stopword test: the stopword list is lowercase, so
    # testing the original casing let capitalised stopwords ("The", "It") through.
    lowered = (word.lower() for word in word_tokenize(text) if word.isalpha())
    return ' '.join(lemmatizer.lemmatize(word) for word in lowered if word not in stop_words)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
# Function to extract aspects and their descriptors
def extract_aspects(doc):
    """Collect (aspect, verb) pairs from a parsed document.

    A token counts as an aspect when its dependency label is ``nsubj`` or
    ``dobj`` and its syntactic head is tagged ``VERB``.

    Args:
        doc: Iterable of tokens exposing ``dep_``, ``text`` and ``head``
            (with ``pos_`` and ``text`` on the head), e.g. a spaCy ``Doc``.

    Returns:
        List of ``(token text, head text)`` tuples in document order.
    """
    wanted_deps = ('nsubj', 'dobj')
    return [
        (tok.text, tok.head.text)
        for tok in doc
        if tok.dep_ in wanted_deps and tok.head.pos_ == 'VERB'
    ]


In [None]:
# Function to determine sentiment of a phrase
def get_sentiment(phrase):
    """Label a phrase as 'positive', 'negative' or 'neutral'.

    The label comes from the ``compound`` entry of the shared analyzer's
    polarity scores: at or above 0.05 is positive, at or below -0.05 is
    negative, anything in between is neutral.

    Args:
        phrase: Text to score.

    Returns:
        One of the strings ``'positive'``, ``'negative'`` or ``'neutral'``.
    """
    compound = sid.polarity_scores(phrase)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'


In [None]:
# Function to analyze subtheme sentiments in a review
def analyze_review(review):
    """Map each aspect found in a review to a sentiment label.

    The review is preprocessed, parsed with the spaCy pipeline, and every
    (aspect, verb) pair from ``extract_aspects`` is scored by passing the
    phrase "<verb> <aspect>" to ``get_sentiment``.

    Args:
        review: Raw review text.

    Returns:
        Dict of ``{aspect text: sentiment label}``. If an aspect occurs more
        than once, the label from its last occurrence wins.
    """
    # NOTE(review): the parser receives the preprocessed string, not the raw
    # sentence; dependency labels are derived from that text. Confirm this is intended.
    parsed = nlp(preprocess(review))
    return {
        aspect: get_sentiment(f"{verb} {aspect}")
        for aspect, verb in extract_aspects(parsed)
    }

In [None]:
# Load CSV data into a pandas DataFrame
def load_data(csv_file):
    """Read a header-less review CSV into a DataFrame.

    Args:
        csv_file: Path (or any other source ``pandas.read_csv`` accepts) of
            the CSV to load.

    Returns:
        DataFrame whose columns are labelled 0, 1, 2, ... because the file is
        read with ``header=None``.
    """
    # Honour the argument: the previous body ignored it and always read one
    # hard-coded Drive path, so passing a different file loaded the wrong data.
    return pd.read_csv(csv_file, header=None)

In [None]:
# Apply analysis to each review in the dataset
def analyze_dataset(df):
    """Run ``analyze_review`` over every entry in column ``0`` of ``df``.

    Args:
        df: Frame-like object whose ``df[0]`` yields the review texts.

    Returns:
        List with one ``analyze_review`` result per review, in iteration order.
    """
    return [analyze_review(text) for text in df[0]]

In [None]:
# Example usage: location of the evaluation CSV under the mounted Drive folder
csv_file = '/content/drive/MyDrive/Colab Notebooks/Evaluation-dataset.csv'

In [None]:
# Load the data (load_data reads with header=None, so columns are labelled 0, 1, 2, ...)
df = load_data(csv_file)

In [None]:
# Print the first few rows to inspect the structure of the CSV:
# column 0 holds the review text; later columns hold strings such as
# 'garage service positive' or NaN.
print(df.head())

                                                  0   \
0  Tires where delivered to the garage of my choi...   
1  Easy Tyre Selection Process, Competitive Prici...   
2         Very easy to use and good value for money.   
3              Really easy and convenient to arrange   
4  It was so easy to select tyre sizes and arrang...   

                         1                         2   \
0   garage service positive  ease of booking positive   
1   garage service positive  value for money positive   
2  value for money positive                       NaN   
3  ease of booking positive                       NaN   
4         location positive  value for money positive   

                         3    4    5    6    7    8    9    10   11   12   13  \
0                       NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   
1                       NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   
2                       NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN

In [None]:
# Analyze the dataset: one analyze_review result (a dict) per entry of column 0
results = analyze_dataset(df)

In [None]:
# Add the results to the DataFrame as a new 'subtheme_sentiments' column (one dict per row)
df['subtheme_sentiments'] = results

In [None]:
# Print the DataFrame with subtheme sentiments
# (the printed output is abbreviated: leading and trailing rows around a '...' row)
print(df)

                                                       0  \
0      Tires where delivered to the garage of my choi...   
1      Easy Tyre Selection Process, Competitive Prici...   
2             Very easy to use and good value for money.   
3                  Really easy and convenient to arrange   
4      It was so easy to select tyre sizes and arrang...   
...                                                  ...   
10127  I ordered the wrong tyres, however [REDACTED] ...   
10128  Good experience, first time I have used [REDAC...   
10129  I ordered the tyre I needed on line, booked a ...   
10130  Excellent service from point of order to fitti...   
10131  Seamless, well managed at both ends. I would r...   

                                1                              2  \
0         garage service positive       ease of booking positive   
1         garage service positive       value for money positive   
2        value for money positive                            NaN   
3      