In [None]:
!pip install spacy textblob gensim nltk
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
import pandas as pd
import spacy
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
import string

# Fetch the NLTK stopwords corpus. This must run before `stopwords.words(...)`
# below, which reads the downloaded corpus.
nltk.download('stopwords')

# Load the small English spaCy pipeline once at module level; every helper
# below reuses this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")

# Lookup sets consulted by `preprocess_text` when filtering tokens:
# English stop words from NLTK and the ASCII punctuation characters.
stop_words = set(stopwords.words('english'))
punctuation = set(string.punctuation)

def preprocess_text(text):
    """Lower-case and tokenize ``text``, dropping stop words and punctuation.

    Args:
        text: Raw input string.

    Returns:
        list[str]: Token texts, in document order, that are neither in
        ``stop_words`` nor in ``punctuation``.
    """
    kept = []
    for tok in nlp(text.lower()):
        word = tok.text
        # Skip anything found in either exclusion set.
        if word in stop_words or word in punctuation:
            continue
        kept.append(word)
    return kept

def extract_key_phrase(sentence):
    """Build a key phrase from the content words of ``sentence``.

    The sentence is parsed with the shared spaCy pipeline and only tokens
    tagged as noun, proper noun, verb or adjective are kept.

    Args:
        sentence: A single sentence as a string.

    Returns:
        str: The lemmas of the kept tokens joined by single spaces, in their
        original order (an empty string when no token qualifies).
    """
    content_tags = ("NOUN", "PROPN", "VERB", "ADJ")
    lemmas = [tok.lemma_ for tok in nlp(sentence) if tok.pos_ in content_tags]
    return " ".join(lemmas)

def analyze_sentiment(text):
    """Classify the sentiment of ``text`` from its TextBlob polarity.

    Args:
        text: The string to score.

    Returns:
        str: ``"positive"`` when the polarity is above zero, ``"negative"``
        when it is below zero, and ``"neutral"`` when it is exactly zero.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "positive"
    if polarity < 0:
        return "negative"
    # A polarity of exactly 0.0 carries no sentiment evidence; reporting it as
    # "negative" (the previous behaviour) would mislabel such sentences.
    return "neutral"

def extract_subthemes_and_sentiments(review):
    """Derive a key phrase and a sentiment label for every sentence of a review.

    Args:
        review: Full review text.

    Returns:
        tuple[list[str], list[str]]: ``(subthemes, sentiments)`` — two lists of
        equal length, one entry per sentence found by spaCy, in sentence order.
    """
    # Sentence segmentation comes from the shared spaCy pipeline.
    sentence_texts = [span.text for span in nlp(review).sents]

    subthemes = [extract_key_phrase(piece) for piece in sentence_texts]
    sentiments = [analyze_sentiment(piece) for piece in sentence_texts]
    return subthemes, sentiments

def generate_output_dataframe(reviews):
    """Tabulate per-sentence subthemes and sentiments for each review.

    Args:
        reviews: Sequence of review strings.

    Returns:
        pandas.DataFrame: One row per review with a ``Review`` column followed
        by ``Subtheme_k`` / ``Sentiment_k`` column pairs, where ``k`` runs from
        1 to the largest sentence count of any review. Reviews with fewer
        sentences are padded with empty strings. When ``reviews`` is empty the
        frame has only the ``Review`` column and no rows.
    """
    output_data = {"Review": reviews}

    # Extract subthemes and sentiments for each review
    subthemes_list = []
    sentiments_list = []
    for review in reviews:
        subthemes, sentiments = extract_subthemes_and_sentiments(review)
        subthemes_list.append(subthemes)
        sentiments_list.append(sentiments)

    # `default=0` keeps an empty `reviews` from raising ValueError in max().
    widest = max((len(subthemes) for subthemes in subthemes_list), default=0)

    # Create separate columns for each subtheme and sentiment, padding short
    # reviews with "" so every column has one value per review.
    for i in range(widest):
        output_data[f"Subtheme_{i+1}"] = [
            subthemes[i] if len(subthemes) > i else "" for subthemes in subthemes_list
        ]
        output_data[f"Sentiment_{i+1}"] = [
            sentiments[i] if len(sentiments) > i else "" for sentiments in sentiments_list
        ]

    return pd.DataFrame(output_data)

def main(csv_file="/content/Evaluation-dataset.csv", output_file="output.csv"):
    """Run the subtheme/sentiment extraction over a CSV of reviews.

    Reads ``csv_file``, treats its first column as the review text, builds the
    per-review table with ``generate_output_dataframe``, prints it and writes
    it to ``output_file``.

    Args:
        csv_file: Path of the input CSV. Defaults to the original hard-coded
            location.
        output_file: Path of the CSV to write. Defaults to ``output.csv``.
    """
    df = pd.read_csv(csv_file)

    # Reviews live in the first column. Blank cells are read by pandas as
    # float NaN, which the spaCy pipeline cannot parse, so coerce them to
    # empty strings and make sure every entry is a str.
    reviews = df.iloc[:, 0].fillna("").astype(str).tolist()

    # Generate output dataframe
    output_df = generate_output_dataframe(reviews)

    # Display output dataframe
    print(output_df)
    output_df.to_csv(output_file)


if __name__ == "__main__":
    main()


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                                                  Review  \
0      Easy Tyre Selection Process, Competitive Prici...   
1             Very easy to use and good value for money.   
2                  Really easy and convenient to arrange   
3      It was so easy to select tyre sizes and arrang...   
4      service was excellent. Only slight downside wa...   
...                                                  ...   
10126  I ordered the wrong tyres, however [REDACTED] ...   
10127  Good experience, first time I have used [REDAC...   
10128  I ordered the tyre I needed on line, booked a ...   
10129  Excellent service from point of order to fitti...   
10130  Seamless, well managed at both ends. I would r...   

                                              Subtheme_1 Sentiment_1  \
0      Easy Tyre Selection Process Competitive Pricin...    positive   
1                              easy use good value money    positive   
2                                easy convenient arrange    pos