# Use case one when there is limited feature 


In [12]:
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification,pipeline
import torch
import requests
from bs4 import BeautifulSoup
import spacy
from collections import Counter
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nlp = spacy.load("en_core_web_md")


class TextPreprocessor:
    def __init__(self):
        # Convert stopwords to lowercase to ensure case-insensitive comparison
        self.stopwords = set(word.lower() for word in stopwords.words('english')).union({
            'i', 'the', 'these', 'there', 'are', 'this', 'that', 'we', 'you', 'it', 'they', 'he', 'she', 'them', 'is', 'am', 'was', 'were', 'been', 'being'
        })
        self.lemmatizer = WordNetLemmatizer()
    
    def remove_html_tags(self, text):
        return re.sub(r'<.*?>', '', text)
    
    def remove_accented_chars(self, text):
        return text.encode('ascii', 'ignore').decode('utf-8')
    
    def replace_diacritics(self, text):
        return re.sub(r'[\u0300-\u036F]', '', text)
    
    def expand_contractions(self, text):
        contractions = {"can't": "cannot", "won't": "will not", "it's": "it is"}  # Expand this list as needed
        for contraction, expansion in contractions.items():
            text = text.replace(contraction, expansion)
        return text
    
    def remove_urls(self, text):
        return re.sub(r'http\S+', '', text)
    
    def remove_emojis(self, text):
        return re.sub(r'[:;=X][oO\-]?[D\)\(P]', '', text)
    
    def remove_special_characters(self, text):
        return re.sub(r'[^a-zA-Z0-9\s]', '', text)
    
    def remove_numbers(self, text):
        return re.sub(r'\d+', '', text)
    
    def remove_extra_whitespace(self, text):
        return re.sub(r'\s+', ' ', text).strip()
    
    def clean_text(self, text):
        text = self.remove_html_tags(text)
        text = self.remove_accented_chars(text)
        text = self.replace_diacritics(text)
        text = self.expand_contractions(text)
        text = self.remove_urls(text)
        text = self.remove_emojis(text)
        text = self.remove_special_characters(text)
        text = self.remove_numbers(text)
        
        # Remove stopwords (case insensitive)
        text = " ".join([word.lower() for word in text.split() if word.lower() not in self.stopwords])
        
        # Lemmatize the remaining words
        text = " ".join([self.lemmatizer.lemmatize(word) for word in text.split()])
        
        text = self.remove_extra_whitespace(text)
        return text


def analyze_sentiment(text, tokenizer, model):
    """Encodes text and predicts sentiment score."""
    tokens = tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=512)
    result = model(tokens)
    sentiment_score = int(torch.argmax(result.logits)) + 1  # Convert to 1-5 scale
    return sentiment_score

def extract_entities(text):
    """Extracts key subjects and objects from text using BERT-based NER and dependency parsing."""
    doc = nlp(text)
    
    subjects, objects = [], []
    
    # Dependency parsing: Find subjects & objects
    for token in doc:
        if "subj" in token.dep_:
            subjects.append(token.text)
        if "obj" in token.dep_ or token.pos_ == "NOUN":
            objects.append(token.text)

    # Named Entity Recognition (NER) as a backup
    entities = ner_pipeline(text)
    for entity in entities:
        label = entity["entity_group"]
        word = entity["word"]
        
        if label in ["PER", "ORG","LOC","PROD"]:  # Treat as subjects
            subjects.append(word)
        elif label in ["LOC", "MISC"]:  # Treat as objects
            objects.append(word)

    # Fallback: Use first found nouns if subjects/objects are empty
    if not subjects:
        subjects = [token.text for token in doc if token.pos_ == "NOUN"][:1]  
    if not objects:
        objects = [token.text for token in doc if token.pos_ == "NOUN"][:1]  

    return {
        "subject": " ".join(subjects) if subjects else "Unknown",
        "object": " ".join(objects) if objects else "Unknown"
    }


def collect_feedback(dataframe):
    """Placeholder function to simulate collecting user feedback."""
    dataframe['corrected_sentiment'] = dataframe['predicted_sentiment']  # Simulated user corrections
    return dataframe

def generate_report(dataframe):
    """Creates a summary report of sentiment distribution and accuracy."""
    report = dataframe.groupby('predicted_sentiment').size().reset_index(name='count')
    print("Sentiment Distribution:")
    print(report)
    return report

# Load Model and Tokenizer
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
# Load NER model for entity extraction
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

# Fetch and Process Data
data = {
    "text": [
        "This super app is truly a lifesaver, offering an all-in-one solution that brings together everything I need in one convenient place, making it incredibly easy to manage various tasks without switching between different apps.",
        "The app crashes every time I try to order food. So frustrating!",
        "Love how I can pay bills, order food, and book rides all in one app!",
        "The interface is so cluttered and hard to navigate.",
        "Best app ever! Saves me so much time and effort.",
        "The payment feature is so convenient and secure.",
        "I tried booking a ride, but the app kept freezing. Disappointed.",
        "The food delivery service is fast and reliable!",
        "The app logs me out randomly. Needs a fix ASAP.",
        "I love the rewards program! Great way to save money.",
        "The customer support is terrible. No one responds to queries.",
        "This app has made my life so much easier. Highly recommend!",
        "The ride-hailing feature is overpriced compared to other apps.",
        "The grocery delivery is always on time and fresh!",
        "The app is slow and takes forever to load.",
        "I love the variety of services offered. Truly a super app!",
        "The app keeps asking for unnecessary permissions. Not cool.",
        "The food delivery options are amazing! So many restaurants to choose from.",
        "The app is buggy and needs a major update.",
        "I use this app every day for everything. It’s fantastic!",
        "The payment feature failed during checkout. Very unreliable.",
        "The app’s design is sleek and user-friendly.",
        "The ride-hailing feature is so convenient and affordable.",
        "The app drains my phone battery way too fast.",
        "I love the discounts and coupons available on this app!",
        "The app doesn’t work offline. Needs improvement.",
        "The food delivery is always late. Not worth it.",
        "This app is my go-to for all my daily needs!",
        "The app’s notifications are too frequent and annoying.",
        "The grocery delivery service is a game-changer!",
        "The app freezes every time I try to make a payment.",
        "I love how I can track my orders in real-time!",
        "The app’s customer service is unhelpful and slow.",
        "The ride-hailing feature is so reliable and safe.",
        "The app’s interface is confusing and not intuitive.",
        "I love the seamless integration of all services in one app!",
        "The app takes up too much storage on my phone.",
        "The food delivery is always hot and fresh. Great service!",
        "The app’s login process is too complicated.",
        "I love the cashback offers on payments. Saves me money!",
        "The app crashes every time I try to book a ride.",
        "The grocery delivery is always accurate and on time.",
        "The app’s design is outdated and needs a refresh.",
        "I love how I can pay for everything with just one app!",
        "The app’s search feature is terrible. Hard to find anything.",
        "The food delivery options are limited in my area.",
        "This app has everything I need. It’s amazing!",
        "The app’s notifications are helpful and timely.",
        "The ride-hailing feature is too expensive during peak hours.",
        "I love the convenience of booking rides and ordering food in one app!",
        "The app’s updates always break something. Needs better testing.",
        "The grocery delivery is always fresh and well-packaged.",
        "The app’s payment feature is so fast and secure.",
        "The app’s customer support is responsive and helpful.",
        "I love the variety of cuisines available for delivery!",
        "The app’s interface is too cluttered and overwhelming.",
        "The ride-hailing feature is so convenient for daily commutes.",
        "The app’s rewards program is a great way to save money.",
        "The food delivery is always late and cold. Not worth it.",
        "I love how I can track my ride in real-time!",
        "The app’s login process is seamless and quick.",
        "The grocery delivery is always reliable and fresh.",
        "The app’s design is modern and easy to use.",
        "I love the discounts available on ride-hailing!",
        "The app’s notifications are too intrusive and annoying.",
        "The food delivery is always fast and delicious.",
        "The app’s payment feature failed during an important transaction.",
        "I love how I can do everything in one app. So convenient!",
        "The app’s search feature is slow and unresponsive.",
        "The ride-hailing feature is so affordable and reliable.",
        "The app’s updates have made it much faster and smoother.",
        "I love the variety of services available in one app!",
        "The app’s customer support is unhelpful and rude.",
        "The grocery delivery is always on time and well-packaged.",
        "The app’s interface is intuitive and easy to navigate.",
        "I love the cashback offers on food delivery!",
        "The app’s notifications are helpful and not too frequent.",
        "The ride-hailing feature is so convenient for late-night trips.",
        "The app’s design is sleek and modern.",
        "I love how I can pay bills and order food in one app!",
        "The app’s search feature is accurate and fast.",
        "The food delivery is always hot and fresh. Great service!",
        "The app’s payment feature is secure and reliable.",
        "I love the rewards program. It’s a great way to save money!",
        "The app’s customer support is quick and helpful.",
        "The grocery delivery is always fresh and on time.",
        "The app’s interface is clean and easy to use.",
        "I love the convenience of booking rides and ordering food in one app!",
        "The app’s notifications are timely and useful.",
        "The ride-hailing feature is so reliable and safe.",
        "The app’s design is user-friendly and modern.",
        "I love the variety of services offered. Truly a super app!",
        "The app’s updates have improved its performance significantly.",
        "The food delivery is always fast and delicious.",
        "The app’s payment feature is so convenient and secure.",
        "I love the discounts available on grocery delivery!",
        "The app’s customer support is responsive and helpful.",
        "The ride-hailing feature is so affordable and convenient.",
        "The app’s interface is intuitive and easy to navigate.",
        "I love how I can do everything in one app. It’s amazing!",
        "The app’s notifications are helpful and not too frequent.",
        "The grocery delivery is always fresh and well-packaged.",
        "The app’s design is sleek and modern.",
        "I love the convenience of booking rides and ordering food in one app!",
        "The app’s updates have made it much faster and smoother.",
        "The food delivery is always hot and fresh. Great service!",
        "The app’s payment feature is secure and reliable.",
        "I love the rewards program. It’s a great way to save money!",
        "The app’s customer support is quick and helpful.",
        "The grocery delivery is always fresh and on time.",
        "The app’s interface is clean and easy to use.",
        "I love the convenience of booking rides and ordering food in one app!",
        "The app’s notifications are timely and useful.",
        "The ride-hailing feature is so reliable and safe.",
        "The app’s design is user-friendly and modern.",
        "I love the variety of services offered. Truly a super app!"
    ]
}

# Initialize Preprocessor
preprocessor = TextPreprocessor()

# Fetch and Process Data
#sentiment_data = fetch_bigquery_data()
sentiment_data = pd.DataFrame(data)
sentiment_data['cleaned_text'] = sentiment_data['text'].apply(preprocessor.clean_text)

# Predict Sentiment
sentiment_data['predicted_sentiment'] = sentiment_data['cleaned_text'].apply(lambda x: analyze_sentiment(x, tokenizer, model))
sentiment_data['keywords'] = sentiment_data['cleaned_text'].apply(extract_entities)

#subject (who or what the sentiment is about) and the object (who or what the sentiment is directed to)
sentiment_data["subject"] = sentiment_data["keywords"].apply(lambda x: x["subject"])
sentiment_data["object"] = sentiment_data["keywords"].apply(lambda x: x["object"])
sentiment_data


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/tsheponqapela/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/tsheponqapela/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tsheponqapela/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you

Unnamed: 0,text,cleaned_text,predicted_sentiment,keywords,subject,object
0,"This super app is truly a lifesaver, offering ...",super app truly lifesaver offering allinone so...,5,"{'subject': 'app offering everything', 'object...",app offering everything,lifesaver solution place task apps
1,The app crashes every time I try to order food...,app crash every time try order food frustrating,1,"{'subject': 'crash', 'object': 'app crash time...",crash,app crash time order food
2,"Love how I can pay bills, order food, and book...",love pay bill order food book ride one app,5,"{'subject': 'book', 'object': 'love pay bill o...",book,love pay bill order food book app
3,The interface is so cluttered and hard to navi...,interface cluttered hard navigate,1,"{'subject': 'interface', 'object': 'interface'}",interface,interface
4,Best app ever! Saves me so much time and effort.,best app ever save much time effort,5,"{'subject': 'app', 'object': 'app time effort'}",app,app time effort
...,...,...,...,...,...,...
111,I love the convenience of booking rides and or...,love convenience booking ride ordering food on...,5,"{'subject': 'love', 'object': 'love convenienc...",love,love convenience booking ride food app
112,The app’s notifications are timely and useful.,apps notification timely useful,4,"{'subject': 'apps', 'object': 'apps notificati...",apps,apps notification
113,The ride-hailing feature is so reliable and safe.,ridehailing feature reliable safe,5,"{'subject': 'feature', 'object': 'feature'}",feature,feature
114,The app’s design is user-friendly and modern.,apps design userfriendly modern,5,"{'subject': 'apps', 'object': 'apps'}",apps,apps


In [9]:
import re
import logging
import torch
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import spacy

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class TextPreprocessor:
    def __init__(self):
        """Initializes text preprocessing components."""
        self.stopwords = set(word.lower() for word in stopwords.words('english')).union({
            'i', 'the', 'these', 'there', 'are', 'this', 'that', 'we', 'you', 'it', 'they', 'he', 'she', 'them', 'is', 'am', 'was', 'were', 'been', 'being'
        })
        self.lemmatizer = WordNetLemmatizer()
    
    def clean_text(self, text):
        """Applies various text preprocessing steps."""
        text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
        text = text.encode('ascii', 'ignore').decode('utf-8')  # Remove accented chars
        text = re.sub(r'[̀-ͯ]', '', text)  # Replace diacritics
        contractions = {"can't": "cannot", "won't": "will not", "it's": "it is"}  # Expand contractions
        for contraction, expansion in contractions.items():
            text = text.replace(contraction, expansion)
        text = re.sub(r'http\S+', '', text)  # Remove URLs
        text = re.sub(r'[:;=X][oO\-]?[D\)\(P]', '', text)  # Remove emojis
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
        text = re.sub(r'\d+', '', text)  # Remove numbers
        text = " ".join([word.lower() for word in text.split() if word.lower() not in self.stopwords])
        text = " ".join([self.lemmatizer.lemmatize(word) for word in text.split()])
        return re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace

class SentimentAnalyzer:
    def __init__(self, model_name='nlptown/bert-base-multilingual-uncased-sentiment'):
        """Loads the sentiment analysis model and tokenizer."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
    
    def analyze_sentiment(self, text):
        """Encodes text and predicts sentiment score."""
        tokens = self.tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=512)
        result = self.model(tokens)
        return int(torch.argmax(result.logits)) + 1  # Convert to 1-5 scale

class EntityExtractor:
    def __init__(self):
        """Loads NLP models for entity recognition and dependency parsing."""
        self.ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
        self.nlp = spacy.load("en_core_web_md")
    
    def extract_entities(self, text):
        """Extracts key subjects and objects using dependency parsing and NER."""
        doc = self.nlp(text)
        subjects, objects = [], []
        
        for token in doc:
            if "subj" in token.dep_:
                subjects.append(token.text)
            if "obj" in token.dep_ or token.pos_ == "NOUN":
                objects.append(token.text)
        
        entities = self.ner_pipeline(text)
        for entity in entities:
            if entity["entity_group"] in ["PER", "ORG","PROD","ORG"]:
                subjects.append(entity["word"])
            elif entity["entity_group"] in ["LOC", "MISC"]:
                objects.append(entity["word"])
        
        if not subjects:
            subjects = [token.text for token in doc if token.pos_ == "NOUN"][:1]  
        if not objects:
            objects = [token.text for token in doc if token.pos_ == "NOUN"][:1]  

        return {"subject": " ".join(subjects) if subjects else "Unknown", "object": " ".join(objects) if objects else "Unknown"}

# Initialize components
logger.info("Initializing components...")
preprocessor = TextPreprocessor()
sentiment_analyzer = SentimentAnalyzer()
entity_extractor = EntityExtractor()

# Sample data
data = {"text": [
    "This super app is a lifesaver! Everything I need in one place!",
    "The app crashes every time I try to order food. So frustrating!",
    "Love how I can pay bills, order food, and book rides all in one app!",
    "The interface is so cluttered and hard to navigate.",
    "Best app ever! Saves me so much time and effort.",
    "The payment feature is so convenient and secure.",
    "I tried booking a ride, but the app kept freezing. Disappointed.",
    "The food delivery service is fast and reliable!"
]}

logger.info("Processing data...")
sentiment_data = pd.DataFrame(data)
sentiment_data['cleaned_text'] = sentiment_data['text'].apply(preprocessor.clean_text)
sentiment_data['predicted_sentiment'] = sentiment_data['cleaned_text'].apply(sentiment_analyzer.analyze_sentiment)
sentiment_data['keywords'] = sentiment_data['cleaned_text'].apply(entity_extractor.extract_entities)
sentiment_data['subject'] = sentiment_data['keywords'].apply(lambda x: x['subject'])
sentiment_data['object'] = sentiment_data['keywords'].apply(lambda x: x['object'])

logger.info("Processing complete. Displaying results:")
sentiment_data


2025-02-19 15:59:31,798 - INFO - Initializing components...
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
2025-02-19 15:59:34,098 - INFO - Processing data...
2025-02-19 15:59:35,107 - INFO - Processing complet

Unnamed: 0,text,cleaned_text,predicted_sentiment,keywords,subject,object
0,This super app is a lifesaver! Everything I ne...,super app lifesaver everything need one place,5,"{'subject': 'everything', 'object': 'place'}",everything,place
1,The app crashes every time I try to order food...,app crash every time try order food frustrating,1,"{'subject': 'crash', 'object': 'app crash time...",crash,app crash time order food
2,"Love how I can pay bills, order food, and book...",love pay bill order food book ride one app,5,"{'subject': 'book', 'object': 'love pay bill o...",book,love pay bill order food book app
3,The interface is so cluttered and hard to navi...,interface cluttered hard navigate,1,"{'subject': 'interface', 'object': 'interface'}",interface,interface
4,Best app ever! Saves me so much time and effort.,best app ever save much time effort,5,"{'subject': 'app', 'object': 'app time effort'}",app,app time effort
5,The payment feature is so convenient and secure.,payment feature convenient secure,4,"{'subject': 'payment', 'object': 'payment feat...",payment,payment feature secure
6,"I tried booking a ride, but the app kept freez...",tried booking ride app kept freezing disappointed,1,"{'subject': 'app', 'object': 'booking ride'}",app,booking ride
7,The food delivery service is fast and reliable!,food delivery service fast reliable,5,"{'subject': 'food', 'object': 'food delivery s...",food,food delivery service


In [3]:
import pandas as pd

# Assuming the dataframe is loaded into `df`
sentiment_counts = sentiment_data['predicted_sentiment'].value_counts()
sentiment_percentages = sentiment_counts / len(sentiment_data) * 100

print(sentiment_counts)
print(sentiment_percentages)


predicted_sentiment
5    72
1    16
4    14
2     8
3     6
Name: count, dtype: int64
predicted_sentiment
5    62.068966
1    13.793103
4    12.068966
2     6.896552
3     5.172414
Name: count, dtype: float64


In [4]:
import pandas as pd

# Assuming your DataFrame is named sentiment_data
# Filter positive and negative sentiment rows
positive_feedback = sentiment_data[sentiment_data['predicted_sentiment'] >= 4]
negative_feedback = sentiment_data[sentiment_data['predicted_sentiment'] <= 2]

# Extract keywords (e.g., the 'subject' and 'object' combined from the 'keywords' column)
positive_keywords = ' '.join([f"{entry['subject']} {entry['object']}" for entry in positive_feedback['keywords']]).split()
positive_keyword_counts = pd.Series(positive_keywords).value_counts()

negative_keywords = ' '.join([f"{entry['subject']} {entry['object']}" for entry in negative_feedback['keywords']]).split()
negative_keyword_counts = pd.Series(negative_keywords).value_counts()

print("Top Positive Keywords:")
print(positive_keyword_counts.head(10))

print("Top Negative Keywords:")
print(negative_keyword_counts.head(10))


Top Positive Keywords:
apps        41
delivery    32
love        28
app         25
feature     20
food        17
service     15
grocery     14
customer     8
program      7
Name: count, dtype: int64
Top Negative Keywords:
apps        16
feature     10
app          6
customer     5
crash        4
support      3
time         3
payment      3
ride         3
freeze       2
Name: count, dtype: int64


In [5]:
ner_label = nlp.get_pipe('ner').labels
print(ner_label)

('CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART')


# Use case two when there are more features 

In [9]:
data = {
    "text": [
        {
            "review": "This super app is a lifesaver! Everything I need in one place.",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_12345",
            "timestamp": "2023-10-15T14:30:00Z",
            "service_category": "general",
            "location": "New York, USA",
            "language": "English",
            "review_length": 15,
            "contains_emojis": False,
            "keywords": ["lifesaver", "everything in one place"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 20,
            "topic": "convenience"
        },
        {
            "review": "The app crashes every time I try to order food. So frustrating!",
            "sentiment": "negative",
            "rating": 2,
            "user_id": "user_67890",
            "timestamp": "2023-10-14T09:15:00Z",
            "service_category": "food_delivery",
            "location": "Los Angeles, USA",
            "language": "English",
            "review_length": 20,
            "contains_emojis": False,
            "keywords": ["crashes", "frustrating"],
            "user_type": "new_user",
            "company_response": "We apologize for the inconvenience. Our team is working on a fix.",
            "device_type": "desktop",
            "helpfulness_score": 5,
            "topic": "app_performance"
        },
        {
            "review": "Love how I can pay bills, order food, and book rides all in one app!",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_54321",
            "timestamp": "2023-10-13T18:45:00Z",
            "service_category": "general",
            "location": "Chicago, USA",
            "language": "English",
            "review_length": 25,
            "contains_emojis": False,
            "keywords": ["pay bills", "order food", "book rides"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 30,
            "topic": "convenience"
        },
        {
            "review": "The food delivery is always late and cold. Not worth it.",
            "sentiment": "negative",
            "rating": 1,
            "user_id": "user_98765",
            "timestamp": "2023-10-12T12:00:00Z",
            "service_category": "food_delivery",
            "location": "Houston, USA",
            "language": "English",
            "review_length": 18,
            "contains_emojis": False,
            "keywords": ["late", "cold", "not worth it"],
            "user_type": "new_user",
            "company_response": "We’re sorry to hear about your experience. Please contact support for a refund.",
            "device_type": "mobile",
            "helpfulness_score": 10,
            "topic": "delivery_speed"
        },
        {
            "review": "The ride-hailing feature is so convenient and affordable!",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_11223",
            "timestamp": "2023-10-11T08:30:00Z",
            "service_category": "ride_hailing",
            "location": "Miami, USA",
            "language": "English",
            "review_length": 16,
            "contains_emojis": False,
            "keywords": ["convenient", "affordable"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 25,
            "topic": "ride_hailing"
        },
        {
            "review": "The app’s interface is so cluttered and hard to navigate.",
            "sentiment": "negative",
            "rating": 2,
            "user_id": "user_33445",
            "timestamp": "2023-10-10T17:20:00Z",
            "service_category": "general",
            "location": "San Francisco, USA",
            "language": "English",
            "review_length": 20,
            "contains_emojis": False,
            "keywords": ["cluttered", "hard to navigate"],
            "user_type": "new_user",
            "company_response": "Thank you for your feedback. We’re working on improving the interface.",
            "device_type": "desktop",
            "helpfulness_score": 15,
            "topic": "user_interface"
        },
        {
            "review": "The grocery delivery is always fresh and on time!",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_55667",
            "timestamp": "2023-10-09T10:45:00Z",
            "service_category": "grocery_delivery",
            "location": "Seattle, USA",
            "language": "English",
            "review_length": 14,
            "contains_emojis": False,
            "keywords": ["fresh", "on time"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 35,
            "topic": "grocery_delivery"
        },
        {
            "review": "The app keeps logging me out randomly. Needs a fix ASAP.",
            "sentiment": "negative",
            "rating": 2,
            "user_id": "user_77889",
            "timestamp": "2023-10-08T19:00:00Z",
            "service_category": "general",
            "location": "Boston, USA",
            "language": "English",
            "review_length": 18,
            "contains_emojis": False,
            "keywords": ["logging out", "needs fix"],
            "user_type": "new_user",
            "company_response": "We apologize for the inconvenience. Our team is investigating the issue.",
            "device_type": "mobile",
            "helpfulness_score": 12,
            "topic": "app_performance"
        },
        {
            "review": "I love the rewards program! Great way to save money.",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_99001",
            "timestamp": "2023-10-07T13:10:00Z",
            "service_category": "general",
            "location": "Austin, USA",
            "language": "English",
            "review_length": 16,
            "contains_emojis": False,
            "keywords": ["rewards program", "save money"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 40,
            "topic": "rewards"
        },
        {
            "review": "The payment feature failed during checkout. Very unreliable.",
            "sentiment": "negative",
            "rating": 1,
            "user_id": "user_22334",
            "timestamp": "2023-10-06T11:25:00Z",
            "service_category": "payments",
            "location": "Denver, USA",
            "language": "English",
            "review_length": 18,
            "contains_emojis": False,
            "keywords": ["payment failed", "unreliable"],
            "user_type": "new_user",
            "company_response": "We’re sorry for the inconvenience. Please try again or contact support.",
            "device_type": "desktop",
            "helpfulness_score": 8,
            "topic": "payments"
        },
        {
            "review": "The app’s design is sleek and user-friendly.",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_44556",
            "timestamp": "2023-10-05T16:50:00Z",
            "service_category": "general",
            "location": "Atlanta, USA",
            "language": "English",
            "review_length": 12,
            "contains_emojis": False,
            "keywords": ["sleek", "user-friendly"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 28,
            "topic": "user_interface"
        },
        {
            "review": "The app drains my phone battery way too fast.",
            "sentiment": "negative",
            "rating": 2,
            "user_id": "user_66778",
            "timestamp": "2023-10-04T09:05:00Z",
            "service_category": "general",
            "location": "Phoenix, USA",
            "language": "English",
            "review_length": 14,
            "contains_emojis": False,
            "keywords": ["drains battery", "too fast"],
            "user_type": "new_user",
            "company_response": "We’re working on optimizing battery usage. Thank you for your feedback.",
            "device_type": "mobile",
            "helpfulness_score": 10,
            "topic": "app_performance"
        },
        {
            "review": "The food delivery options are amazing! So many restaurants to choose from.",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_88990",
            "timestamp": "2023-10-03T14:15:00Z",
            "service_category": "food_delivery",
            "location": "Dallas, USA",
            "language": "English",
            "review_length": 22,
            "contains_emojis": False,
            "keywords": ["amazing", "many restaurants"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 45,
            "topic": "food_delivery"
        },
        {
            "review": "The app is buggy and needs a major update.",
            "sentiment": "negative",
            "rating": 2,
            "user_id": "user_11223",
            "timestamp": "2023-10-02T18:30:00Z",
            "service_category": "general",
            "location": "Philadelphia, USA",
            "language": "English",
            "review_length": 12,
            "contains_emojis": False,
            "keywords": ["buggy", "needs update"],
            "user_type": "new_user",
            "company_response": "We’re working on a major update to address these issues. Thank you for your patience.",
            "device_type": "desktop",
            "helpfulness_score": 18,
            "topic": "app_performance"
        },
        {
            "review": "I use this app every day for everything. It’s fantastic!",
            "sentiment": "positive",
            "rating": 5,
            "user_id": "user_33445",
            "timestamp": "2023-10-01T12:00:00Z",
            "service_category": "general",
            "location": "San Diego, USA",
            "language": "English",
            "review_length": 16,
            "contains_emojis": False,
            "keywords": ["every day", "fantastic"],
            "user_type": "loyal_user",
            "company_response": "",
            "device_type": "mobile",
            "helpfulness_score": 50,
            "topic": "convenience"
        }
    ]
}

In [11]:
import pandas as pd

# Flatten the nested data and create the DataFrame
df = pd.json_normalize(data['text'])

# Display the DataFrame
df


Unnamed: 0,review,sentiment,rating,user_id,timestamp,service_category,location,language,review_length,contains_emojis,keywords,user_type,company_response,device_type,helpfulness_score,topic
0,This super app is a lifesaver! Everything I ne...,positive,5,user_12345,2023-10-15T14:30:00Z,general,"New York, USA",English,15,False,"[lifesaver, everything in one place]",loyal_user,,mobile,20,convenience
1,The app crashes every time I try to order food...,negative,2,user_67890,2023-10-14T09:15:00Z,food_delivery,"Los Angeles, USA",English,20,False,"[crashes, frustrating]",new_user,We apologize for the inconvenience. Our team i...,desktop,5,app_performance
2,"Love how I can pay bills, order food, and book...",positive,5,user_54321,2023-10-13T18:45:00Z,general,"Chicago, USA",English,25,False,"[pay bills, order food, book rides]",loyal_user,,mobile,30,convenience
3,The food delivery is always late and cold. Not...,negative,1,user_98765,2023-10-12T12:00:00Z,food_delivery,"Houston, USA",English,18,False,"[late, cold, not worth it]",new_user,We’re sorry to hear about your experience. Ple...,mobile,10,delivery_speed
4,The ride-hailing feature is so convenient and ...,positive,5,user_11223,2023-10-11T08:30:00Z,ride_hailing,"Miami, USA",English,16,False,"[convenient, affordable]",loyal_user,,mobile,25,ride_hailing
5,The app’s interface is so cluttered and hard t...,negative,2,user_33445,2023-10-10T17:20:00Z,general,"San Francisco, USA",English,20,False,"[cluttered, hard to navigate]",new_user,Thank you for your feedback. We’re working on ...,desktop,15,user_interface
6,The grocery delivery is always fresh and on time!,positive,5,user_55667,2023-10-09T10:45:00Z,grocery_delivery,"Seattle, USA",English,14,False,"[fresh, on time]",loyal_user,,mobile,35,grocery_delivery
7,The app keeps logging me out randomly. Needs a...,negative,2,user_77889,2023-10-08T19:00:00Z,general,"Boston, USA",English,18,False,"[logging out, needs fix]",new_user,We apologize for the inconvenience. Our team i...,mobile,12,app_performance
8,I love the rewards program! Great way to save ...,positive,5,user_99001,2023-10-07T13:10:00Z,general,"Austin, USA",English,16,False,"[rewards program, save money]",loyal_user,,mobile,40,rewards
9,The payment feature failed during checkout. Ve...,negative,1,user_22334,2023-10-06T11:25:00Z,payments,"Denver, USA",English,18,False,"[payment failed, unreliable]",new_user,We’re sorry for the inconvenience. Please try ...,desktop,8,payments


# Scial media comments retrieval test

In [None]:
import requests
import pandas as pd
import json

page_id = # your page id, ex: '123456789'
post_id = # your post id, ex: '123456789'
access_token = # your access token, from https://developers.facebook.com/tools/explorer/

url = f'https://graph.facebook.com/v16.0/{page_id}_{post_id}/comments?access_token={access_token}'

response = requests.request("GET", url)

# save name, time, message in excel file
data = json.loads(response.text)
print(data)
# create object with only name, time, message
def get_comment(comment):
    return {
        'name': comment['from']['name'],
        'time': comment['created_time'],
        'message': comment['message']
    }

excel_data = list(map(get_comment, data['data']))
df = pd.DataFrame(excel_data)
df.to_excel('comments.xlsx', index=False)