In [None]:
import pandas as pd
import json
import nltk
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns

# Required for nltk word tokenization
# This call runs every time the cell is executed.
# NOTE(review): none of the visible cells calls an NLTK tokenizer directly —
# presumably the resource is for TextBlob's internals; confirm it is still needed.
nltk.download('punkt')


In [None]:
# Input files consumed by load_data(): one path for the business records and
# one for the review records.
# NOTE(review): both are absolute paths under one user's Windows profile; they
# must be edited before the notebook can run on any other machine.
business_path = r'C:\Users\hp\Downloads\dataset\business.json'
review_path = r'C:\Users\hp\Downloads\dataset\review.json'

In [None]:
def load_data(json_path):
    """Load a JSON Lines file (one JSON document per line) into a DataFrame.

    Parameters
    ----------
    json_path : str or path-like
        File to read. It is opened as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        One row per non-blank line of the file, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        # Blank / whitespace-only lines (for example an extra newline at the
        # end of the file) are skipped; json.loads would otherwise raise on
        # them and abort the whole load.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)


In [None]:
# Read both inputs into memory. Each call parses its whole file before
# returning, so this cell holds two complete DataFrames afterwards.
business_df = load_data(json_path=business_path)
review_df = load_data(json_path=review_path)

In [None]:
# Show a heading followed by the first rows of each loaded frame.
for heading, frame in (
    ("Business Data Sample", business_df),
    ("\nReview Data Sample", review_df),
):
    print(heading)
    display(frame.head())

In [None]:
# Whole-text sentiment classification using TextBlob
def extract_sentiment(text):
    """Label a piece of text as 'Positive', 'Negative' or 'Neutral'.

    The label is the sign of TextBlob's polarity score for the complete text;
    no per-aspect or per-sentence scoring is done here.

    Parameters
    ----------
    text : str
        Text to classify. Any non-string value (e.g. the None / NaN that
        pandas uses for a missing review) is labelled 'Neutral' instead of
        raising, so one missing value cannot abort a whole ``.apply`` run.

    Returns
    -------
    str
        'Positive' if polarity > 0, 'Negative' if polarity < 0, else 'Neutral'.
    """
    # TextBlob only accepts strings; guard before constructing it.
    if not isinstance(text, str):
        return 'Neutral'

    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'

In [None]:
# Example aspects extraction function (keywords)
def extract_aspects(text, keywords=('food', 'service', 'ambiance', 'price', 'cleanliness')):
    """Return the aspect keywords that occur in ``text``.

    Matching is a case-insensitive *substring* test, so 'price' also matches
    inside 'priceless'.

    Parameters
    ----------
    text : str
        Text to scan. Any non-string value (e.g. None / NaN for a missing
        review) yields an empty list instead of raising.
    keywords : iterable of str, optional
        Aspect keywords to look for. Defaults to the five aspects this
        notebook reports on.

    Returns
    -------
    list of str
        The matching keywords, in the order given by ``keywords``.
    """
    if not isinstance(text, str):
        return []

    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    return [keyword for keyword in keywords if keyword.lower() in lowered]

In [None]:
# Tag every review with the aspects it mentions and with one sentiment label.
# The label is computed from the full review text, so every aspect row created
# below carries the sentiment of the whole review, not of the aspect alone.
review_texts = review_df['text']
review_df['aspects'] = review_texts.apply(extract_aspects)
review_df['sentiment'] = review_texts.apply(extract_sentiment)

# One row per (review, aspect) pair; reviews with no matched aspect explode to
# a missing value and are filtered out.
exploded_reviews = review_df.explode('aspects')
aspect_df = exploded_reviews[exploded_reviews['aspects'].notna()]

# Write the aspect/sentiment pairs to CSV, without the index column.
output_path_aspect_sentiments = r'C:\Users\hp\Downloads\aspect_sentiment_analysis.csv'
export_columns = ['business_id', 'aspects', 'sentiment']
aspect_df.loc[:, export_columns].to_csv(output_path_aspect_sentiments, index=False)

print(f"\nAspect-based sentiment analysis saved to {output_path_aspect_sentiments}")


In [None]:
# Summarise sentiment labels into a small metrics dictionary
def generate_insights(reviews_df):
    """Count the rows of ``reviews_df`` overall and per sentiment label.

    Parameters
    ----------
    reviews_df : pandas.DataFrame
        Frame with a 'sentiment' column holding 'Positive', 'Negative' or
        'Neutral'. Rows with any other value only contribute to the total.

    Returns
    -------
    dict
        Keys 'Total Reviews', 'Positive Reviews', 'Negative Reviews' and
        'Neutral Reviews' (in that order), each mapped to an int row count.
    """
    labels = reviews_df['sentiment']

    def rows_labelled(label):
        # Summing the boolean mask counts the matching rows.
        return int(labels.eq(label).sum())

    return {
        'Total Reviews': len(reviews_df),
        'Positive Reviews': rows_labelled('Positive'),
        'Negative Reviews': rows_labelled('Negative'),
        'Neutral Reviews': rows_labelled('Neutral'),
    }

# Generate insights based on the sentiment analysis
# NOTE(review): aspect_df is the exploded frame (one row per review/aspect
# pair, reviews with no matched aspect dropped), so a review that mentions
# several aspects is counted once per aspect in every "... Reviews" metric.
# Confirm that per-aspect-mention counts are what the report should show.
insights = generate_insights(aspect_df)

In [None]:
# Print each metric as "name: value"
print("\nInsights Report:")
for metric, count in insights.items():
    print(f"{metric}: {count}")

# Draw the same metrics as a bar chart on an explicit Figure/Axes pair
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
metric_names = list(insights.keys())
metric_counts = list(insights.values())
fig, ax = plt.subplots(figsize=(10,6))
sns.barplot(x=metric_names, y=metric_counts, palette='viridis', ax=ax)
ax.set_title("Sentiment Insights for Business Reviews")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_ylabel("Count")
plt.show()

# Persist the metrics as a two-column CSV (Metric, Value), without the index
output_path_insights = r'C:\Users\hp\Downloads\insights_report.csv'
insights_df = pd.DataFrame({'Metric': metric_names, 'Value': metric_counts})
insights_df.to_csv(output_path_insights, index=False)

print(f"\nInsights report saved to {output_path_insights}")