# Yelp Dataset EDA – ACARS Project

Explore the Yelp user, business, and review data to understand trends and inform preprocessing.

In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" style for the plots drawn after this call.
sns.set(style="whitegrid")

In [None]:
# Load a Yelp JSON-lines dataset (one JSON object per line)
def load_yelp_json(path, limit=50000):
    """Read up to ``limit`` records from a JSON-lines file into a DataFrame.

    Args:
        path: Location of a file that holds one JSON object per line.
        limit: Maximum number of records to read. ``None`` reads the whole
            file; zero or a negative value yields an empty DataFrame.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed record.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    from itertools import islice

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(path, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no record; skip them so they neither crash the parser nor count
        # against the limit.
        rows = (line for line in f if line.strip())
        if limit is not None:
            # islice rejects negative stops; clamp to keep "empty result"
            # semantics for limit <= 0.
            rows = islice(rows, max(limit, 0))
        data = [json.loads(line) for line in rows]
    return pd.DataFrame(data)

# Read each Yelp dataset file into its own DataFrame, using the loader's
# default record limit.
reviews = load_yelp_json(
    path='../datasets/yelp_academic_dataset_review.json',
)
users = load_yelp_json(
    path='../datasets/yelp_academic_dataset_user.json',
)
businesses = load_yelp_json(
    path='../datasets/yelp_academic_dataset_business.json',
)

In [None]:
# Show the first rows of the review table, restricted to the identifying
# columns, the star rating, and the review date.
reviews.loc[:, ['user_id', 'business_id', 'stars', 'date']].head()

In [None]:
# Bar chart counting how many loaded reviews fall on each star rating.
sns.countplot(data=reviews, x='stars')
plt.title(label='Distribution of Ratings')
plt.show()

In [None]:
# Review volume per calendar year.
# Parse the date column into datetimes, then derive a 'year' column from it;
# both columns are written back onto the reviews DataFrame.
reviews['date'] = pd.to_datetime(reviews['date'])
reviews['year'] = reviews['date'].dt.year

# One bar per year; tilt the tick labels so neighbouring years stay legible.
sns.countplot(data=reviews, x='year')
plt.title(label='Review Volume by Year')
plt.xticks(rotation=45)
plt.show()