<a href="https://colab.research.google.com/github/amanullahshah32/Review-Scraping/blob/main/Attempt_2_using_google_play_scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Install Libraries**

In [6]:
!pip install --upgrade requests urllib3
import requests
import pandas as pd
import time

# Your app list
app_list = [
    {"name":"Facebook", "ios_id":284882215},
    {"name":"WhatsApp Messenger", "ios_id":310633997},
    {"name":"Instagram", "ios_id":389801252},
    {"name":"TikTok", "ios_id":835599320},
    {"name":"Snapchat", "ios_id":447188370},
    {"name":"Spotify", "ios_id":324684580},
    {"name":"Telegram Messenger", "ios_id":686449807},
    {"name":"Twitter", "ios_id":333903271},
    {"name":"YouTube", "ios_id":544007664},
    {"name":"Zoom", "ios_id":546505307},
    {"name":"bKash", "ios_id":568935136},
    {"name":"Pathao", "ios_id":954909346},
    {"name":"Nagad", "ios_id":1104364118}
]

def get_app_store_reviews(app_id, country="us"):
    """
    Fetches the most recent ~500 reviews from Apple's RSS feed.
    This requires NO installation and works in any Python environment.
    """
    url = f"https://itunes.apple.com/{country}/rss/customerreviews/id={app_id}/sortBy=mostRecent/json"

    try:
        resp = requests.get(url)
        data = resp.json()

        reviews = []
        entries = data.get("feed", {}).get("entry", [])

        # First entry is app metadata, skip it
        for entry in entries[1:]:
            reviews.append({
                "author": entry["author"]["name"]["label"],
                "rating": int(entry["im:rating"]["label"]),
                "title": entry["title"]["label"],
                "content": entry["content"]["label"],
                "updated": entry["updated"]["label"],
            })

        return reviews

    except Exception as e:
        print(f"Error fetching reviews for app_id={app_id}: {e}")
        return []


# Collect reviews for all apps
all_reviews = []

for app in app_list:
    print(f"Fetching reviews for {app['name']}...")
    reviews = get_app_store_reviews(app["ios_id"], country="us")

    for r in reviews:
        r["app_name"] = app["name"]
        r["app_id"] = app["ios_id"]

    all_reviews.extend(reviews)

    time.sleep(1)  # avoid hitting API rate limit

# Convert to DataFrame
df = pd.DataFrame(all_reviews)
df.head()

Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting urllib3
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Downloading requests-2.32.5-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading urllib3-2.5.0-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.8/129.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: urllib3, requests
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.25.11
    Uninstalling urllib3-1.25.11:
      Successfully uninstalled urllib3-1.25.11
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are ins

Unnamed: 0,author,rating,title,content,updated,app_name,app_id
0,ايسءيلل,5,❤️,I like it,2025-11-17T00:10:21-07:00,Facebook,284882215
1,Bâïłęÿ,1,FB review,Horrible Quality I have gotten zero assistance...,2025-11-17T00:06:34-07:00,Facebook,284882215
2,ariii666,2,Facebook dating won’t load for me,"For some reason, my Facebook dating isn’t load...",2025-11-16T23:53:16-07:00,Facebook,284882215
3,Willenium08,2,"Privacy Concerns Galore, Social Experiments, A...",Facebook was fun 2006-2012. Since then it has ...,2025-11-16T23:05:44-07:00,Facebook,284882215
4,strew loc,5,Facebook is awesome,Love how every year it constantly keeping up w...,2025-11-16T22:52:52-07:00,Facebook,284882215


In [8]:
!pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [9]:
analyzer = SentimentIntensityAnalyzer()

def classify_sentiment(text):
    score = analyzer.polarity_scores(text)["compound"]
    return "positive" if score > 0 else "negative"

df["sentiment"] = df["content"].apply(classify_sentiment)

# ----------------------------------------
# COUNT POSITIVE VS NEGATIVE
# ----------------------------------------
sentiment_counts = df["sentiment"].value_counts()
print("\nOverall Sentiment Counts:")
print(sentiment_counts)

# ----------------------------------------
# COUNT PER APP
# ----------------------------------------
sentiment_by_app = df.groupby("app_name")["sentiment"].value_counts().unstack().fillna(0)
print("\nSentiment per App:")
print(sentiment_by_app)

# Save to CSV
df.to_csv("ios_reviews_with_sentiment.csv", index=False)
print("\nSaved: ios_reviews_with_sentiment.csv")


Overall Sentiment Counts:
sentiment
negative    278
positive    212
Name: count, dtype: int64

Sentiment per App:
sentiment           negative  positive
app_name                              
Facebook                  26        23
Instagram                 33        16
Snapchat                  22        27
Spotify                   21        28
Telegram Messenger        40         9
TikTok                    25        24
Twitter                   30        19
WhatsApp Messenger        35        14
YouTube                   28        21
Zoom                      18        31

Saved: ios_reviews_with_sentiment.csv
