In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.set_option("display.max_colwidth", None)

In [3]:
google_play_path = "google_play_music_reviews.csv"
app_store_path = "app_store_music_reviews.csv"

df_gp = pd.read_csv(google_play_path)
df_as = pd.read_csv(app_store_path)

print("Google Play:", df_gp.shape)
print("App Store:", df_as.shape)

Google Play: (76535, 7)
App Store: (5640, 7)


In [4]:
df_gp.head()

Unnamed: 0,app,review,rating,thumbs_up,version,date,device
0,com.spotify.music,great,5,0,9.0.42.529,2025-05-22 11:58:57,
1,com.spotify.music,5 star if u have premium,5,0,8.6.82.1113,2025-05-22 11:58:46,
2,com.spotify.music,too many ads,1,0,8.9.96.476,2025-05-22 11:57:00,
3,com.spotify.music,adds,1,0,,2025-05-22 11:52:40,
4,com.spotify.music,awesome 👍 thanks,5,0,9.0.44.478,2025-05-22 11:50:55,


In [5]:
df_as.head()

Unnamed: 0,app,country,title,review,rating,date,scraped_at
0,Apple Music,US,4 star,I tunes don’t have all the songs like to listen but it has majority. I’m stratified with it,4,2025-05-21T18:50:04-07:00,2025-05-23T10:08:45.209909
1,Apple Music,US,Songs,Good songs especially taking home country roads,4,2025-05-21T18:20:31-07:00,2025-05-23T10:08:45.209934
2,Apple Music,US,Encantado,Me encanta esta aplicación.,5,2025-05-21T17:44:36-07:00,2025-05-23T10:08:45.209940
3,Apple Music,US,Love it,Love a lot of stuff on here love that it blanks out bad words if you have restrictions!,5,2025-05-21T17:40:19-07:00,2025-05-23T10:08:45.209945
4,Apple Music,US,Very convenient and I would consider,I love Apple Music because it doesn’t have any adds unlike Spotify or Pandora. I created my account for free and I love listening to my music. It has all new songs from your favorite artists and updates and even podcasts!!! I would HIGHLY RECOMMEND!!!!!! 👍👍👍,5,2025-05-21T17:19:50-07:00,2025-05-23T10:08:45.209949


In [6]:
app_map = {
    "com.spotify.music": "Spotify",
    "com.apple.android.music": "Apple Music",
    "com.soundcloud.android": "SoundCloud",
    "com.aspiro.tidal": "TIDAL",
    "deezer.android.app": "Deezer",
    "com.shazam.android": "Shazam",
    "com.google.android.music": "Google Play Music"
}
df_gp["app"] = df_gp["app"].map(app_map).fillna(df_gp["app"])

df_gp.drop(columns=["thumbs_up", "device", "scraped_at"], errors="ignore", inplace=True)

df_gp["date"] = pd.to_datetime(df_gp["date"], errors="coerce")

df_gp.dropna(subset=["review", "rating", "date"], inplace=True)
df_gp["rating"] = df_gp["rating"].astype(int)

df_gp = df_gp.sort_values("date", ascending=False)
df_gp.head()

Unnamed: 0,app,review,rating,version,date
0,Spotify,great,5,9.0.42.529,2025-05-22 11:58:57
1,Spotify,5 star if u have premium,5,8.6.82.1113,2025-05-22 11:58:46
2,Spotify,too many ads,1,8.9.96.476,2025-05-22 11:57:00
3,Spotify,adds,1,,2025-05-22 11:52:40
4,Spotify,awesome 👍 thanks,5,9.0.44.478,2025-05-22 11:50:55


In [9]:
df_as["date"] = pd.to_datetime(df_as["date"], errors="coerce")
df_as.dropna(subset=["review", "rating", "date"], inplace=True)
df_as["rating"] = df_as["rating"].astype(int)
df_as.drop(columns=["scraped_at"], errors="ignore", inplace=True)
df_as["date"] = pd.to_datetime(df_as["date"], errors="coerce")
df_as = df_as.sort_values("date", ascending=False)
df_as.head()

Unnamed: 0,app,country,title,review,rating,date
1850,Spotify,MX,Muy buena aplicación de música. Casi sin anuncios solo dos,La,5,2025-05-21 18:57:56-07:00
2750,SoundCloud,US,I have loved you from day 1 bi,"Nah fr tho, omm nothing but love for SoundCloud ❤️",5,2025-05-21 18:56:41-07:00
1851,Spotify,MX,Los amo,Los amo,5,2025-05-21 18:56:12-07:00
3750,Deezer,US,Must have a premium account to download music,"I got this music app so I could download songs and listen to them on airplane mode. It requires you to make an account and pick your favorite artists only to say you have to pay in order to download songs. Waste of my time.\nIf you are not using it on an airplane, then it seems like a decent app, but disappointing for me.",1,2025-05-21 18:50:34-07:00
0,Apple Music,US,4 star,I tunes don’t have all the songs like to listen but it has majority. I’m stratified with it,4,2025-05-21 18:50:04-07:00


In [10]:
df_gp.to_csv("clean_google_play_music_reviews.csv", index=False)
df_as.to_csv("clean_app_store_music_reviews.csv", index=False)
print("Cleaned datasets saved.")

Cleaned datasets saved.


In [11]:
df_gp["source"] = "Google Play"
df_as["source"] = "App Store"

df_merge = pd.concat([df_gp, df_as], ignore_index=True)
df_merge.sample(5)

Unnamed: 0,app,review,rating,version,date,source,country,title
23195,SoundCloud,Good App I like it,5,2025.02.25-release,2025-03-04 01:09:09,Google Play,,
57798,Apple Music,"Subscription issue, Worst subscription I had used. When I unsubscribes apple music still it charged from my account even though it was stopped. Even put a complaint about it but no response. Worst customer service ever!!!!!",1,,2023-04-21 12:53:31,Google Play,,
13358,Spotify,"I like it, but I always have issues when going from listening to my Playlist in my office, then go to my car, it starts over from when I stopped listening in my car before I got to the office. I deal with it but it's quite annoying.",4,9.0.42.529,2025-05-09 18:35:38,Google Play,,
78262,Shazam,"shazam werkt niet meer met voiceover. ging altijd goed, maar nu niet meer.",1,,2025-05-13 03:27:30-07:00,App Store,NL,probleem met voiceover
50639,Apple Music,"songs still freeze when busy on another app. the search bar in the library is useless as it will find the song, but will play the song that was recently added.",1,4.5.0,2023-11-14 13:52:51,Google Play,,
