In [1]:
!pip install pandas plotly arabic-reshaper python-bidi



In [2]:
from google.colab import files
uploaded = files.upload()

Saving StreamingHistory_music_0.json to StreamingHistory_music_0 (4).json


In [3]:
import pandas as pd
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import arabic_reshaper
from bidi.algorithm import get_display
import re

In [12]:
def fix_text_display(text):
    """Fix text display for Arabic and other RTL languages"""
    if pd.isna(text):  # Handle potential None or NaN values
        return text
    if isinstance(text, str):
        # Check if text contains Arabic characters
        arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
        if arabic_pattern.search(text):
            try:
                # Simple reshape with configuration parameter
                configuration = arabic_reshaper.ArabicReshaper.get_configuration(
                    arabic_reshaper.config.default_config()
                )
                reshaper = arabic_reshaper.ArabicReshaper(configuration=configuration)
                reshaped_text = reshaper.reshape(text)
                return get_display(reshaped_text)
            except Exception as e:
                print(f"Error processing Arabic text '{text}': {e}")
                return text
    return text

# Load and process your data
file_name = next(iter(uploaded))
with open(file_name, 'r', encoding='utf-8') as f:
    data = json.load(f)

df = pd.DataFrame(data)
df['endTime'] = pd.to_datetime(df['endTime'])
df['minutesPlayed'] = df['msPlayed'] / 60000
df['hour'] = df['endTime'].dt.hour
df['weekday'] = df['endTime'].dt.day_name()
df['date'] = df['endTime'].dt.date
df['month'] = df['endTime'].dt.month_name()

In [5]:
!pip install --upgrade arabic-reshaper



In [13]:
df = df[df['msPlayed'] > 10000].copy()

df.loc[:, 'artistName_display'] = df['artistName'].apply(fix_text_display)
df.loc[:, 'trackName_display'] = df['trackName'].apply(fix_text_display)

Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'بستانس': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'بستانس': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicReshaper' has no attribute 'get_configuration'
Error processing Arabic text 'صادني': type object 'ArabicResha

In [14]:
top_artists = df.groupby('artistName_display')['minutesPlayed'].sum().sort_values(ascending=False).head(10)
top_tracks = df.groupby(['artistName_display', 'trackName_display'])['minutesPlayed'].sum().sort_values(ascending=False).head(10)
daily = df.groupby('date')['minutesPlayed'].sum().reset_index()
weekday_avg = df.groupby('weekday')['minutesPlayed'].mean().reindex([
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'
]).reset_index()
hourly_avg = df.groupby('hour')['minutesPlayed'].mean().reset_index()
monthly = df.groupby('month')['minutesPlayed'].sum()

In [15]:
#Top Artists
fig1 = px.bar(x=top_artists.values, y=top_artists.index, orientation='h',
             title="Top 10 Artists by Listening Time",
             labels={'x': 'Minutes', 'y': 'Artist'},
             color=top_artists.values, color_continuous_scale='blues')
fig1.update_layout(height=600, showlegend=False, yaxis={'categoryorder':'total ascending'})
fig1.show()

#Top Tracks
top_tracks_df = top_tracks.reset_index()
top_tracks_df['track_label'] = top_tracks_df['artistName_display'] + ' - ' + top_tracks_df['trackName_display']

fig2 = px.bar(x=top_tracks.values, y=top_tracks_df['track_label'], orientation='h',
             title="Top 10 Songs by Listening Time",
             labels={'x': 'Minutes', 'y': 'Track'},
             color=top_tracks.values, color_continuous_scale='greens')
fig2.update_layout(height=700, showlegend=False, yaxis={'categoryorder':'total ascending'})
fig2.show()

#Daily Trend
fig3 = px.line(daily, x='date', y='minutesPlayed',
              title="Daily Listening Trend",
              labels={'minutesPlayed': 'Minutes Played', 'date': 'Date'})
fig3.update_traces(line_color='#1f77b4')
fig3.show()

#Weekday Average
fig4 = px.bar(weekday_avg, x='weekday', y='minutesPlayed',
             title="Average Listening by Weekday",
             labels={'minutesPlayed': 'Minutes Played', 'weekday': 'Weekday'},
             color='minutesPlayed', color_continuous_scale='reds')
fig4.update_layout(showlegend=False)
fig4.show()

#Hourly Trend
fig5 = px.line(hourly_avg, x='hour', y='minutesPlayed', markers=True,
              title="Average Listening by Hour",
              labels={'minutesPlayed': 'Avg Minutes Played', 'hour': 'Hour'})
fig5.update_traces(line_color='#ff7f0e', marker_size=8)
fig5.show()

#Monthly Summary
if df['month'].nunique() > 1:
    monthly_df = monthly.reset_index()
    fig6 = px.bar(monthly_df, x='month', y='minutesPlayed',
                 title="Total Listening Time by Month",
                 labels={'minutesPlayed': 'Minutes', 'month': 'Month'},
                 color='minutesPlayed', color_continuous_scale='oranges')
    fig6.update_layout(showlegend=False)
    fig6.show()


arabic_artists = df[df['artistName'].str.contains(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]', na=False)]['artistName'].unique()
japanese_artists = df[df['artistName'].str.contains(r'[\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FAF]', na=False)]['artistName'].unique()

print(f"Found {len(arabic_artists)} artists with Arabic text")
print(f"Found {len(japanese_artists)} artists with Japanese/Chinese text")

if len(arabic_artists) > 0:
    print("\nArabic artists found:")
    for artist in arabic_artists[:5]:
        print(f"  Original: {artist}")
        print(f"  Fixed: {fix_text_display(artist)}")

if len(japanese_artists) > 0:
    print("\nJapanese/Chinese artists found:")
    for artist in japanese_artists[:5]:
        print(f"  {artist}")

#testing is it's displayed propely
print("\nTesting Japanese text display:")
test_names = ["高橋あず美", "星空 凛(CV.飯田里穂)", "園田海未"]
for name in test_names:
    print(f"  {name}")

Found 0 artists with Arabic text
Found 21 artists with Japanese/Chinese text

Japanese/Chinese artists found:
  高橋あず美
  星空 凛(CV.飯田里穂)
  園田海未 (CV.三森すずこ) from μ's
  アトラスサウンドチーム
  A-RISE (綺羅ツバサ(CV.桜川めぐ)、統堂英玲奈(CV.松永真穂)、優木あんじゅ(CV.大橋歩夕))

Testing Japanese text display:
  高橋あず美
  星空 凛(CV.飯田里穂)
  園田海未
