In [None]:
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Loading and Cleaning Data

In [None]:
# Load the playlist export and flatten it into one row per (playlist, item).
# The path is relative to the notebook's working directory.
playlist_file_path = 'datasets/Playlist1.json'
with open(playlist_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Column order of the flattened table. Passing it to the DataFrame keeps the
# schema stable even if no rows are collected (e.g. every playlist is empty).
track_columns = [
    'playlist_name',
    'last_modified_date',
    'track_name',
    'artist_name',
    'album_name',
    'track_uri',
    'added_date',
]

rows = []

for playlist in data['playlists']:
    playlist_name = playlist['name']
    last_modified_date = playlist['lastModifiedDate']

    # Iterate through each item of the current playlist.
    for item in playlist['items']:
        track = item.get('track')
        if track is None:
            # No track object to read fields from (presumably a podcast
            # episode or local-file entry -- TODO confirm against the export).
            # Indexing into it would abort the whole cell, so skip the item.
            continue
        rows.append({
            'playlist_name': playlist_name,
            'last_modified_date': last_modified_date,
            'track_name': track['trackName'],
            'artist_name': track['artistName'],
            'album_name': track['albumName'],
            'track_uri': track['trackUri'],
            'added_date': item['addedDate']
        })

df = pd.DataFrame(rows, columns=track_columns)

# Show the last rows as a quick check of the flattened result.
df.tail(10)

In [None]:
# Count missing values per column of the flattened playlist table.
df.isnull().sum()

In [None]:
# Remove rows that are exact duplicates across every column, keeping the
# first occurrence. Reassigning (rather than inplace=True) keeps the step
# chainable and avoids mutating a frame that an earlier cell displayed.
df = df.drop_duplicates()

# Verification: number of duplicate rows still present (expected to be 0).
df.duplicated().sum()

# Top Artists

In [None]:
# Number of rows per artist, most frequent first.
artist_counts = df['artist_name'].value_counts()

# Keep only the ten most frequent artists for the chart.
top_artists = artist_counts.head(10)

# Bar chart drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(12, 6))
top_artists.plot(kind='bar', ax=ax)
ax.set_title('Top 10 Artists')
ax.set_xlabel('Artist Name')
ax.set_ylabel('Number of Appearances')
# Tilt the artist names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()