# Importing data

In [10]:
import json
from pprint import pprint
import os.path

dir = os.path.dirname(os.path.abspath("__file__"))
path = dir + '/../data/artists.json'

with open(path) as f:
  JsonData = json.load(f)
pprint(JsonData[0])

{'collaborators': [{'id': '23TFHmajVfBtlRx5MXqgoz', 'name': 'Sfera Ebbasta'},
                   {'id': '2ALJBMyhbGODOEpstHfEqN', 'name': 'Rizzo'},
                   {'id': '754BUADwzMYecBgOoBaetK', 'name': 'Neima Ezza'},
                   {'id': '7F2utINZ6tSokSiZTQBE27', 'name': 'Guè'},
                   {'id': '3hBQ4zniNdQf1cqqo6hzuW', 'name': 'Salmo'},
                   {'id': '3fhMfkPPzksWuw0hEm4ldm', 'name': 'Ernia'},
                   {'id': '5dXlc7MnpaTeUIsHLVe3n4', 'name': 'Coez'},
                   {'id': '0jdNdfi4vAuVi7a6cPDFBM', 'name': 'Lazza'},
                   {'id': '7u710e44HW3K7A5eTnRqHC', 'name': 'Fabri Fibra'},
                   {'id': '3ufoz151GN8lddatwZ998s', 'name': 'G. Soave'},
                   {'id': '3Y0ccXFrUeRPlnEzXmDeWa', 'name': 'Duellz'},
                   {'id': '2av4MrhPDUQLG3py1i0h7L', 'name': 'Tormento'},
                   {'id': '5AZuEF0feCXMkUCwQiQlW7', 'name': 'Marracash'},
                   {'id': '672TMrvmIHZhMbe3AldMaf', 'name': 'Fa

# Saving collaboration csv

In [11]:
import pandas as pd

def filter_repeated_pairs(pair_list):
    seen_pairs = set()
    filtered_list = []

    for pair in pair_list:
        x, y = pair
        if (x, y) not in seen_pairs and (y, x) not in seen_pairs:
            filtered_list.append(pair)
            seen_pairs.add(pair)

    return filtered_list


CollaborationsList = []
for artist in JsonData:
  for collaborator in artist['collaborators']:
    if collaborator['name'] in [x['name'] for x in JsonData]:
      CollaborationsList.append( ( artist['name'] , collaborator['name'] ) )

print('Length of the original list:', len(CollaborationsList))

FilteredCollaborationsList = filter_repeated_pairs(CollaborationsList)

print('Length of the filtered list:', len(FilteredCollaborationsList))


df = pd.DataFrame(columns = ['artist', 'collaborator'])

for artist, collaborator in FilteredCollaborationsList:
  to_concat =  pd.DataFrame( {'artist': [artist], 'collaborator': [collaborator] } )

  df = pd.concat([df, to_concat])

df



Length of the original list: 2661
Length of the filtered list: 1930


Unnamed: 0,artist,collaborator
0,Emis Killa,Sfera Ebbasta
0,Emis Killa,Rizzo
0,Emis Killa,Neima Ezza
0,Emis Killa,Guè
0,Emis Killa,Salmo
...,...,...
0,chiello,Demo
0,Chris Nolan,Birthh
0,Chris Nolan,AIELLO
0,AIELLO,Tormento


In [12]:
df.to_csv(dir + '/../data/FilteredCollaboration.csv')

# Popularity and genres

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Set up your Spotify API credentials
client_id = os.environ.get('CLIENT_ID')

client_secret = os.environ.get('CLIENT_SECRET')

# Initialize the Spotipy client
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Function to get the popularity index of an artist
def get_artist_popularity(artist_id):
    artist = sp.artist(artist_id)
    popularity = artist['popularity']
    return popularity


In [None]:
# Example usage

df = pd.DataFrame(columns = ['artist', 'popularity', 'num_genres'])


for artist in JsonData:
  artist_id =   artist['id']
  popularity_index = get_artist_popularity(artist_id)
  to_concat =  pd.DataFrame( {'artist': [artist['name']], 'popularity': [popularity_index], 'num_genres' : len(artist['genres'])  } )
  df = pd.concat([df, to_concat])

df

Unnamed: 0,artist,popularity,num_genres
0,Emis Killa,68,2
0,Rhove,67,2
0,Finesse,64,0
0,Lazza,77,4
0,Geolier,78,4
...,...,...,...
0,Birthh,32,2
0,AIELLO,51,1
0,Merk & Kremont,59,4
0,BB Team,38,0


In [None]:
df.to_csv(dir + '/../data/PopularityAndGenres.csv')