In [1]:
import pandas as pd

In [2]:
import sys
import os
# Append the absolute path of ../src to the module search path so that the
# project-local module `db_conexion` can be imported on the next line.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
from db_conexion import establecer_conexion, cerrar_conexion


Conexión exitosa a la base de datos
Conexión cerrada a la base de datos


In [3]:
connection = establecer_conexion()

# SQL query that selects every row of the grammy_awards table
query = "SELECT * FROM grammy_awards"

# Read the query result into a pandas DataFrame
try:
    # pandas reads directly from the open connection
    grammy = pd.read_sql_query(query, con=connection)
    print("Datos cargados con éxito")
except Exception as e:
    print(f"Error al leer la base de datos: {e}")
    grammy = pd.DataFrame()  # Fall back to an empty DataFrame on error
finally:
    # Release the connection on every exit path, including an interrupted
    # read (e.g. KeyboardInterrupt), which `except Exception` does not catch.
    cerrar_conexion(connection)

Conexión exitosa a la base de datos
Datos cargados con éxito
Conexión cerrada a la base de datos


  grammy = pd.read_sql_query(query, con=connection)


In [4]:
# Add a sequential 1-based `id` column and make it the first column
grammy['id'] = range(1, len(grammy) + 1)
other_columns = [name for name in grammy.columns if name != 'id']
grammy = grammy[['id'] + other_columns]

# Placeholder used for each column whose missing values are filled in
placeholders = {
    'workers': "Unknown Worker",
    'nominee': "Unknown",
    'img': "https://soundimaging.com/wp-content/uploads/2020/12/coming.jpeg",
}
for column_name, placeholder in placeholders.items():
    grammy[column_name] = grammy[column_name].fillna(placeholder)

# Report remaining nulls: workers, then nominee, then every column
print(grammy['workers'].isnull().sum())
print(grammy['nominee'].isnull().sum())
print(grammy.isnull().sum())

0
0
id                 0
year               0
title              0
published_at       0
updated_at         0
category           0
nominee            0
artist          1840
workers            0
img                0
winner             0
dtype: int64


In [5]:
import sys
import os
# Append the absolute path of ../credentials to the module search path.
# NOTE(review): presumably this is where API_conexion (imported in the next
# cell) lives — confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join('..', 'credentials')))

In [6]:
from API_conexion import client_id,client_secret

In [7]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import logging
from tqdm import tqdm  # Progress bar

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Spotify API credentials: use the `client_id` / `client_secret` values
# imported from API_conexion in the previous cell. They must not be
# overwritten with placeholder strings here (authentication would fail) and
# real secrets must never be hardcoded in the notebook.

# Authenticate with Spotify (same `auth_manager=` keyword used by the later
# Spotify-ID cell in this notebook)
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=client_credentials_manager)

# Function to search for an artist on Spotify
def buscar_artista(artista):
    """Search Spotify for an artist and return the first match's name.

    Args:
        artista: Artist name to look up. Non-string values (such as None or
            NaN) and blank/whitespace-only strings are treated as "nothing
            to search for".

    Returns:
        The ``name`` field of the first artist in the search results, or
        None when the input is blank or not a string, when the search
        returns no artists, or when the request raises.
    """
    # Guard non-strings as well as blanks: calling .strip() on None/NaN would
    # raise before the try block below is even reached.
    if not isinstance(artista, str) or not artista.strip():
        return None
    try:
        resultado = sp.search(q='artist:' + artista, type='artist')
        items = resultado['artists']['items']
        if items:
            nombre = items[0]['name']
            logging.info(f"Artista encontrado: {nombre}")
            return nombre  # Name of the first artist found
    except Exception as e:
        # Best-effort lookup: log the failure and fall through to None
        logging.error(f"Error buscando artista {artista}: {e}")
    return None

# Assuming `grammy` DataFrame is already defined
grammy['artist'] = grammy['artist'].fillna('')

logging.info("Comienza proceso de relleno")

# Only rows whose artist is blank need a lookup; every other row keeps its
# current value.
faltantes = grammy['artist'] == ''

# Register `progress_apply` so the bar advances while the lookups run.
# Wrapping the finished result of `.apply` in tqdm() would show no real
# progress, and the null count is always 0 right after the fillna above.
tqdm.pandas(desc="Rellenando artistas")

# NOTE(review): buscar_artista returns None for a blank name without calling
# the API, so these rows currently end up as None. Confirm whether another
# field (e.g. the nominee) was meant to be used as the search text.
grammy.loc[faltantes, 'artist'] = grammy.loc[faltantes, 'artist'].progress_apply(buscar_artista)

logging.info("Termina proceso de relleno")


KeyboardInterrupt: 

In [8]:
# Show how many nulls remain in each column
null_counts = grammy.isnull().sum()
print(null_counts)

# Write the table to CSV without the row index
updated_csv_path = '../data/grammys_updated.csv'
grammy.to_csv(updated_csv_path, index=False)

print("Archivo actualizado y guardado.")

id              0
year            0
title           0
published_at    0
updated_at      0
category        0
nominee         0
artist          0
workers         0
img             0
winner          0
dtype: int64
Archivo actualizado y guardado.


In [9]:
import os
import time
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from API_conexion import client_id, client_secret  # Ensure this is secure

# Authenticate with Spotify using the client_id / client_secret imported from
# API_conexion at the top of this cell (no re-assignment is needed; assigning
# a name to itself is a no-op).
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Function to get the Spotify ID based on nominee or artist
def get_spotify_id(nominee, artist):
    """Return the Spotify ID of the first match for a nominee or artist.

    Lookups are tried in order and the first one with a result wins:
    a track search on ``nominee``, an album search on ``nominee``, then an
    artist search on ``artist``.

    Args:
        nominee: Text used for the track and album searches.
        artist: Text used for the artist search.

    Returns:
        The ``id`` of the first item returned by the first successful
        lookup (a track, album or artist ID), or None when nothing is found
        or when any request raises. Blank or non-string inputs (e.g. NaN)
        are not sent to the API.
    """
    # (query text, Spotify search type, label used in the progress message)
    attempts = (
        (nominee, 'track', 'Track'),
        (nominee, 'album', 'Album'),
        (artist, 'artist', 'Artist'),
    )

    try:
        for query, search_type, label in attempts:
            # Skip blank or non-string queries (for instance a NaN artist):
            # sending them to the API could match an unrelated item.
            if not isinstance(query, str) or not query.strip():
                continue

            result = sp.search(q=query, type=search_type, limit=1)
            # The response is keyed by the plural of the search type
            items = result[search_type + 's']['items']
            if items:
                print(f"{label} found: {query} - {items[0]['id']}")
                return items[0]['id']

        # If nothing is found, return None
        print(f"No information found for: {nominee} / {artist}")
        return None

    except Exception as e:
        print(f"Error retrieving data for nominee: {nominee}, artist: {artist}. Error: {e}")
        return None

# Function to add Spotify IDs to the DataFrame with progress tracking
def add_spotify_ids(df, delay=0.5):
    """Add a ``spotify_id`` column to ``df``, one lookup per row.

    Each row's ``nominee`` and ``artist`` values are passed to
    ``get_spotify_id`` and the result is stored in the new column. A progress
    line is printed after every row.

    Args:
        df: DataFrame with ``nominee`` and ``artist`` columns. It is modified
            in place (the ``spotify_id`` column is created or overwritten).
        delay: Seconds to sleep after each lookup to stay under API rate
            limits. Defaults to 0.5; pass 0 to disable the pause.

    Returns:
        The same DataFrame object that was passed in.
    """
    total_rows = len(df)

    # Create (or reset) the column that will hold the Spotify IDs
    df['spotify_id'] = None
    id_col = df.columns.get_loc('spotify_id')

    for pos, (nominee, artist) in enumerate(zip(df['nominee'], df['artist'])):
        # Get the Spotify ID based on the nominee or artist
        spotify_id = get_spotify_id(nominee, artist)

        # Write by position rather than by index label so a non-unique index
        # cannot cause one lookup to overwrite several rows.
        df.iat[pos, id_col] = spotify_id

        # Print progress
        print(f"Processed: {pos + 1}/{total_rows} rows (Progress: {((pos + 1) / total_rows) * 100:.2f}%)")

        # Pause between requests to avoid hitting API rate limits
        if delay:
            time.sleep(delay)

    return df

# Read the Grammy table saved by the earlier cleaning step
csv_file = os.path.abspath('../data/grammys_updated.csv')
grammys_df = pd.read_csv(csv_file)

# Look up a Spotify ID for every row (prints progress as it goes)
grammys_df_with_ids = add_spotify_ids(grammys_df)

# Widen the pandas display limits, then preview the first 20 rows
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
    ('display.width', None),
):
    pd.set_option(option_name, option_value)
print(grammys_df_with_ids.head(20))

# Write the enriched table to CSV without the row index
output_file = os.path.abspath('../data/grammys_with_spotify_ids.csv')
grammys_df_with_ids.to_csv(output_file, index=False)
print("Spotify IDs added and saved successfully!")




Track encontrado: Bad Guy - 2Fxmhks0bxGSBdJ92vM42m
Procesado: 1/4810 filas (Progreso: 0.02%)
Track encontrado: Hey, Ma - 0RstfX9nRY1Lfuy1808MoT
Procesado: 2/4810 filas (Progreso: 0.04%)
Track encontrado: 7 rings - 6ocbgoVGwYJhOv1GgI9NsF
Procesado: 3/4810 filas (Progreso: 0.06%)
Track encontrado: Hard Place - 4IablJ6SqVNGY4vrseyKxu
Procesado: 4/4810 filas (Progreso: 0.08%)
Track encontrado: Talk - 6g6A7qNhTfUgOSH7ROOxTD
Procesado: 5/4810 filas (Progreso: 0.10%)
Track encontrado: Old Town Road - 0F7FA14euOIX8KcbEturGH
Procesado: 6/4810 filas (Progreso: 0.12%)
Track encontrado: Truth Hurts - 3HWzoMvoF3TQfYg4UPszDq
Procesado: 7/4810 filas (Progreso: 0.15%)
Track encontrado: Sunflower - 3KkXRkHbMCARz0aVfEt68P
Procesado: 8/4810 filas (Progreso: 0.17%)
Track encontrado: When We All Fall Asleep, Where Do We Go? - 43zdsphuZLzwA9k4DJhU0I
Procesado: 9/4810 filas (Progreso: 0.19%)
Track encontrado: I,I - 3ihJ79kKH1P3YrOznRSDIA
Procesado: 10/4810 filas (Progreso: 0.21%)
Track encontrado: Norman F**

Unnamed: 0,id,year,title,published_at,updated_at,category,nominee,artist,workers,img,winner,spotify_id
0,1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Bad Guy,Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",https://www.grammy.com/sites/com/files/styles/...,True,2Fxmhks0bxGSBdJ92vM42m
1,2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,"Hey, Ma",Bon Iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",https://www.grammy.com/sites/com/files/styles/...,True,0RstfX9nRY1Lfuy1808MoT
2,3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,7 rings,Ariana Grande,"Charles Anderson, Tommy Brown, Michael Foster ...",https://www.grammy.com/sites/com/files/styles/...,True,6ocbgoVGwYJhOv1GgI9NsF
3,4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Hard Place,H.E.R.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",https://www.grammy.com/sites/com/files/styles/...,True,4IablJ6SqVNGY4vrseyKxu
4,5,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Talk,Khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",https://www.grammy.com/sites/com/files/styles/...,True,6g6A7qNhTfUgOSH7ROOxTD
5,6,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Old Town Road,Lil Nas X Featuring Billy Ray Cyrus,"Andrew ""VoxGod"" Bolooki, Jocelyn “Jozzy” Donal...",https://www.grammy.com/sites/com/files/styles/...,True,0F7FA14euOIX8KcbEturGH
6,7,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Truth Hurts,Lizzo,"Ricky Reed & Tele, producers; Chris Galland, M...",https://www.grammy.com/sites/com/files/styles/...,True,3HWzoMvoF3TQfYg4UPszDq
7,8,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Record Of The Year,Sunflower,Post Malone & Swae Lee,"Louis Bell & Carter Lang, producers; Louis Bel...",https://www.grammy.com/sites/com/files/styles/...,True,3KkXRkHbMCARz0aVfEt68P
8,9,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Album Of The Year,"When We All Fall Asleep, Where Do We Go?",Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",https://www.grammy.com/sites/com/files/styles/...,True,43zdsphuZLzwA9k4DJhU0I
9,10,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 05:10:28,2020-05-19 05:10:28,Album Of The Year,"I,I",Bon Iver,"Brad Cook, Chris Messina & Justin Vernon, prod...",https://www.grammy.com/sites/com/files/styles/...,True,3ihJ79kKH1P3YrOznRSDIA


In [10]:
# Show how many nulls remain in each column of the enriched table
remaining_nulls = grammys_df_with_ids.isnull().sum()
print(remaining_nulls)

# Write the enriched table to CSV without the row index
final_csv_path = '../data/grammys_updated_with_id.csv'
grammys_df_with_ids.to_csv(final_csv_path, index=False)

print("Archivo actualizado y guardado.")



id              0
year            0
title           0
published_at    0
updated_at      0
category        0
nominee         0
artist          0
workers         0
img             0
winner          0
spotify_id      0
dtype: int64
Archivo actualizado y guardado.
