In [30]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [31]:
# Read the database connection settings from the env file one directory up,
# so that no credentials are hard-coded in the notebook.
load_dotenv("../credentials.env")

db_username = os.getenv("DB_USERNAME")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

# os.getenv returns None for an unset variable; formatting that into a URL
# would produce the literal text "None", so stop with an explicit message.
missing_settings = [
    env_name
    for env_name, env_value in {
        "DB_USERNAME": db_username,
        "DB_PASSWORD": db_password,
        "DB_HOST": db_host,
        "DB_PORT": db_port,
        "DB_NAME": db_name,
    }.items()
    if env_value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in ../credentials.env or the environment: "
        + ", ".join(missing_settings)
    )

# URL.create receives each component separately, so a username or password
# containing characters such as "@", ":" or "/" cannot corrupt the URL
# (which string interpolation into "postgresql://user:pass@host" would allow).
engine = create_engine(
    URL.create(
        drivername="postgresql",
        username=db_username,
        password=db_password,
        host=db_host,
        # An empty DB_PORT leaves the port out of the URL entirely.
        port=int(db_port) if db_port else None,
        database=db_name,
    )
)

query = "SELECT * FROM clean_awards"

# Load the whole clean_awards table into a DataFrame and preview the first rows.
awards = pd.read_sql_query(query, engine)
awards.head()

Unnamed: 0,year,title,published_at,updated_at,category,track_name,artists,workers,was_nominated
0,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,bad guy,billie eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",True
1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,"hey, ma",bon iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",True
2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,7 rings,ariana grande,"Charles Anderson, Tommy Brown, Michael Foster ...",True
3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,hard place,h.e.r.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",True
4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,talk,khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",True


In [32]:
# Load the Spotify dataset from its comma-separated file and preview the first rows.
spotify_csv_path = "../data/spotify_dataset_clean.csv"
spotify = pd.read_csv(spotify_csv_path, sep=',')
spotify.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,gen hoshino,Comedy,comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,Acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,ben woodward,Ghost (Acoustic),ghost - acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,Acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,ingrid michaelson;zayn,To Begin Again,to begin again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,Acoustic
3,6lfxq3CG4xtTiEg7opyCyx,kina grannis,Crazy Rich Asians (Original Motion Picture Sou...,can't help falling in love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,Acoustic
4,5vjLSffimiIP26QG5WcN2K,chord overstreet,Hold On,hold on,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,Acoustic


## Transformation

In [33]:
# Convert every object-dtype column of both frames to pandas' 'string' dtype.
awards_object_cols = awards.select_dtypes(include='object').columns
awards = awards.astype(dict.fromkeys(awards_object_cols, 'string'))

spotify_object_cols = spotify.select_dtypes(include='object').columns
spotify = spotify.astype(dict.fromkeys(spotify_object_cols, 'string'))

In [34]:
# Left-join the awards onto the Spotify tracks by artist and track name:
# every Spotify row is kept, and rows without a matching award get missing
# values in the award columns.
merge_keys = ['artists', 'track_name']
merged_df = spotify.merge(awards, how='left', on=merge_keys)

In [35]:
# Preview the first four rows of the merged frame.
merged_df.head(n=4)

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,...,tempo,time_signature,track_genre,year,title,published_at,updated_at,category,workers,was_nominated
0,5SuOikwiRyPMVoIQDJUgSV,gen hoshino,Comedy,comedy,73,230666,False,0.676,0.461,1,...,87.917,4,Acoustic,,,NaT,NaT,,,
1,4qPNDBW1i3p13qLCt0Ki3A,ben woodward,Ghost (Acoustic),ghost - acoustic,55,149610,False,0.42,0.166,1,...,77.489,4,Acoustic,,,NaT,NaT,,,
2,1iJBSr7s7jYXzM8EGcbK5b,ingrid michaelson;zayn,To Begin Again,to begin again,57,210826,False,0.438,0.359,0,...,76.332,4,Acoustic,,,NaT,NaT,,,
3,6lfxq3CG4xtTiEg7opyCyx,kina grannis,Crazy Rich Asians (Original Motion Picture Sou...,can't help falling in love,71,201933,False,0.266,0.0596,0,...,181.74,3,Acoustic,,,NaT,NaT,,,


## Save

In [36]:
# Write the merged frame to CSV without the DataFrame index column.
transformed_csv_path = "../data/transformed_dataset.csv"
merged_df.to_csv(transformed_csv_path, index=False)

In [37]:
# Write the merged frame to the "transformed_dataset" table, replacing the
# table if it already exists. The engine created when the awards table was
# loaded is reused, so no second connection pool is opened for the same database.
try:
    merged_df.to_sql("transformed_dataset", engine, if_exists="replace", index=False)
    print("Succesfull migration")
except Exception as e:
    print(f"Error in migration: {e}")
    # Re-raise after reporting: swallowing the error would let a failed
    # migration look like a completed cell under "Restart & Run All".
    raise

Succesfull migration
