In [11]:
import os

import pandas as pd
import sqlalchemy as sqla

# Load the data from CSV
file_path = './data/Popular_Spotify_Songs.csv'  # Update the path if needed
data = pd.read_csv(file_path, encoding='ISO-8859-1', sep=',')

# Swap pandas' missing-value markers for None so that missing cells are
# handed to the database driver as NULL.
data = data.where(pd.notnull(data), None)

# The connection URL can be overridden through the SPOTIFY_DB_URL environment
# variable so that real credentials never have to be written into the notebook.
# The fallback is the original development URL and should not be used for
# anything but a local/dev database.
db_url = os.environ.get(
    "SPOTIFY_DB_URL",
    "mysql://root:root@172.16.5.4:3310/spotify_db",
)
engine = sqla.create_engine(db_url)

# `conn` is reused by the table-creation cells further down, so it is left open.
conn = engine.connect()

# DDL for the `spotify_songs` table that receives the CSV rows.
# The table uses `artists_name`, `key_name` and `*_percent` where the CSV
# headers are `artist(s)_name`, `key` and `*_%`.
# `id` is an AUTO_INCREMENT surrogate key; the CSV has no such column.
# IF NOT EXISTS makes the statement a no-op when the table is already present.
create_table_query = '''
CREATE TABLE IF NOT EXISTS spotify_songs (
    id INT AUTO_INCREMENT PRIMARY KEY,
    track_name VARCHAR(255),
    artists_name VARCHAR(255),
    artist_count INT,
    released_year YEAR,
    released_month INT,
    released_day INT,
    in_spotify_playlists INT,
    in_spotify_charts INT,
    streams BIGINT,
    in_apple_playlists INT,
    in_apple_charts INT,
    in_deezer_playlists INT,
    in_deezer_charts INT,
    in_shazam_charts INT,
    bpm INT,
    key_name VARCHAR(5),
    mode VARCHAR(10),
    danceability_percent INT,
    valence_percent INT,
    energy_percent INT,
    acousticness_percent INT,
    instrumentalness_percent INT,
    liveness_percent INT,
    speechiness_percent INT
);
'''

def to_table_columns(frame):
    """Return a copy of ``frame`` whose column labels match the ``spotify_songs`` table.

    The CSV headers ``artist(s)_name``, ``key`` and the ``*_%`` measures are
    called ``artists_name``, ``key_name`` and ``*_percent`` in the table
    definition. ``DataFrame.to_sql`` builds its INSERT from the frame's column
    labels, so they have to be translated first. Labels that already match are
    left as they are, and ``frame`` itself is not modified.
    """
    renames = {'artist(s)_name': 'artists_name', 'key': 'key_name'}
    for label in frame.columns:
        if isinstance(label, str) and label.endswith('_%'):
            # 'danceability_%' -> 'danceability_percent'
            renames[label] = label[:-2] + '_percent'
    return frame.rename(columns=renames)


# Execute the create table query
try:
    conn.execute(sqla.text(create_table_query))
    print("Table 'spotify_songs' created successfully.")
except Exception as e:
    print(f"An error occurred while creating the table: {e}")

# Try to insert data into the database.
# A renamed copy is inserted so that `data` keeps its CSV column labels for the
# cells below. NOTE: if_exists='append' adds the rows again every time this
# cell is run.
try:
    to_table_columns(data).to_sql('spotify_songs', con=engine, if_exists='append', index=False)
    print("Data successfully inserted into the database.")
except Exception as e:
    print(f"An error occurred while inserting data: {e}")


Table 'spotify_songs' created successfully.
Data successfully inserted into the database.


In [12]:
# Step 1: Define Table Creation Queries
# Each variable below holds the DDL for one table. Every statement uses
# IF NOT EXISTS, so it is a no-op when the table is already present.

# Artists: AUTO_INCREMENT surrogate key plus the artist name.
create_artists_query = '''
CREATE TABLE IF NOT EXISTS Artists (
    artist_id INT AUTO_INCREMENT PRIMARY KEY,
    artist_name VARCHAR(255)
);
'''

# Albums: surrogate key, album name and release date.
# NOTE(review): the CSV columns listed by `data.columns` contain no album
# field -- confirm where album data is expected to come from.
create_albums_query = '''
CREATE TABLE IF NOT EXISTS Albums (
    album_id INT AUTO_INCREMENT PRIMARY KEY,
    album_name VARCHAR(255),
    release_date DATE
);
'''

# Songs: one artist and one album per song, via foreign keys into Artists and
# Albums; both of those tables therefore have to exist before this one.
# NOTE(review): `duration` and `popularity` have no same-named CSV column --
# confirm their intended source.
create_songs_query = '''
CREATE TABLE IF NOT EXISTS Songs (
    song_id INT AUTO_INCREMENT PRIMARY KEY,
    song_name VARCHAR(255),
    artist_id INT,
    album_id INT,
    duration INT,
    release_date DATE,
    popularity INT,
    FOREIGN KEY (artist_id) REFERENCES Artists(artist_id),
    FOREIGN KEY (album_id) REFERENCES Albums(album_id)
);
'''

# Playlists: surrogate key plus the playlist name.
create_playlists_query = '''
CREATE TABLE IF NOT EXISTS Playlists (
    playlist_id INT AUTO_INCREMENT PRIMARY KEY,
    playlist_name VARCHAR(255)
);
'''

# PlaylistSongs: link table between Playlists and Songs. The composite primary
# key allows a given song to appear at most once in a given playlist.
create_playlist_songs_query = '''
CREATE TABLE IF NOT EXISTS PlaylistSongs (
    playlist_id INT,
    song_id INT,
    FOREIGN KEY (playlist_id) REFERENCES Playlists(playlist_id),
    FOREIGN KEY (song_id) REFERENCES Songs(song_id),
    PRIMARY KEY (playlist_id, song_id)
);
'''

# Run the DDL statements one after another. The order matters: a table that is
# referenced by a foreign key is created before the table that references it.
# The first failure aborts the remaining statements and is reported once.
try:
    ddl_by_table = (
        ('Artists', create_artists_query),
        ('Albums', create_albums_query),
        ('Songs', create_songs_query),
        ('Playlists', create_playlists_query),
        ('PlaylistSongs', create_playlist_songs_query),
    )
    for table_label, ddl in ddl_by_table:
        conn.execute(sqla.text(ddl))
        print(f"Table '{table_label}' created successfully.")
except Exception as err:
    print(f"An error occurred while creating tables: {err}")

Table 'Artists' created successfully.
Table 'Albums' created successfully.
Table 'Songs' created successfully.
Table 'Playlists' created successfully.
Table 'PlaylistSongs' created successfully.


In [14]:
# Show the loaded DataFrame; the notebook display elides the middle rows and
# columns with "...".
data

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
948,My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,...,144,A,Major,60,24,39,57,0,8,3
949,Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,...,166,F#,Major,42,7,24,83,1,12,6
950,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,92,C#,Major,80,81,67,4,0,8,6
951,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,97,C#,Major,82,67,77,8,0,12,5


In [19]:
# List the column labels as read from the CSV header; these are the names
# used to select columns from `data` in the following cells.
print(data.columns)

Index(['track_name', 'artist(s)_name', 'artist_count', 'released_year',
       'released_month', 'released_day', 'in_spotify_playlists',
       'in_spotify_charts', 'streams', 'in_apple_playlists', 'in_apple_charts',
       'in_deezer_playlists', 'in_deezer_charts', 'in_shazam_charts', 'bpm',
       'key', 'mode', 'danceability_%', 'valence_%', 'energy_%',
       'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%'],
      dtype='object')


In [20]:
# Step 2: Data Insertion
# Extracting and Inserting Artists
#
# Every distinct value of `artist(s)_name` becomes one row of the Artists
# table, in order of first appearance. A multi-artist credit such as
# "Latto, Jung Kook" is kept as a single string.
# Missing names are dropped first: `data` stores missing cells as None, and
# without the dropna() a NULL artist row would be inserted.
unique_artists = (
    data['artist(s)_name']
    .dropna()
    .drop_duplicates()
    .rename('artist_name')      # match the Artists.artist_name column
    .to_frame()
    .reset_index(drop=True)
)

# Insert unique artists into the Artists table.
# NOTE: if_exists='append' adds the same names again every time this cell is
# run; the Artists table has no uniqueness constraint to prevent that.
try:
    unique_artists.to_sql('Artists', con=engine, if_exists='append', index=False)
    print("Artists successfully inserted into the database.")
except Exception as e:
    print(f"An error occurred while inserting artists: {e}")

Artists successfully inserted into the database.
