In [1]:
import os

import pandas as pd
import sqlalchemy as sqla

# Load the raw Spotify dataset from CSV.
# file_path is the single place to edit when the CSV lives somewhere else;
# read_csv now actually uses it instead of repeating the literal path.
file_path = './data/Popular_Spotify_Songs.csv'  # Update the path if needed
# The file is decoded as ISO-8859-1 rather than pandas' default UTF-8.
data = pd.read_csv(file_path, encoding='ISO-8859-1', sep=',')

# Replace cells that pd.notnull reports as missing with None
# (presumably so they are written to the database as SQL NULL — confirm).
data = data.where(pd.notnull(data), None)

# Build the SQLAlchemy engine for the MySQL database.
# The connection URL can be supplied through the SPOTIFY_DB_URL environment
# variable so that credentials do not have to live in the notebook; the
# literal below is only the fallback used when that variable is unset.
# NOTE(review): the fallback still embeds a user name and password — drop it
# once every environment that runs this notebook sets SPOTIFY_DB_URL.
db_url = os.environ.get(
    "SPOTIFY_DB_URL",
    "mysql://root:root@172.16.5.4:3310/spotify_db",
)
engine = sqla.create_engine(db_url)
# One shared connection; the later table-creation cell reuses it.
conn = engine.connect()

# DDL for the spotify_songs staging table that the CSV rows are appended to.
# `id` is an auto-increment surrogate key that is not part of the DataFrame.
# artist_s_name and the *_percent columns correspond to the DataFrame columns
# after they are renamed further down in this cell.
# `key` is backtick-quoted because KEY is a reserved word in MySQL.
# NOTE(review): streams, in_deezer_playlists and in_shazam_charts are declared
# TEXT although their names suggest counts — presumably the CSV contains
# non-numeric values in those columns; confirm before changing the types.
create_songs_query = '''
CREATE TABLE IF NOT EXISTS spotify_songs (
    id INT AUTO_INCREMENT PRIMARY KEY,
    track_name TEXT,
    artist_s_name TEXT,
    artist_count BIGINT,
    released_year BIGINT,
    released_month BIGINT,
    released_day BIGINT,
    in_spotify_playlists BIGINT,
    in_spotify_charts BIGINT,
    streams TEXT,
    in_apple_playlists BIGINT,
    in_apple_charts BIGINT,
    in_deezer_playlists TEXT,
    in_deezer_charts BIGINT,
    in_shazam_charts TEXT,
    bpm BIGINT,
    `key` TEXT,
    mode TEXT,
    danceability_percent BIGINT,
    valence_percent BIGINT,
    energy_percent BIGINT,
    acousticness_percent BIGINT,
    instrumentalness_percent BIGINT,
    liveness_percent BIGINT,
    speechiness_percent BIGINT
);
'''

# CSV headers containing '(', ')' or '%' are mapped to the plain identifiers
# used as column names in the spotify_songs table.
sql_safe_columns = {
    'artist(s)_name': 'artist_s_name',
    'danceability_%': 'danceability_percent',
    'valence_%': 'valence_percent',
    'energy_%': 'energy_percent',
    'acousticness_%': 'acousticness_percent',
    'instrumentalness_%': 'instrumentalness_percent',
    'liveness_%': 'liveness_percent',
    'speechiness_%': 'speechiness_percent',
}
# Reassign rather than mutate with inplace=True: rename returns a new frame,
# which keeps the step explicit and safe to chain or re-run.
data = data.rename(columns=sql_safe_columns)

# Create the spotify_songs staging table on the shared connection.
# Any failure is printed rather than raised, so the cell keeps running.
try:
    create_stmt = sqla.text(create_songs_query)
    conn.execute(create_stmt)
    print("Table 'spotify_songs' created successfully.")
except Exception as exc:
    print(f"An error occurred while creating the table: {exc}")

# Append every DataFrame row to spotify_songs (the DataFrame index is not
# written). Because the mode is 'append', running this cell again inserts the
# same rows a second time. Failures are printed, not raised.
try:
    data.to_sql(
        name='spotify_songs',
        con=engine,
        if_exists='append',
        index=False,
    )
    print("Data successfully inserted into the database.")
except Exception as exc:
    print(f"An error occurred while inserting data: {exc}")


Table 'spotify_songs' created successfully.
Data successfully inserted into the database.


In [2]:
# Step 1: Define Table Creation Queries
# Define SQL for creating the tables
# DDL for the Artists lookup table: an auto-increment artist_id plus the name.
# NOTE(review): artist_name has no UNIQUE constraint, so the schema itself
# does not reject duplicate names.
create_artists_query = '''
CREATE TABLE IF NOT EXISTS Artists (
    artist_id INT AUTO_INCREMENT PRIMARY KEY,
    artist_name VARCHAR(255)
);
'''

# DDL for AudioFeatures: seven integer audio attributes attached to a song.
# song_id is a foreign key into spotify_songs(id), i.e. the staging table,
# and the table declares no primary key.
# NOTE(review): PlaylistSongs references Songs(song_id) instead — confirm
# that pointing this table at spotify_songs rather than Songs is intended.
create_audio_features_query = '''
CREATE TABLE IF NOT EXISTS AudioFeatures (
    song_id INT,
    danceability INT,
    valence INT,
    energy INT,
    acousticness INT,
    instrumentalness INT,
    liveness INT,
    speechiness INT,
    FOREIGN KEY (song_id) REFERENCES spotify_songs(id)
);
'''

# DDL for the Songs table; artist_id is a foreign key into Artists, so Artists
# must exist before this statement runs.
# NOTE(review): this rebinds create_songs_query, which the first cell used
# for the spotify_songs DDL — after this cell runs the name refers to the
# Songs statement only.
create_songs_query = '''
CREATE TABLE IF NOT EXISTS Songs (
    song_id INT AUTO_INCREMENT PRIMARY KEY,
    song_name VARCHAR(255),
    artist_id INT,
    duration INT,
    release_date DATE,
    popularity INT,
    FOREIGN KEY (artist_id) REFERENCES Artists(artist_id)
);
'''

# DDL for the Playlists table: an auto-increment playlist_id plus its name.
create_playlists_query = '''
CREATE TABLE IF NOT EXISTS Playlists (
    playlist_id INT AUTO_INCREMENT PRIMARY KEY,
    playlist_name VARCHAR(255)
);
'''

# DDL for PlaylistSongs, the link table between Playlists and Songs.
# Both columns are foreign keys, and together they form the composite primary
# key, so a given song can appear at most once per playlist.
create_playlist_songs_query = '''
CREATE TABLE IF NOT EXISTS PlaylistSongs (
    playlist_id INT,
    song_id INT,
    FOREIGN KEY (playlist_id) REFERENCES Playlists(playlist_id),
    FOREIGN KEY (song_id) REFERENCES Songs(song_id),
    PRIMARY KEY (playlist_id, song_id)
);
'''

# Run the five CREATE TABLE statements in order on the shared connection.
# Tables that are referenced by a foreign key come before the tables that
# reference them. Everything sits in one try block, so the first failure
# stops the sequence and is printed instead of raised.
try:
    table_ddl = (
        ('Artists', create_artists_query),
        ('AudioFeatures', create_audio_features_query),
        ('Songs', create_songs_query),
        ('Playlists', create_playlists_query),
        ('PlaylistSongs', create_playlist_songs_query),
    )
    for table_label, ddl in table_ddl:
        conn.execute(sqla.text(ddl))
        print(f"Table '{table_label}' created successfully.")
except Exception as exc:
    print(f"An error occurred while creating tables: {exc}")

Table 'Artists' created successfully.
Table 'AudioFeatures' created successfully.
Table 'Songs' created successfully.
Table 'Playlists' created successfully.
Table 'PlaylistSongs' created successfully.


In [3]:
# Step 2: Data Insertion
# Extracting and Inserting Artists
# Build a one-column frame of distinct artist_s_name values, in order of first
# appearance, named to match the Artists.artist_name column.
# Missing names are dropped first so that no NULL artist row gets inserted.
# NOTE(review): each artist_s_name value is kept as a single string; if a
# value lists several artists it is not split — confirm that is intended.
unique_artists = pd.DataFrame(
    {'artist_name': data['artist_s_name'].dropna().unique()}
)

# Append the distinct artist names to the Artists table (index not written).
# The mode is 'append', so running this cell again adds the same names again.
# Failures are printed, not raised.
try:
    unique_artists.to_sql(
        name='Artists',
        con=engine,
        if_exists='append',
        index=False,
    )
    print("Artists successfully inserted into the database.")
except Exception as exc:
    print(f"An error occurred while inserting artists: {exc}")

Artists successfully inserted into the database.
