In [5]:
# Import necessary libraries
import pandas as pd

# Read the chart export (expected next to this notebook, decoded as latin1),
# discard the leftover "Unnamed: 0" index column, and give every remaining
# column a lowercase, SQL-friendly name -- all in one pipeline.
df = (
    pd.read_csv("top50.csv", encoding="latin1")
    .drop(columns=["Unnamed: 0"])
    .rename(
        columns={
            "Track.Name": "track_name",
            "Artist.Name": "artist_name",
            "Genre": "genre_name",
            "Beats.Per.Minute": "bpm",
            "Energy": "energy",
            "Danceability": "danceability",
            "Loudness..dB..": "loudness",
            "Liveness": "liveness",
            "Valence.": "valence",
            "Length.": "length",
            "Acousticness..": "acousticness",
            "Speechiness.": "speechiness",
            "Popularity": "popularity",
        }
    )
)

# Step 3: Normalize values from 0–100 to 0.0–1.0
def normalize_columns(df, columns):
    """Return a copy of ``df`` with the selected columns divided by 100.

    The input frame is never modified: the rescaling is applied to a copy,
    so calling this function again on the same input gives the same result
    instead of dividing already-scaled values a second time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the columns to rescale.
    columns : list of str
        Labels of the columns whose values are divided by 100.0.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as ``df``; only the columns listed
        in ``columns`` differ from the input.
    """
    # Work on a copy so the caller's frame (and any other reference to it)
    # keeps its original values.
    scaled = df.copy()
    scaled[columns] = scaled[columns] / 100.0
    return scaled

# Columns handed to normalize_columns for the 0-100 -> 0.0-1.0 rescaling.
percentage_cols = [
    "energy",
    "danceability",
    "liveness",
    "valence",
    "acousticness",
    "speechiness",
]
df = normalize_columns(df=df, columns=percentage_cols)

# Step 4: keep only complete rows, each exactly once
df = df.drop_duplicates()
df = df.dropna()

# Step 5: build one lookup table per dimension. Each distinct name gets a
# 1-based id, numbered in order of first appearance in the data.
genre_df = (
    df[["genre_name"]]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(genre_id=lambda lookup: lookup.index + 1)
)

artist_df = (
    df[["artist_name"]]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(artist_id=lambda lookup: lookup.index + 1)
)

# Attach both ids to every track (left joins keep every track row), then keep
# only the columns of the final table, in their final order. The name columns
# are dropped here because the ids now stand in for them.
df = (
    df.merge(genre_df, on="genre_name", how="left")
    .merge(artist_df, on="artist_name", how="left")
)[
    [
        "track_name",
        "artist_id",
        "genre_id",
        "bpm",
        "energy",
        "danceability",
        "loudness",
        "liveness",
        "valence",
        "length",
        "acousticness",
        "speechiness",
        "popularity",
    ]
]

# Utility: Show how many songs and variables
def dataset_info(df):
    """Print the number of rows (songs) and columns (variables) of ``df``.

    Reads ``df.shape`` and writes two lines to standard output; nothing is
    returned.
    """
    row_count, column_count = df.shape[0], df.shape[1]
    print(f"Total Songs: {row_count}")
    print(f"Total Variables: {column_count}")

# Run summary info: print the row and column counts of the cleaned frame
dataset_info(df)

# Preview the cleaned data: head() with no argument returns the first five
# rows, and as the last expression of the cell it is rendered as the output.
df.head()


🎵 Total Songs: 50
📊 Total Variables: 13


Unnamed: 0,track_name,artist_id,genre_id,bpm,energy,danceability,loudness,liveness,valence,length,acousticness,speechiness,popularity
0,Señorita,1,1,117,0.55,0.76,-6,0.08,0.75,191,0.04,0.03,79
1,China,2,2,105,0.81,0.79,-4,0.08,0.61,302,0.08,0.09,92
2,boyfriend (with Social House),3,3,190,0.8,0.4,-4,0.16,0.7,186,0.12,0.46,85
3,Beautiful People (feat. Khalid),4,4,93,0.65,0.64,-8,0.08,0.55,198,0.12,0.19,86
4,Goodbyes (Feat. Young Thug),5,5,150,0.65,0.58,-4,0.11,0.18,175,0.45,0.07,94


In [7]:
# Preview the end of the cleaned data: tail() with no argument returns the
# last five rows.
df.tail()

Unnamed: 0,track_name,artist_id,genre_id,bpm,energy,danceability,loudness,liveness,valence,length,acousticness,speechiness,popularity
45,One Thing Right,37,21,88,0.62,0.66,-2,0.58,0.44,182,0.07,0.05,88
46,Te Robaré,38,12,176,0.75,0.67,-4,0.08,0.8,202,0.24,0.06,88
47,Happier,37,21,100,0.79,0.69,-3,0.17,0.67,214,0.19,0.05,88
48,Call You Mine,25,16,104,0.7,0.59,-6,0.41,0.5,218,0.23,0.03,88
49,Cross Me (feat. Chance the Rapper & PnB Rock),4,4,95,0.79,0.75,-6,0.07,0.61,206,0.21,0.12,82


In [9]:
# Lift pandas' display limits so the whole frame is rendered (be careful with
# large datasets!). These options are set globally, so they stay in effect for
# every cell executed afterwards in this session.
for display_option in (
    "display.max_rows",      # show all rows
    "display.max_columns",   # show all columns
    "display.width",         # don't wrap columns
    "display.max_colwidth",  # show full content in each cell
):
    pd.set_option(display_option, None)

# Render the complete DataFrame as the cell's output
df


Unnamed: 0,track_name,artist_id,genre_id,bpm,energy,danceability,loudness,liveness,valence,length,acousticness,speechiness,popularity
0,Señorita,1,1,117,0.55,0.76,-6,0.08,0.75,191,0.04,0.03,79
1,China,2,2,105,0.81,0.79,-4,0.08,0.61,302,0.08,0.09,92
2,boyfriend (with Social House),3,3,190,0.8,0.4,-4,0.16,0.7,186,0.12,0.46,85
3,Beautiful People (feat. Khalid),4,4,93,0.65,0.64,-8,0.08,0.55,198,0.12,0.19,86
4,Goodbyes (Feat. Young Thug),5,5,150,0.65,0.58,-4,0.11,0.18,175,0.45,0.07,94
5,I Don't Care (with Justin Bieber),4,4,102,0.68,0.8,-5,0.09,0.84,220,0.09,0.04,84
6,Ransom,6,6,180,0.64,0.75,-6,0.07,0.23,131,0.02,0.29,92
7,How Do You Sleep?,7,4,111,0.68,0.48,-5,0.08,0.35,202,0.15,0.09,90
8,Old Town Road - Remix,8,7,136,0.62,0.88,-6,0.11,0.64,157,0.05,0.1,87
9,bad guy,9,8,135,0.43,0.7,-11,0.1,0.56,194,0.33,0.38,95
