In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from scipy import stats
import tqdm

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, avg, count, countDistinct
from pyspark.sql.functions import split, explode, when, regexp_extract, substring
from pyspark.sql.functions import date_format, dayofweek, dayofyear, weekofyear, quarter
from pyspark.sql.functions import corr
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler

import warnings
warnings.filterwarnings("ignore")

In [2]:
from IPython.display import display, HTML
display(HTML('<style>pre { white-space: pre !important; }</style>'))

### Loading the data

In [3]:
CSV_DATA_PATH = 'Dataset/spotify.csv'
PARQUET_DATA_PATH = 'Dataset/spotify.parquet'

In [4]:
spark = SparkSession.builder.appName('spotify').getOrCreate()
df_pyspark = spark.read.parquet(PARQUET_DATA_PATH, header=True, inferSchema=True)

24/05/27 17:49:36 WARN Utils: Your hostname, hadi resolves to a loopback address: 127.0.1.1; using 192.168.1.5 instead (on interface wlp2s0)
24/05/27 17:49:36 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/05/27 17:49:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/05/27 17:49:58 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors
                                                                                

In [5]:
spark2 = SparkSession.builder.appName('spotify').getOrCreate()
df_pyspark2 = spark2.read.csv(CSV_DATA_PATH, header=True, inferSchema=True)

                                                                                

In [6]:
def show(df_pyspark, n=1):
	print(f'Number of rows: {df_pyspark.count()}')
	df_pyspark.show(n)

### Mutual Information

In [7]:
# find the tracks where track_id in df_pyspark2 is equal to id in df_pyspark
def find_track_id(df_pyspark, df_pyspark2):
	joined_df_pyspark = df_pyspark2.join(df_pyspark, df_pyspark2.track_id == df_pyspark.id, 'inner')
	# return ids of tracks
	return joined_df_pyspark.select('id')

joined_ids = find_track_id(df_pyspark, df_pyspark2)

In [8]:
# convert joined_ids to list
joined_ids_list = [row.id for row in joined_ids.collect()]
len(joined_ids_list)

                                                                                

6041

In [9]:
# select rows where id is in joined_ids_list
df_pyspark1_filtered = df_pyspark.filter(col('id').isin(joined_ids_list))
df_pyspark1_filtered.count()

                                                                                

4729

In [10]:
df_pyspark2_filtered = df_pyspark2.filter(col('track_id').isin(joined_ids_list))
df_pyspark2_filtered.count()

                                                                                

6041

In [11]:
for i in range(1):
    random_index = np.random.randint(0, len(joined_ids_list))
    df_pyspark.filter(df_pyspark.id == joined_ids_list[random_index]).show()
    df_pyspark2.filter(df_pyspark2.track_id == joined_ids_list[random_index]).show()
    

                                                                                

+--------------------+---------+--------+--------------------+-----------------+--------------------+------------+-----------+--------+------------+------+---+--------+----+-----------+------------+----------------+------------------+-------+-------+-----------+--------------+----+------------+
|                  id|     name|   album|            album_id|          artists|          artist_ids|track_number|disc_number|explicit|danceability|energy|key|loudness|mode|speechiness|acousticness|instrumentalness|          liveness|valence|  tempo|duration_ms|time_signature|year|release_date|
+--------------------+---------+--------+--------------------+-----------------+--------------------+------------+-----------+--------+------------+------+---+--------+----+-----------+------------+----------------+------------------+-------+-------+-----------+--------------+----+------------+
|1PYsy4cGp3egbeaQD...|Yottabyte|BYLAW EP|5dv1oLETxdsYOkS2S...|['Martin Garrix']|['60d24wfXkVzDSfL...|           

24/05/27 17:50:53 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: , track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
 Schema: _c0, track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
Expected: _c0 but found: 
CSV file: file:///home/hadi/Uni/semester%208/data%20science/projects/Final%20Project/Phase%201/Dataset/spotify.csv


+-----+--------------------+-------------+----------+----------+----------+-----------+--------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+-----------------+
|  _c0|            track_id|      artists|album_name|track_name|popularity|duration_ms|explicit|danceability|energy|key|loudness|mode|speechiness|acousticness|instrumentalness|liveness|valence|  tempo|time_signature|      track_genre|
+-----+--------------------+-------------+----------+----------+----------+-----------+--------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+-----------------+
|83513|1PYsy4cGp3egbeaQD...|Martin Garrix|  BYLAW EP| Yottabyte|        48|     210004|   False|       0.634| 0.632|  5|   -4.52|   0|     0.0313|      0.0129|         4.53E-4|   0.116|  0.449|128.075|           4.0|progressive-house|
+-----+--------------------+-------------+----------+-------

In [12]:
# add "1_" prefix to columns of df_pyspark1_filtered
df_pyspark1_filtered = df_pyspark1_filtered.toDF(*[f'1_{c}' for c in df_pyspark1_filtered.columns])
df_pyspark1_filtered.show()

+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+-------------+----------+------------------+------------------+-----+-------------------+------+-------------+------------------+--------------------+------------------+------------------+------------------+-------------+----------------+------+--------------+
|                1_id|              1_name|             1_album|          1_album_id|           1_artists|        1_artist_ids|1_track_number|1_disc_number|1_explicit|    1_danceability|          1_energy|1_key|         1_loudness|1_mode|1_speechiness|    1_acousticness|  1_instrumentalness|        1_liveness|         1_valence|           1_tempo|1_duration_ms|1_time_signature|1_year|1_release_date|
+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+-------------+----------+------------------+--------

In [13]:
# add "2_" prefix to columns of df_pyspark1_filtered
df_pyspark2_filtered = df_pyspark2_filtered.toDF(*[f'2_{c}' for c in df_pyspark2_filtered.columns])
df_pyspark2_filtered.show()

+-----+--------------------+--------------------+--------------------+--------------------+------------+-------------+----------+--------------+--------+-----+----------+------+-------------+--------------+------------------+----------+---------+-------+----------------+-------------+
|2__c0|          2_track_id|           2_artists|        2_album_name|        2_track_name|2_popularity|2_duration_ms|2_explicit|2_danceability|2_energy|2_key|2_loudness|2_mode|2_speechiness|2_acousticness|2_instrumentalness|2_liveness|2_valence|2_tempo|2_time_signature|2_track_genre|
+-----+--------------------+--------------------+--------------------+--------------------+------------+-------------+----------+--------------+--------+-----+----------+------+-------------+--------------+------------------+----------+---------+-------+----------------+-------------+
|    3|6lfxq3CG4xtTiEg7o...|        Kina Grannis|Crazy Rich Asians...|Can't Help Fallin...|          71|       201933|     False|         0.26

24/05/27 17:50:55 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: , track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
 Schema: _c0, track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
Expected: _c0 but found: 
CSV file: file:///home/hadi/Uni/semester%208/data%20science/projects/Final%20Project/Phase%201/Dataset/spotify.csv
                                                                                

In [14]:
# merge the two dataframes by df_pyspark1_filtered.1_id = df_pyspark2_filtered.2_track_id. only keep the rows which are present in both dataframes
final_df_pyspark = df_pyspark1_filtered.join(df_pyspark2_filtered, df_pyspark1_filtered['1_id'] == df_pyspark2_filtered['2_track_id'], 'inner')
final_df_pyspark.show()

24/05/27 17:50:55 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
24/05/27 17:50:57 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: , track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
 Schema: _c0, track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
Expected: _c0 but found: 
CSV file: file:///home/hadi/Uni/semester%208/data%20science/projects/Final%20Project/Phase%201/Dataset/spotify.csv

+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------+-------------+----------+------------------+------------------+-----+-------------------+------+------------------+------------------+------------------+------------------+------------------+------------------+-------------+----------------+------+--------------+-----+--------------------+--------------------+--------------------+--------------------+------------+-------------+----------+--------------+--------+-----+----------+------+-------------+--------------+------------------+----------+---------+-------+----------------+-------------+
|                1_id|              1_name|             1_album|          1_album_id|           1_artists|        1_artist_ids|1_track_number|1_disc_number|1_explicit|    1_danceability|          1_energy|1_key|         1_loudness|1_mode|     1_speechiness|    1_acousticness|1_instrumentalness|        1_liven

                                                                                

In [15]:
# count the unique values of '2_track_id' in df_pyspark2_filtered
df_pyspark2_filtered.select(countDistinct('2_track_id')).show()



+--------------------------+
|count(DISTINCT 2_track_id)|
+--------------------------+
|                      4729|
+--------------------------+



                                                                                

In [16]:
final_df_pyspark.show(1)

24/05/27 17:51:24 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: , track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
 Schema: _c0, track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
Expected: _c0 but found: 
CSV file: file:///home/hadi/Uni/semester%208/data%20science/projects/Final%20Project/Phase%201/Dataset/spotify.csv

+--------------------+---------------+----------------+--------------------+---------------+--------------------+--------------+-------------+----------+--------------+--------+-----+----------+------+-------------+--------------+------------------+------------------+---------+-------+-------------+----------------+------+--------------+-----+--------------------+-----------+----------------+---------------+------------+-------------+----------+--------------+--------+-----+----------+------+-------------+--------------+------------------+----------+---------+-------+----------------+-------------+
|                1_id|         1_name|         1_album|          1_album_id|      1_artists|        1_artist_ids|1_track_number|1_disc_number|1_explicit|1_danceability|1_energy|1_key|1_loudness|1_mode|1_speechiness|1_acousticness|1_instrumentalness|        1_liveness|1_valence|1_tempo|1_duration_ms|1_time_signature|1_year|1_release_date|2__c0|          2_track_id|  2_artists|    2_album_name

                                                                                

In [17]:
# convert final_df_pyspark to pandas dataframe
final_df_pandas = final_df_pyspark.toPandas()
final_df_pandas.head()

24/05/27 17:51:34 WARN CSVHeaderChecker: CSV header does not conform to the schema.
 Header: , track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
 Schema: _c0, track_id, artists, album_name, track_name, popularity, duration_ms, explicit, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature, track_genre
Expected: _c0 but found: 
CSV file: file:///home/hadi/Uni/semester%208/data%20science/projects/Final%20Project/Phase%201/Dataset/spotify.csv
                                                                                

Unnamed: 0,1_id,1_name,1_album,1_album_id,1_artists,1_artist_ids,1_track_number,1_disc_number,1_explicit,1_danceability,...,2_loudness,2_mode,2_speechiness,2_acousticness,2_instrumentalness,2_liveness,2_valence,2_tempo,2_time_signature,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,2xkZV2Hl1Omi8rk2D7t5lN,['The Strokes'],['0epOFNiUfyON9EYx7Tpr6V'],9,1,False,0.428,...,-5.423,0,0.0289,0.341,0.181,0.101,0.201,92.002,4.0,alt-rock
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,4N11Xw4rtuswjOeZjqKgay,['Lights & Motion'],['3ff1CmU6qfTqRAmdrq8EEG'],4,1,False,0.0789,...,-18.144,1,0.0362,0.733,0.786,0.0943,0.0409,56.449,4.0,ambient
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,3l5PdpQHvUOXlq1w9PftPu,['Pink Guy'],['3yJnBqAoRUNeDabdYoiYWD'],11,1,True,0.87,...,-6.32,1,0.333,0.00661,1e-06,0.45,0.533,104.021,4.0,comedy
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),7lPoGKpCGgdKFAxpudhAH5,['Anne-Marie'],['1zNqDE7qDGCsyzJwohVaoX'],10,1,False,0.697,...,-2.881,0,0.117,0.0372,0.0,0.137,0.603,96.133,4.0,dance
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,218CJKDCszsQQj7Amk7vIu,"['Burna Boy', 'Chris Martin']","['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",12,1,True,0.671,...,-7.513,0,0.195,0.272,0.0,0.405,0.355,142.0,4.0,dancehall


In [18]:
final_df_pandas.columns

Index(['1_id', '1_name', '1_album', '1_album_id', '1_artists', '1_artist_ids',
       '1_track_number', '1_disc_number', '1_explicit', '1_danceability',
       '1_energy', '1_key', '1_loudness', '1_mode', '1_speechiness',
       '1_acousticness', '1_instrumentalness', '1_liveness', '1_valence',
       '1_tempo', '1_duration_ms', '1_time_signature', '1_year',
       '1_release_date', '2__c0', '2_track_id', '2_artists', '2_album_name',
       '2_track_name', '2_popularity', '2_duration_ms', '2_explicit',
       '2_danceability', '2_energy', '2_key', '2_loudness', '2_mode',
       '2_speechiness', '2_acousticness', '2_instrumentalness', '2_liveness',
       '2_valence', '2_tempo', '2_time_signature', '2_track_genre'],
      dtype='object')

In [19]:
# reorder columns of final_df_pandas
final_df_pandas = final_df_pandas[['1_id', '2_track_id', 
                                   '1_name', '2_track_name', 
								   '1_album', '2_album_name',
                                   '1_artists', '2_artists',
                                   '1_explicit', '2_explicit',
                                   '1_danceability', '2_danceability',
                                   '1_energy', '2_energy',
                                   '1_key', '2_key',
                                   '1_loudness', '2_loudness',
                                   '1_mode', '2_mode',
                                   '1_speechiness', '2_speechiness',
                                   '1_acousticness', '2_acousticness',
                                   '1_instrumentalness', '2_instrumentalness',
                                   '1_liveness', '2_liveness',
                                   '1_valence', '2_valence',
                                   '1_tempo', '2_tempo',
                                   '1_time_signature', '2_time_signature',
                                   '1_artist_ids', 
                                   '1_album_id',
                                   '1_track_number', 
                                   '1_disc_number', 
                                   '1_duration_ms',
                                   '1_year', 
                                   '1_release_date',
                                   '2_popularity',
                                   '2_track_genre',
									]] 
final_df_pandas.head()

Unnamed: 0,1_id,2_track_id,1_name,2_track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,Ode To The Mets,The New Abnormal,The New Abnormal,['The Strokes'],The Strokes,False,False,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,alt-rock
1,2No1A7ZuMaBGxz45jmA9Gw,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Glaciers,Bloom,Bloom,['Lights & Motion'],Lights & Motion,False,False,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,ambient
2,2v738AcSrqklWKhBpV5alf,2v738AcSrqklWKhBpV5alf,Uber Pussy,Uber Pussy,Pink Season,Pink Season,['Pink Guy'],Pink Guy,True,True,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,comedy
3,2BgEsaKNfHUdlh97KmvFyo,2BgEsaKNfHUdlh97KmvFyo,2002,2002,Speak Your Mind (Deluxe),Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,dance
4,2cerDm5EbuTNJ68EIMtrF2,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Monsters You Made (feat. Chris Martin),Twice As Tall,Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,dancehall


### Drop Duplicates

In [20]:
# drop duplicate rows
print(final_df_pandas.shape)
final_df_pandas.drop_duplicates(inplace=True)
print(final_df_pandas.shape)

(6041, 43)
(6027, 43)


### Converting Genre to List

In [21]:
(final_df_pandas['1_id'] == final_df_pandas['2_track_id']).all()

True

In [22]:
final_df_pandas.drop('2_track_id', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_id': 'track_id'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,1_name,2_track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,Ode To The Mets,The New Abnormal,The New Abnormal,['The Strokes'],The Strokes,False,False,0.428,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,alt-rock
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Glaciers,Bloom,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,ambient
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Uber Pussy,Pink Season,Pink Season,['Pink Guy'],Pink Guy,True,True,0.87,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,comedy
3,2BgEsaKNfHUdlh97KmvFyo,2002,2002,Speak Your Mind (Deluxe),Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.697,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,dance
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Monsters You Made (feat. Chris Martin),Twice As Tall,Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.671,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,dancehall


In [23]:
# now if we have a track with multiple genres, we have multiple rows for that track. we will keep only one row for each track and a list of genres for that track
temp = final_df_pandas.groupby('track_id')['2_track_genre'].apply(list).reset_index()
temp.head()

Unnamed: 0,track_id,2_track_genre
0,000qpdoc97IMTBvF8gwcpy,[minimal-techno]
1,001YQlnDSduXd5LgBd66gT,"[punk-rock, ska]"
2,00BYitnjj9tACCkLapk5uS,[grunge]
3,00Y9r5SfbDdEjJhNg7laQr,[german]
4,00yJsUfhS7Tp3mPMbgmjoJ,[german]


In [24]:
# show a row with multiple genres
for row in temp.iterrows():
	if len(row[1]['2_track_genre']) > 2:
		print(row[1])
		print("==========")
		print(final_df_pandas['2_track_genre'][final_df_pandas['track_id'] == row[1]['track_id']])
		break

track_id                           02shCNmb6IvgB5jLqKjtkK
2_track_genre    [edm, electro, house, progressive-house]
Name: 22, dtype: object
2914                  edm
2916              electro
3994                house
5193    progressive-house
Name: 2_track_genre, dtype: object


In [25]:
final_df_pandas.drop('2_track_genre', axis=1, inplace=True)
final_df_pandas.drop_duplicates(inplace=True)
final_df_pandas = final_df_pandas.merge(temp, on='track_id', how='left')
final_df_pandas.head()

Unnamed: 0,track_id,1_name,2_track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,Ode To The Mets,The New Abnormal,The New Abnormal,['The Strokes'],The Strokes,False,False,0.428,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Glaciers,Bloom,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Uber Pussy,Pink Season,Pink Season,['Pink Guy'],Pink Guy,True,True,0.87,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,2002,Speak Your Mind (Deluxe),Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.697,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Monsters You Made (feat. Chris Martin),Twice As Tall,Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.671,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [26]:
temp = final_df_pandas.groupby('track_id')
count = 0
# show groups with more than one row
for name, group in temp:
	if count == 5:
		break
	if len(group) > 1:
		if group['2_popularity'].nunique() == 1:	
			print(group)
			print("==========")
			count += 1

In [27]:
for name, group in temp:
	if (len(group) > 1) and (group['2_popularity'].nunique() != 1):
		# keep only the first row in group
		final_df_pandas.drop(group.index[1:], inplace=True)

final_df_pandas.reset_index(drop=True, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,1_name,2_track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,Ode To The Mets,The New Abnormal,The New Abnormal,['The Strokes'],The Strokes,False,False,0.428,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Glaciers,Bloom,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Uber Pussy,Pink Season,Pink Season,['Pink Guy'],Pink Guy,True,True,0.87,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,2002,Speak Your Mind (Deluxe),Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.697,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Monsters You Made (feat. Chris Martin),Twice As Tall,Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.671,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [28]:
print(len(final_df_pandas))
print(final_df_pandas['track_id'].nunique())

4729
4729


### Merging the columns

In [29]:
# check if '1_name' and '2_track_name' are equal
(final_df_pandas['1_name'] == final_df_pandas['2_track_name']).all()

False

In [30]:
final_df_pandas[final_df_pandas['1_name'] != final_df_pandas['2_track_name']]

Unnamed: 0,track_id,1_name,2_track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
21,3KhhGdo024E5njEJBsQUpU,haveTomorrow's Another Day,Tomorrow's Another Day,Collie Buddz,Collie Buddz,['Collie Buddz'],Collie Buddz,False,False,0.706,...,4.0,['5Ayl2bJtN5mdCsxZoxs9n1'],2BGWnHTib774X9f7RPoato,4,1,195800,2007,2007-07-02,34,[dancehall]
37,4KoSQS6Bs9ZjgkdvR7UFJM,No Es Fácil Amar,No Es Fácil Amar,Super Pop Venezuela,Super Pop Venezuela,['Los Amigos Invisibles'],Los Amigos Invisibles,False,False,0.732,...,4.0,['5x3mrCTZmkoTXURN7pWdGN'],06UChHNzoOpOKngPglcvLE,5,1,249520,2006,2006-09-05,29,[afrobeat]
96,5jjZikDrEd0by1o7V3fO4y,Overwhelmed,overwhelmed,Overwhelmed,Overwhelmed,['Royal & the Serpent'],Royal & the Serpent,False,False,0.804,...,4.0,['64EHXDoln95lnccszdPum0'],7E7HRywUyuqW5qH793yPbP,1,1,159293,2020,2020-06-26,70,"[alternative, electro, indie-pop, indie]"
131,2GnYCy70I8cR4oEA2wsSDx,"The Hours: An Unwelcome Friend (From ""The Hours"")","""The Hours: An Unwelcome Friend (From """"The Ho...","Philip Glass: Glassworlds, Vol. 4 – On Love","Philip Glass: Glassworlds, Vol. 4 – On Love","['Philip Glass', 'Nicolas Horvath']",Philip Glass;Nicolas Horvath,False,False,0.253,...,3.0,"['69lxxQvsfAIoQbB20bEPFC', '6EyYsJlQEb3uHINVS5...",2HItrNjxkcndzejvNPdE6v,7,1,256147,2016,2016-06-10,59,[ambient]
173,3D8dwH690MXQRhtIZTSS9c,"Leaving, On a Jet Plane - ""Greatest Hits"" Version","""Leaving, On a Jet Plane - """"Greatest Hits"""" V...",John Denver's Greatest Hits,John Denver's Greatest Hits,['John Denver'],John Denver,False,False,0.406,...,4.0,['7EK1bQADBoqbYXnT4Cqv9w'],4zn2Kj85Hew0USyxc4TJEX,6,1,245533,1973,1973-11-01,62,"[country, folk, singer-songwriter, songwriter]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4673,7kmYuFYVsk3UCIw5wuE6B1,Love Is A Stranger - Remastered Version,Love Is a Stranger - Remastered Version,Ultimate Collection,Ultimate Collection,"['Eurythmics', 'Annie Lennox', 'Dave Stewart']",Eurythmics;Annie Lennox;Dave Stewart,False,False,0.780,...,4.0,"['0NKDgy9j66h3DLnN8qu1bB', '5MspMQqdVbdwP6ax3G...",1t7fYXGuOKQ61AY4qRtIot,2,1,222613,2005,2005-11-08,23,[synth-pop]
4694,7o9PkTfxL0Mm7gk3Bp98hv,I See You Baby - Fatboy Slim Radio Edit,I See You Baby (feat. Gramma Funk) - Fatboy Sl...,Groove Armada Greatest Hits,Groove Armada Greatest Hits,"['Groove Armada', 'Gramma Funk']",Groove Armada;Gramma Funk,False,False,0.572,...,4.0,"['67tgMwUfnmqzYsNAtnP6YJ', '09dzhikZDQAWGs8jyI...",4gAC6u1x2Bld7EBhJppCnQ,3,1,244707,2007,2007,53,[trip-hop]
4702,5s3lnmStpDUDDSJ1curTd7,Otra Vez (feat. Sabina Sciubba),Otra Vez,Amatoria,Amatoria,['Federico Aubele'],Federico Aubele;Sabina Sciuba,False,False,0.702,...,4.0,['3FIFlkBCqeOewVa9hiNgwq'],7u64RVEfkE88QvI2jyWuDW,3,1,194080,2009,2009-05-19,19,[trip-hop]
4719,2R26Us31r9gJQhv23xXzuy,United - Single Long Version,United (Single Long Version),"The Past, The Present, The Future","The Past, The Present, The Future","[""Mark 'Oh""]",Mark 'Oh,False,False,0.538,...,4.0,['3eLW10XmpXVzwszGQ9gDXX'],33ynyiIWIDFpexO0ikTInX,5,1,238307,2009,2009-11-06,25,[techno]


In [31]:
final_df_pandas.drop('2_track_name', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_name': 'track_name'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,2_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,The New Abnormal,['The Strokes'],The Strokes,False,False,0.428,0.426,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,0.0789,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,Pink Season,['Pink Guy'],Pink Guy,True,True,0.87,0.87,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.697,0.697,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.671,0.671,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [32]:
(final_df_pandas['1_album'] == final_df_pandas['2_album_name']).all()

False

In [33]:
final_df_pandas[final_df_pandas['1_album'] != final_df_pandas['2_album_name']]

Unnamed: 0,track_id,track_name,1_album,2_album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,2_danceability,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
220,5tsAOP03wpq2WehkxrAakt,Ships Sail Away,Haste Make | Hard Hearted Stranger,Haste Make / Hard Hearted Stranger,['Mandolin Orange'],Watchhouse,False,False,0.436,0.42,...,3.0,['675tsBPpaZtqyiBwEf3ZEP'],5nBEPxMYprdGPngcur3JoV,8,1,216093,2011,2011-11-08,24,[bluegrass]
266,57BuFDoc2hUHmSyoL8OynI,My First Lover,Time (The Relevator),Time (The Revelator),['Gillian Welch'],Gillian Welch,False,False,0.653,0.653,...,4.0,['2H5elA2mJKrHmqkN9GSfkz'],55FP2ypQcghszSqylyBRbp,2,1,227840,2001,2001-07-31,29,[bluegrass]
308,7afCEhjQKgWLoLOUs0TX4m,I'll Be Mellow When I'm Dead,"""Weird Al"" Yankovic","""""""Weird Al"""" Yankovic""","['""Weird Al"" Yankovic']","""""""Weird Al"""" Yankovic""",False,False,0.661,0.661,...,4.0,['1bDWGdIC2hardyt55nlQgG'],1gnhuWdl30liEUM2jy9lxl,10,1,220907,1983,1983,26,[comedy]
335,0ROMalDdNg3L2HoZP4qFyJ,"The Four Seasons - Winter in F Minor, RV. 297:...",The Four Seasons & Concertos for Bassoon and V...,"""The Four Seasons & Concertos for Bassoon and ...","['Antonio Vivaldi', 'Adrian Chandler', 'La Ser...",Antonio Vivaldi;Adrian Chandler;La Serenissima,False,False,0.400,0.4,...,4.0,"['2QOIawHpSlOwXDvSqQ9YJR', '7qJsLLT3iv2Uc0Xbpn...",1mSVDEox7b18dnZ4Iblnkb,10,1,201853,2015,2015-09-04,63,[classical]
337,1u8T5EVNoIP7bxcn4ZMlsm,The Biggest Ball of Twine In Minnesota,"The Essential ""Weird Al"" Yankovic","""The Essential """"Weird Al"""" Yankovic""","['""Weird Al"" Yankovic']","""""""Weird Al"""" Yankovic""",False,False,0.724,0.724,...,4.0,['1bDWGdIC2hardyt55nlQgG'],2WMY5Qm6ztMQsAubpumeb4,14,1,407480,2009,2009,22,[comedy]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4624,35kPxG6AKmgsRPiKoiLsBr,Journey On,Ragtime: The Musical (Original Broadway Cast R...,Ragtime: The Musical (Original Broadway Cast R...,"['Marin Mazzie', 'Peter Friedman', 'Mark Jacob...",Marin Mazzie;Peter Friedman;Mark Jacoby;Rod Ca...,False,False,0.521,0.521,...,4.0,"['26HZcBntJsXRix4UH8VpL8', '4uqPgf1BAv7sl9tNx6...",1ITaTY1cwy5bnG3Dyoucr1,3,1,253333,1998,1998,24,[show-tunes]
4639,42bdU7oDyRvyRXaKbUrtfu,Victory Is Yours (Live),Victory,Victory (Live),"['Bethel Music', 'Bethany Wohrle']",Bethel Music;Bethany Wohrle,False,False,0.418,0.418,...,4.0,"['26T4yOaOoFJvUvxR87Y9HO', '26opZlzcsx1NhaN5QE...",6EsmModhcfmKIeCeNQjeDY,5,1,333080,2019,2019-01-25,44,[world-music]
4646,2JQTce5g0LUUVvhtEFEiGR,When The Children Cry,Mark &apos;Oh,Mark 'Oh,"[""Mark 'Oh""]",Mark 'Oh,False,False,0.476,0.476,...,4.0,['3eLW10XmpXVzwszGQ9gDXX'],4bUZMLuz9femb3vIBt1sbb,2,1,232787,2010,2010-05-21,26,[techno]
4670,2y5w00ZbmGhfp1bQytUtDE,Endless Alleluia (Live),Victory,Victory (Live),"['Bethel Music', 'Cory Asbury']",Bethel Music;Cory Asbury,False,False,0.394,0.394,...,4.0,"['26T4yOaOoFJvUvxR87Y9HO', '2gXmjQIxCO8lMnSncl...",6EsmModhcfmKIeCeNQjeDY,14,1,323347,2019,2019-01-25,45,[world-music]


In [34]:
final_df_pandas.drop('2_album_name', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_album': 'album_name'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,2_danceability,1_energy,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],The Strokes,False,False,0.428,0.426,0.617,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,0.0789,0.16,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],Pink Guy,True,True,0.87,0.87,0.597,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.697,0.697,0.683,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.671,0.671,0.646,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [35]:
(final_df_pandas['1_artists'] == final_df_pandas['2_artists']).all()

False

In [36]:
# show rows with same 'track_id'
count = 0
for cur_id in final_df_pandas['track_id'].unique():
	if final_df_pandas[final_df_pandas['track_id'] == cur_id].shape[0] < 2:
		continue
	if count > 5:
		break
	# show the difference between rows with same 'track_id'
	display(final_df_pandas[final_df_pandas['track_id'] == cur_id])
	count += 1


In [37]:
final_df_pandas[final_df_pandas['1_artists'] != final_df_pandas['2_artists']]

Unnamed: 0,track_id,track_name,album_name,1_artists,2_artists,1_explicit,2_explicit,1_danceability,2_danceability,1_energy,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],The Strokes,False,False,0.4280,0.426,0.617,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],Lights & Motion,False,False,0.0789,0.0789,0.160,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],Pink Guy,True,True,0.8700,0.87,0.597,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],Anne-Marie,False,False,0.6970,0.697,0.683,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",Burna Boy;Chris Martin,True,True,0.6710,0.671,0.646,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4724,6oF8ueLn5hIl4PRp17sxW6,That Would Be Enough,Hamilton (Original Broadway Cast Recording),"['Phillipa Soo', 'Lin-Manuel Miranda']",Phillipa Soo;Lin-Manuel Miranda,False,False,0.4990,0.499,0.200,...,4.0,"['2OEGI2wrCVmvavKEOMlccy', '4aXXDj9aZnlshx7mzj...",1kCHru7uhxBUdzkm4gzRQc,17,1,178446,2015,2015-09-25,63,[show-tunes]
4725,2qf1Cbi3mIFzVnKERuNwx5,In This Corner,Kinky Boots (Original Broadway Cast Recording),"['Billy Porter', 'Tory Ross', 'Daniel Stewart ...",Billy Porter;Tory Ross;Daniel Stewart Sherman;...,False,False,0.6890,0.689,0.726,...,4.0,"['3sjUEh7eXUEL5oZLAEZXD4', '5zwDN0Orwiu22wYaxl...",0VfNZpKsykX5WPuyEdBilR,11,1,296800,2013,2013-05-27,32,[show-tunes]
4726,4NUomtCzWb8WNW1DOjCa8A,El Flete,De Pura Cepa,"[""Juan D'Arienzo y su Orquesta Típica""]",Juan D'Arienzo y su Orquesta Típica,False,False,0.8480,0.848,0.381,...,4.0,['3EZYBGl6qytwvUPRBVQgOP'],02OrTuXLp4AT6uGFiEVYeB,9,1,173640,2005,2005-02-26,17,[tango]
4727,2gVNHsFCboH2rih4HZvRmQ,It Takes Two,Hairspray (Original Broadway Cast Recording),"['Matthew Morrison', 'Marissa Jaret Winokur', ...",Matthew Morrison;Marissa Jaret Winokur;Hairspr...,False,False,0.3140,0.314,0.553,...,3.0,"['3Fx1vJHj530oWC2AxnZq5z', '4zdCODAJOsWFl6I9H7...",3XPjEfpawBvohxBMDXwXXW,6,1,186027,2002,2002-08-13,29,[show-tunes]


In [38]:
final_df_pandas.drop('2_artists', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_artists': 'artists'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,1_explicit,2_explicit,1_danceability,2_danceability,1_energy,2_energy,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,False,0.428,0.426,0.617,0.617,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,False,0.0789,0.0789,0.16,0.16,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,True,0.87,0.87,0.597,0.597,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,False,0.697,0.697,0.683,0.683,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,True,0.671,0.671,0.646,0.646,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [43]:
(str(final_df_pandas['1_explicit']) == final_df_pandas['2_explicit']).all()

False

In [48]:
# convert 2_explicit type from string to boolean
final_df_pandas['2_explicit'] = final_df_pandas['2_explicit'].map({'True': True, 'False': False})

In [49]:
final_df_pandas[final_df_pandas['1_explicit'] != final_df_pandas['2_explicit']]

Unnamed: 0,track_id,track_name,album_name,artists,1_explicit,2_explicit,1_danceability,2_danceability,1_energy,2_energy,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
621,32OlwWuMpZ6b0aN2RZOeMS,Uptown Funk (feat. Bruno Mars),Uptown Special,"['Mark Ronson', 'Bruno Mars']",False,True,0.856,0.856,0.609,0.609,...,4.0,"['3hv9jJF3adDNsBSIQDqcjp', '0du5cEVh5yTK9QJze8...",3vLaOYCNCzngDf8QdBg2V1,4,1,269667,2015,2015-01-12,83,[dance]
1759,0q7oMII7kWTj1ZSX6GT6LU,Crawl Outta Love,Awake,"['ILLENIUM', 'Annika Wells']",False,True,0.376,0.377,0.723,0.723,...,4.0,"['45eNHdiiabvmbp4erw26rg', '0kErUwb6xgWfkdn0Ry...",5rvIgXvAPGWspXY4rDLkeU,2,1,242012,2017,2017-09-21,59,"[dub, dubstep]"
1984,525GY0mXYnxajolwgS8Eaz,Cooler Than Me,Cooler Than Me,['Lucky Luke'],False,True,0.889,0.889,0.716,0.716,...,4.0,['5ee4yhrWOxaxvL77BoVpVR'],5BmY6ZPLmhyIm9bRdN4mWP,1,1,179760,2019,2019-05-17,71,"[electro, electronic]"
2000,19m23w2ANVhtB7rApM6pbN,Back To U,Back To U,"['SLANDER', 'William Black']",False,True,0.586,0.586,0.499,0.499,...,4.0,"['20DZAfCuP1TKZl5KcY7z3Q', '7d5SfGXKpgS3JK8BFI...",0hKOffv9vWtya3CkEXrVhO,1,1,214400,2020,2020-04-24,57,"[dub, dubstep]"
2088,4nRyBgsqXEP2oPfzaMeZr7,Once,Ten,['Pearl Jam'],False,True,0.413,0.413,0.885,0.885,...,4.0,['1w5Kfo2jwwIPruYS2UWh56'],5B4PYA7wNN4WdEXdIJu58a,1,1,231368,1991,1991-08-27,58,[grunge]
2618,4n7jnSxVLd8QioibtTDBDq,On My Way,On My Way,"['Alan Walker', 'Sabrina Carpenter', 'Farruko']",False,True,0.509,0.509,0.689,0.689,...,4.0,"['7vk5e3vY1uw9plTHJAMwjN', '74KM79TiuVKeVCqs8Q...",1bcvtuHyO79DNAOOhHEkEm,1,1,193798,2019,2019-03-21,74,"[electro, house]"
2652,62nQ8UZVqR2RMvkJHkcO2o,Jeremy,Ten,['Pearl Jam'],False,True,0.285,0.285,0.846,0.846,...,4.0,['1w5Kfo2jwwIPruYS2UWh56'],5B4PYA7wNN4WdEXdIJu58a,6,1,318227,1991,1991-08-27,71,[grunge]
2695,2r9hCNjupNy2C2g3r6SNz6,Strongest (Alan Walker Remix),Strongest (Alan Walker Remix),"['Ina Wroldsen', 'Alan Walker']",False,True,0.722,0.722,0.787,0.787,...,4.0,"['7hssUdpvtY5oiARaUDgFZ3', '7vk5e3vY1uw9plTHJA...",3d1YhgZvB8NLZLakA5l4rk,1,1,210286,2017,2017-12-01,66,[house]
3344,0Q3SC6kEhxYagDP3bFe5K9,Keep It Mello (feat. Omar LinX),Joytime,"['Omar LinX', 'Marshmello']",False,True,0.686,0.686,0.888,0.888,...,4.0,"['6fSNDuge2fLINdnCCbpZx6', '64KEffDW9EtZ1y2vBY...",6opKASPALOkPwCfHvqIS9H,10,1,243380,2016,2016-01-08,60,[progressive-house]
3369,45bE4HXI0AwGZXfZtMp8JR,you broke me first,you broke me first,['Tate McRae'],False,True,0.642,0.667,0.374,0.373,...,4.0,['45dkTj5sMRSjrmBSBeiHym'],1RWiRfdNZKDe8VXzzf2VEc,1,1,169266,2020,2020-04-17,84,[pop]


In [50]:
final_df_pandas.drop('1_explicit', axis=1, inplace=True)
final_df_pandas.rename(columns={'2_explicit': 'explicit'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,1_danceability,2_danceability,1_energy,2_energy,1_key,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.426,0.617,0.617,1,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.0789,0.16,0.16,7,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.87,0.597,0.597,7,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.697,0.683,0.683,1,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.671,0.646,0.646,7,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [51]:
(final_df_pandas['1_danceability'] == final_df_pandas['2_danceability']).all()

False

In [52]:
final_df_pandas[final_df_pandas['1_danceability'] != final_df_pandas['2_danceability']]

Unnamed: 0,track_id,track_name,album_name,artists,explicit,1_danceability,2_danceability,1_energy,2_energy,1_key,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.4280,0.426,0.617,0.617,1,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.0789,0.160,0.16,7,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.8700,0.87,0.597,0.597,7,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.6970,0.697,0.683,0.683,1,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.6710,0.671,0.646,0.646,7,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4724,6oF8ueLn5hIl4PRp17sxW6,That Would Be Enough,Hamilton (Original Broadway Cast Recording),"['Phillipa Soo', 'Lin-Manuel Miranda']",False,0.4990,0.499,0.200,0.2,2,...,4.0,"['2OEGI2wrCVmvavKEOMlccy', '4aXXDj9aZnlshx7mzj...",1kCHru7uhxBUdzkm4gzRQc,17,1,178446,2015,2015-09-25,63,[show-tunes]
4725,2qf1Cbi3mIFzVnKERuNwx5,In This Corner,Kinky Boots (Original Broadway Cast Recording),"['Billy Porter', 'Tory Ross', 'Daniel Stewart ...",False,0.6890,0.689,0.726,0.726,5,...,4.0,"['3sjUEh7eXUEL5oZLAEZXD4', '5zwDN0Orwiu22wYaxl...",0VfNZpKsykX5WPuyEdBilR,11,1,296800,2013,2013-05-27,32,[show-tunes]
4726,4NUomtCzWb8WNW1DOjCa8A,El Flete,De Pura Cepa,"[""Juan D'Arienzo y su Orquesta Típica""]",False,0.8480,0.848,0.381,0.381,9,...,4.0,['3EZYBGl6qytwvUPRBVQgOP'],02OrTuXLp4AT6uGFiEVYeB,9,1,173640,2005,2005-02-26,17,[tango]
4727,2gVNHsFCboH2rih4HZvRmQ,It Takes Two,Hairspray (Original Broadway Cast Recording),"['Matthew Morrison', 'Marissa Jaret Winokur', ...",False,0.3140,0.314,0.553,0.553,9,...,3.0,"['3Fx1vJHj530oWC2AxnZq5z', '4zdCODAJOsWFl6I9H7...",3XPjEfpawBvohxBMDXwXXW,6,1,186027,2002,2002-08-13,29,[show-tunes]


In [53]:
final_df_pandas.drop('2_danceability', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_danceability': 'danceability'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,1_energy,2_energy,1_key,2_key,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,0.617,1,1,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,0.16,7,7,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,0.597,7,7,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,0.683,1,1,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,0.646,7,7,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [54]:
final_df_pandas.drop('2_energy', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_energy': 'energy'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,1_key,2_key,1_loudness,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,1,-5.424,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,7,-18.144,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,7,-6.32,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,1,-2.881,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,7,-7.513,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [55]:
final_df_pandas.drop('2_key', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_key': 'key'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,1_loudness,2_loudness,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,-5.423,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,-18.144,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,-6.32,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,-2.881,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,-7.513,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [56]:
final_df_pandas.drop('2_loudness', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_loudness': 'loudness'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,1_mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [57]:
final_df_pandas.drop('2_mode', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_mode': 'mode'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [58]:
final_df_pandas.drop('2_speechiness', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_speechiness': 'speechiness'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [59]:
final_df_pandas.drop('2_acousticness', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_acousticness': 'acousticness'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [60]:
(final_df_pandas['1_instrumentalness'] == final_df_pandas['2_instrumentalness']).all()

False

In [64]:
final_df_pandas[final_df_pandas['1_instrumentalness'] != final_df_pandas['2_instrumentalness']][['1_instrumentalness', '2_instrumentalness']]

Unnamed: 0,1_instrumentalness,2_instrumentalness
0,0.179000,0.181000
1,0.786000,0.786000
29,0.000047,0.000051
83,0.000887,0.000887
89,0.010800,0.010500
...,...,...
4710,0.010900,0.011800
4719,0.016800,0.015400
4720,0.822000,0.822000
4721,0.612000,0.614000


In [65]:
final_df_pandas.drop('1_instrumentalness', axis=1, inplace=True)
final_df_pandas.rename(columns={'2_instrumentalness': 'instrumentalness'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [66]:
final_df_pandas.drop('2_liveness', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_liveness': 'liveness'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [67]:
final_df_pandas.drop('2_valence', axis=1, inplace=True)
final_df_pandas.rename(columns={'1_valence': 'valence'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [69]:
final_df_pandas.drop('1_tempo', axis=1, inplace=True)
final_df_pandas.rename(columns={'2_tempo': 'tempo'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,2_time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [72]:
final_df_pandas.drop('1_time_signature', axis=1, inplace=True)
final_df_pandas.rename(columns={'2_time_signature': 'time_signature'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,1_artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [73]:
final_df_pandas.rename(columns={'1_artist_ids': 'artist_ids'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,1_album_id,1_track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [74]:
final_df_pandas.rename(columns={'1_album_id': 'album_id'}, inplace=True)
final_df_pandas.head()

In [75]:
final_df_pandas.rename(columns={'1_track_number': 'track_number'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,1_disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [76]:
final_df_pandas.rename(columns={'1_disc_number': 'disc_number'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,disc_number,1_duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [77]:
final_df_pandas.rename(columns={'1_duration_ms': 'duration_ms'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,disc_number,duration_ms,1_year,1_release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [80]:
final_df_pandas.rename(columns={'1_year': 'year'}, inplace=True)
final_df_pandas.head()

In [81]:
final_df_pandas.rename(columns={'1_release_date': 'release_date'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,disc_number,duration_ms,year,release_date,2_popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [82]:
final_df_pandas.rename(columns={'2_popularity': 'popularity'}, inplace=True)
final_df_pandas['popularity'] = final_df_pandas['popularity'].astype(int)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,disc_number,duration_ms,year,release_date,popularity,2_track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [83]:
final_df_pandas.rename(columns={'2_track_genre': 'track_genre'}, inplace=True)
final_df_pandas.head()

Unnamed: 0,track_id,track_name,album_name,artists,explicit,danceability,energy,key,loudness,mode,...,time_signature,artist_ids,album_id,track_number,disc_number,duration_ms,year,release_date,popularity,track_genre
0,1BLOVHYYlH4JUHQGcpt75R,Ode To The Mets,The New Abnormal,['The Strokes'],False,0.428,0.617,1,-5.424,0,...,4.0,['0epOFNiUfyON9EYx7Tpr6V'],2xkZV2Hl1Omi8rk2D7t5lN,9,1,351787,2020,2020-04-10,67,"[alt-rock, garage]"
1,2No1A7ZuMaBGxz45jmA9Gw,Glaciers,Bloom,['Lights & Motion'],False,0.0789,0.16,7,-18.144,1,...,4.0,['3ff1CmU6qfTqRAmdrq8EEG'],4N11Xw4rtuswjOeZjqKgay,4,1,176373,2018,2018-02-02,49,[ambient]
2,2v738AcSrqklWKhBpV5alf,Uber Pussy,Pink Season,['Pink Guy'],True,0.87,0.597,7,-6.32,1,...,4.0,['3yJnBqAoRUNeDabdYoiYWD'],3l5PdpQHvUOXlq1w9PftPu,11,1,117361,2017,2017-01-04,39,[comedy]
3,2BgEsaKNfHUdlh97KmvFyo,2002,Speak Your Mind (Deluxe),['Anne-Marie'],False,0.697,0.683,1,-2.881,0,...,4.0,['1zNqDE7qDGCsyzJwohVaoX'],7lPoGKpCGgdKFAxpudhAH5,10,1,186987,2018,2018-04-27,82,"[dance, house, pop]"
4,2cerDm5EbuTNJ68EIMtrF2,Monsters You Made (feat. Chris Martin),Twice As Tall,"['Burna Boy', 'Chris Martin']",True,0.671,0.646,7,-7.513,0,...,4.0,"['3wcj11K77LjEY1PkEazffa', '0LQoZQIV0mIs0y0XQb...",218CJKDCszsQQj7Amk7vIu,12,1,217541,2020,2020-08-13,45,[dancehall]


In [84]:
final_df_pandas.dtypes

track_id             object
track_name           object
album_name           object
artists              object
explicit               bool
danceability        float64
energy              float64
key                   int64
loudness            float64
mode                  int64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
time_signature      float64
artist_ids           object
album_id             object
track_number          int64
disc_number           int64
duration_ms           int64
year                  int64
release_date         object
popularity            int64
track_genre          object
dtype: object

In [85]:
# save final_df_pandas to csv file
OUTPUT_CSV_PATH = 'Dataset/spotify_merged.csv'
final_df_pandas.to_csv(OUTPUT_CSV_PATH, index=False)