In [1]:
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import RegressionEvaluator

In [2]:
spotify_sdf=spark.read.parquet('gs://my-bucket-mpat/cleaned/Final_clean_data2.parquet')

                                                                                

# TEST LINEAR REG

In [3]:
features = ['artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness', 'acousticness', 'duration', 'loudness', 'tempo', 'time_signature', 'artist_popularity']

label='track_popularity'

In [4]:

# Assemble features into a single feature vector column
assembler = VectorAssembler(inputCols=features, outputCol="features")

# Initialize Linear Regression model
lr = LinearRegression(labelCol="track_popularity", featuresCol="features")

# Split the data into training and testing sets (80% training, 20% testing)
(train_data, test_data) = spotify_sdf.randomSplit([0.8, 0.2], seed=123)

# Define Pipeline
pipeline = Pipeline(stages=[assembler, lr])




In [6]:
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.evaluation import BinaryClassificationEvaluator


evaluator = RegressionEvaluator(labelCol='track_popularity')


# Create a grid to hold hyperparameters
grid = ParamGridBuilder()

# Build the parameter grid
grid = grid.build()

# Create the CrossValidator using the hyperparameter grid
cv = CrossValidator(estimator=pipeline, estimatorParamMaps=grid, evaluator=evaluator, numFolds=3)

# Train the models
all_models = cv.fit(train_data)

# Get the best model from all of the models trained
bestModel = all_models.bestModel

# Use the model 'bestModel' to predict the test set
test_results = bestModel.transform(test_data)

# Show the predicted tip
test_results.select('prediction','track_popularity','artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness').show(truncate=False)

# Calculate RMSE and R2
rmse = evaluator.evaluate(test_results, {evaluator.metricName:'rmse'})
r2 =evaluator.evaluate(test_results,{evaluator.metricName:'r2'})
print(f"RMSE: {rmse} R-squared:{r2}")

24/05/03 19:42:02 WARN Instrumentation: [983a0f72] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:42:28 WARN Instrumentation: [ba1e71eb] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:42:39 WARN Instrumentation: [7cff929f] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:42:47 WARN Instrumentation: [b2735554] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:42:53 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|prediction        |track_popularity|artist_followers|danceability|instrumentalness|liveness|valence|energy|key |speechiness|
+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|45.98961951236973 |49              |489658          |0.552       |0.0             |0.111   |0.714  |0.804 |8.0 |0.0303     |
|36.68736457014061 |18              |32614           |0.44        |0.0             |0.232   |0.7    |0.525 |5.0 |0.0324     |
|34.19395754491041 |43              |63056           |0.739       |0.77            |0.0983  |0.25   |0.808 |11.0|0.0776     |
|26.605806623479467|36              |6597            |0.49        |0.0             |0.312   |0.612  |0.949 |8.0 |0.0565     |
|19.81142103725319 |35              |181             |0.596       |0.853           |0.0856  |0.339  |0.0199|2.0 |0.308



RMSE: 20.792239815613907 R-squared:0.31997263760400807


                                                                                

# DURATION SCALING

In [7]:
train_data.printSchema()

root
 |-- track_name: string (nullable = true)
 |-- artists_names: string (nullable = true)
 |-- track_uri: string (nullable = true)
 |-- track_popularity: long (nullable = true)
 |-- artists_uris: string (nullable = true)
 |-- playlist_uris: string (nullable = true)
 |-- artist_popularity: long (nullable = true)
 |-- artist_followers: long (nullable = true)
 |-- playlist_name: string (nullable = true)
 |-- n_tracks: double (nullable = true)
 |-- danceability: double (nullable = true)
 |-- instrumentalness: double (nullable = true)
 |-- liveness: double (nullable = true)
 |-- valence: double (nullable = true)
 |-- energy: double (nullable = true)
 |-- key: double (nullable = true)
 |-- speechiness: double (nullable = true)
 |-- acousticness: double (nullable = true)
 |-- duration: double (nullable = true)
 |-- loudness: double (nullable = true)
 |-- tempo: double (nullable = true)
 |-- time_signature: double (nullable = true)
 |-- __index_level_0__: long (nullable = true)



In [8]:
train_data.select(features).show(truncate=False)



+----------------+------------+----------------+--------+-------+------+----+-----------+------------+---------+--------+-------+--------------+-----------------+
|artist_followers|danceability|instrumentalness|liveness|valence|energy|key |speechiness|acousticness|duration |loudness|tempo  |time_signature|artist_popularity|
+----------------+------------+----------------+--------+-------+------+----+-----------+------------+---------+--------+-------+--------------+-----------------+
|78663825        |0.0         |0.0             |0.669   |0.0    |0.278 |1.0 |0.0        |0.768       |13.57837 |-21.63  |0.0    |0.0           |89               |
|24              |0.634       |0.127           |0.829   |0.82   |0.206 |10.0|0.0416     |0.927       |186.43973|-15.029 |93.24  |4.0           |0                |
|10930205        |0.371       |0.582           |0.183   |0.31   |0.545 |8.0 |0.0307     |0.185       |157.66667|-9.315  |150.316|4.0           |80               |
|489658          |0.55

                                                                                

In [9]:
spotify_sdf.groupby('time_signature').count().show()

[Stage 37:>                                                         (0 + 8) / 8]

+--------------+------+
|time_signature| count|
+--------------+------+
|           1.0|  4609|
|           5.0|  8184|
|           4.0|387748|
|           0.0|   158|
|           3.0| 44056|
+--------------+------+



                                                                                

In [10]:
spotify_sdf.groupby('key').count().show()

+----+-----+
| key|count|
+----+-----+
| 1.0|43576|
| 6.0|29433|
| 5.0|38677|
| 2.0|46137|
| 4.0|33430|
|10.0|29741|
| 8.0|29823|
| 0.0|52018|
| 7.0|50094|
|11.0|33731|
| 3.0|15605|
| 9.0|42490|
+----+-----+





In [3]:
from pyspark.ml.feature import MinMaxScaler



dur_assembler = VectorAssembler(inputCols=['duration'], outputCol='durationVector')
spotify_sdf = dur_assembler.transform(spotify_sdf)


dur_scaler = MinMaxScaler(inputCol="durationVector", outputCol="DurScaled")
spotify_sdf = dur_scaler.fit(spotify_sdf).transform(spotify_sdf)



                                                                                

In [12]:
spotify_sdf.select('DurScaled','durationVector','duration').show()

+--------------------+--------------+---------+
|           DurScaled|durationVector| duration|
+--------------------+--------------+---------+
|[0.04715336726998...|    [191.2258]| 191.2258|
|[0.04715336726998...|    [191.2258]| 191.2258|
|[0.04857783205893...|       [196.8]|    196.8|
|[0.04857783205893...|       [196.8]|    196.8|
|[0.04857783205893...|       [196.8]|    196.8|
|[0.04857783205893...|       [196.8]|    196.8|
|[0.05105307153778...|   [206.48608]|206.48608|
|[0.05745642689421...|   [231.54362]|231.54362|
|[0.06361972484723...|   [255.66177]|255.66177|
|[0.06361972484723...|   [255.66177]|255.66177|
|[0.03889175515016...|   [158.89655]|158.89655|
|[0.04675436789018...|   [189.66444]|189.66444|
|[0.05113152928589...|    [206.7931]| 206.7931|
|[0.05113152928589...|    [206.7931]| 206.7931|
|[0.04970204557252...|   [201.19926]|201.19926|
|[0.04970204557252...|   [201.19926]|201.19926|
|[0.05574772590081...|   [224.85715]|224.85715|
|[0.08440538819458...|       [337.0]|   

# SECON TEST

In [4]:
features = ['artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness', 'acousticness', 'DurScaled', 'loudness', 'tempo', 'time_signature','artist_popularity']

# Assemble features into a single feature vector column
assembler = VectorAssembler(inputCols=features, outputCol="features")

# Initialize Linear Regression model
lr = LinearRegression(labelCol="track_popularity", featuresCol="features")

# Split the data into training and testing sets (80% training, 20% testing)
(train_data, test_data) = spotify_sdf.randomSplit([0.8, 0.2], seed=123)

# Define Pipeline
pipeline = Pipeline(stages=[assembler, lr])

In [14]:
evaluator = RegressionEvaluator(labelCol='track_popularity')


# Create a grid to hold hyperparameters
grid = ParamGridBuilder()

# Build the parameter grid
grid = grid.build()

# Create the CrossValidator using the hyperparameter grid
cv = CrossValidator(estimator=pipeline, estimatorParamMaps=grid, evaluator=evaluator, numFolds=3)

# Train the models
all_models = cv.fit(train_data)

# Get the best model from all of the models trained
bestModel = all_models.bestModel

# Use the model 'bestModel' to predict the test set
test_results = bestModel.transform(test_data)

# Show the predicted tip
test_results.select('prediction','track_popularity','artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness').show(truncate=False)

# Calculate RMSE and R2
rmse = evaluator.evaluate(test_results, {evaluator.metricName:'rmse'})
r2 =evaluator.evaluate(test_results,{evaluator.metricName:'r2'})
print(f"RMSE: {rmse} R-squared:{r2}")

24/05/03 19:43:55 WARN Instrumentation: [a341613e] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:44:09 WARN Instrumentation: [ecbbb9e5] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:44:23 WARN Instrumentation: [2c1de1e7] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:44:30 WARN Instrumentation: [afbdb8b0] regParam is zero, which might cause numerical instability and overfitting.
                                                                                

+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|prediction        |track_popularity|artist_followers|danceability|instrumentalness|liveness|valence|energy|key |speechiness|
+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|45.989619512365365|49              |489658          |0.552       |0.0             |0.111   |0.714  |0.804 |8.0 |0.0303     |
|36.68736457013523 |18              |32614           |0.44        |0.0             |0.232   |0.7    |0.525 |5.0 |0.0324     |
|34.19395754491349 |43              |63056           |0.739       |0.77            |0.0983  |0.25   |0.808 |11.0|0.0776     |
|26.605806623474695|36              |6597            |0.49        |0.0             |0.312   |0.612  |0.949 |8.0 |0.0565     |
|19.811421037255794|35              |181             |0.596       |0.853           |0.0856  |0.339  |0.0199|2.0 |0.308



RMSE: 20.792239815614032 R-squared:0.31997263760399985


                                                                                

# LOUDNESS SCALING

In [5]:
dur_assembler = VectorAssembler(inputCols=['loudness'], outputCol='loudnessVector')
spotify_sdf = dur_assembler.transform(spotify_sdf)


dur_scaler = MinMaxScaler(inputCol="loudnessVector", outputCol="loudnessScaled")
spotify_sdf = dur_scaler.fit(spotify_sdf).transform(spotify_sdf)



                                                                                

In [16]:
spotify_sdf.select('loudnessScaled','loudnessVector','loudness').show()

+--------------------+--------------+--------+
|      loudnessScaled|loudnessVector|loudness|
+--------------------+--------------+--------+
|[0.7781048672975556]|      [-9.515]|  -9.515|
|[0.7781048672975556]|      [-9.515]|  -9.515|
|[0.8658179464258192]|      [-3.824]|  -3.824|
|[0.8658179464258192]|      [-3.824]|  -3.824|
|[0.8658179464258192]|      [-3.824]|  -3.824|
|[0.8658179464258192]|      [-3.824]|  -3.824|
|[0.8044141672574828]|      [-7.808]|  -7.808|
| [0.804876545112666]|      [-7.778]|  -7.778|
| [0.860685552233285]|      [-4.157]|  -4.157|
| [0.860685552233285]|      [-4.157]|  -4.157|
|[0.7835455133935452]|      [-9.162]|  -9.162|
|[0.6799574612373231]|     [-15.883]| -15.883|
| [0.816328103326038]|      [-7.035]|  -7.035|
| [0.816328103326038]|      [-7.035]|  -7.035|
|[0.8252674085262476]|      [-6.455]|  -6.455|
|[0.8252674085262476]|      [-6.455]|  -6.455|
|[0.7997441509201318]|      [-8.111]|  -8.111|
|[0.7756542646650842]|      [-9.674]|  -9.674|
|[0.771323325

# THIRD TEST

In [8]:
features = ['artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness', 'acousticness', 'DurScaled', 'loudnessScaled', 'tempo', 'time_signature','artist_popularity']

# Assemble features into a single feature vector column
assembler = VectorAssembler(inputCols=features, outputCol="features")

# Initialize Linear Regression model
lr = LinearRegression(labelCol="track_popularity", featuresCol="features")

# Split the data into training and testing sets (80% training, 20% testing)
(train_data, test_data) = spotify_sdf.randomSplit([0.8, 0.2], seed=123)

# Define Pipeline
pipeline = Pipeline(stages=[assembler, lr])

In [18]:
evaluator = RegressionEvaluator(labelCol='track_popularity')


# Create a grid to hold hyperparameters
grid = ParamGridBuilder()

# Build the parameter grid
grid = grid.build()

# Create the CrossValidator using the hyperparameter grid
cv = CrossValidator(estimator=pipeline, estimatorParamMaps=grid, evaluator=evaluator, numFolds=3)

# Train the models
all_models = cv.fit(train_data)

# Get the best model from all of the models trained
bestModel = all_models.bestModel

# Use the model 'bestModel' to predict the test set
test_results = bestModel.transform(test_data)

# Show the predicted tip
test_results.select('prediction','track_popularity','artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness').show(truncate=False)

# Calculate RMSE and R2
rmse = evaluator.evaluate(test_results, {evaluator.metricName:'rmse'})
r2 =evaluator.evaluate(test_results,{evaluator.metricName:'r2'})
print(f"RMSE: {rmse} R-squared:{r2}")

24/05/03 19:45:24 WARN Instrumentation: [bd96e464] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:45:38 WARN Instrumentation: [f75ca085] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:45:53 WARN Instrumentation: [28efa9f8] regParam is zero, which might cause numerical instability and overfitting.
24/05/03 19:46:00 WARN Instrumentation: [a446f35d] regParam is zero, which might cause numerical instability and overfitting.
                                                                                

+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|prediction        |track_popularity|artist_followers|danceability|instrumentalness|liveness|valence|energy|key |speechiness|
+------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|45.98961951237543 |49              |489658          |0.552       |0.0             |0.111   |0.714  |0.804 |8.0 |0.0303     |
|36.68736457013288 |18              |32614           |0.44        |0.0             |0.232   |0.7    |0.525 |5.0 |0.0324     |
|34.19395754491406 |43              |63056           |0.739       |0.77            |0.0983  |0.25   |0.808 |11.0|0.0776     |
|26.60580662349178 |36              |6597            |0.49        |0.0             |0.312   |0.612  |0.949 |8.0 |0.0565     |
|19.811421037230048|35              |181             |0.596       |0.853           |0.0856  |0.339  |0.0199|2.0 |0.308



RMSE: 20.792239815613993 R-squared:0.3199726376040025


                                                                                

# ENCODING KEY

In [6]:
from pyspark.ml.feature import OneHotEncoder, StringIndexer

indexer = StringIndexer(inputCols=['key'], outputCols=['KeyIndex'])
spotify_sdf = indexer.fit(spotify_sdf).transform(spotify_sdf)

                                                                                

In [7]:
encoder = OneHotEncoder(inputCols=['KeyIndex'], outputCols=['KeyVector'], dropLast=False)
spotify_sdf = encoder.fit(spotify_sdf).transform(spotify_sdf)

# FOURTH TEST

In [8]:
features = ['artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'KeyVector', 
                'speechiness', 'acousticness', 'DurScaled', 'loudnessScaled', 'tempo', 'time_signature','artist_popularity']

# Assemble features into a single feature vector column
assembler = VectorAssembler(inputCols=features, outputCol="features")

# Initialize Linear Regression model
lr = LinearRegression(labelCol="track_popularity", featuresCol="features")

# Split the data into training and testing sets (80% training, 20% testing)
(train_data, test_data) = spotify_sdf.randomSplit([0.8, 0.2], seed=123)

# Define Pipeline
pipeline = Pipeline(stages=[assembler, lr])

from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.evaluation import BinaryClassificationEvaluator


In [9]:
evaluator = RegressionEvaluator(labelCol='track_popularity')


# Create a grid to hold hyperparameters
grid = ParamGridBuilder()

# Build the parameter grid
grid = grid.build()

# Create the CrossValidator using the hyperparameter grid
cv = CrossValidator(estimator=pipeline, estimatorParamMaps=grid, evaluator=evaluator, numFolds=3)

# Train the models
all_models = cv.fit(train_data)

# Get the best model from all of the models trained
bestModel = all_models.bestModel

# Use the model 'bestModel' to predict the test set
test_results = bestModel.transform(test_data)

# Show the predicted tip
test_results.select('prediction','track_popularity','artist_followers','danceability', 'instrumentalness', 'liveness', 'valence', 'energy', 'key', 
                'speechiness').show(truncate=False)

# Calculate RMSE and R2
rmse = evaluator.evaluate(test_results, {evaluator.metricName:'rmse'})
r2 =evaluator.evaluate(test_results,{evaluator.metricName:'r2'})
print(f"RMSE: {rmse} R-squared:{r2}")

24/05/17 20:59:14 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
24/05/17 20:59:25 WARN Instrumentation: [e24ea526] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 20:59:48 WARN Instrumentation: [2a1b2dd6] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 20:59:49 WARN Instrumentation: [2a1b2dd6] Cholesky solver failed due to singular covariance matrix. Retrying with Quasi-Newton solver.
24/05/17 21:00:06 WARN Instrumentation: [2f422dd5] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:00:15 WARN Instrumentation: [ff83754a] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:00:20 WARN Instrumentation: [ff83754a] Cholesky solver failed due to singular covariance matrix. Retrying with Quasi-Newton solver.
                   

+-------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|prediction         |track_popularity|artist_followers|danceability|instrumentalness|liveness|valence|energy|key |speechiness|
+-------------------+----------------+----------------+------------+----------------+--------+-------+------+----+-----------+
|52.495168660847085 |39              |10930205        |0.371       |0.582           |0.183   |0.31   |0.545 |8.0 |0.0307     |
|34.374649275115225 |43              |63056           |0.739       |0.77            |0.0983  |0.25   |0.808 |11.0|0.0776     |
|38.58767914381838  |56              |149020          |0.264       |0.0442          |0.127   |0.159  |0.951 |7.0 |0.146      |
|38.962748701295894 |31              |485841          |0.561       |1.09E-6         |0.685   |0.578  |0.491 |7.0 |0.121      |
|5.631748890333073  |0               |1873341         |0.392       |0.781           |0.0909  |0.0977 |0.29  |2.



RMSE: 20.734361287190037 R-squared:0.3277834994110199


                                                                                

In [10]:
# Create a grid to hold hyperparameters

grid = ParamGridBuilder()

# Add hyperparameters to the grid
grid = grid.addGrid(lr.regParam, [0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
grid = grid.addGrid(lr.elasticNetParam, [0, 1])

# Build the grid
grid = grid.build()

print('Number of models to be tested: ', len(grid))

# Create the CrossValidator using the pipeline and the new hyperparameter grid
cv = CrossValidator(estimator=pipeline, estimatorParamMaps=grid, evaluator=evaluator, numFolds=3)

# Call cv.fit() to create models with all of the combinations of parameters in the grid
all_models = cv.fit(train_data)

# Print average metrics for each model
print("Average Metrics for Each model: ", all_models.avgMetrics)

Number of models to be tested:  12


24/05/17 21:00:47 WARN Instrumentation: [a9b91c5a] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:00:55 WARN Instrumentation: [2f2c5e68] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:01:28 WARN Instrumentation: [b11d9487] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:01:29 WARN Instrumentation: [b11d9487] Cholesky solver failed due to singular covariance matrix. Retrying with Quasi-Newton solver.
24/05/17 21:01:35 WARN Instrumentation: [9263a6bb] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:01:35 WARN Instrumentation: [9263a6bb] Cholesky solver failed due to singular covariance matrix. Retrying with Quasi-Newton solver.
24/05/17 21:02:05 WARN Instrumentation: [11d7b26c] regParam is zero, which might cause numerical instability and overfitting.
24/05/17 21:02:13 WARN Instrumentation: [c30a5ba1] regParam is zero, which might c

Average Metrics for Each model:  [20.742060602935307, 20.742060602935307, 20.74230338363797, 20.756030797042786, 20.743086542484818, 20.778510729499533, 20.74437439597971, 20.806363845442593, 20.746134084261577, 20.840046917318023, 20.748335252212737, 20.877070474630386]


                                                                                

In [14]:
bestModel = cv_model.bestModel


lr_model = bestModel.stages[-1]


coefficients = lr_model.coefficients
intercept = lr_model.intercept

print("Intercept:", intercept)
for i, feature in enumerate(features):
    print(f"Coefficient for {feature}: {coefficients[i]}")

Intercept: 9.426188764432252
Coefficient for artist_followers: 1.1258761566672822e-07
Coefficient for danceability: 8.001562344660753
Coefficient for instrumentalness: -4.839850073133193
Coefficient for liveness: -5.660768597265518
Coefficient for valence: -2.657148116154767
Coefficient for energy: -0.4001126738104274
Coefficient for KeyVector: -0.5743494121465644
Coefficient for speechiness: -0.8211715970350232
Coefficient for acousticness: -0.7185976535062186
Coefficient for DurScaled: 0.33634080805933814
Coefficient for loudnessScaled: 0.006746080839926616
Coefficient for tempo: -0.1616891564250245
Coefficient for time_signature: 0.708947477173173
Coefficient for artist_popularity: 0.5613293519339584


In [23]:
spotify_sdf.write.mode("overwrite").format("parquet").save("gs://my-bucket-mpat/trusted/spotifyDF_features.parquet")

                                                                                

In [25]:
all_models.save("gs://my-bucket-mpat/Model/all_LR_models")

                                                                                