In [9]:
library(dplyr)
library(randomForest)
library(caret)
library(tidyverse)
library(recipes)
library(boot)
library(tidymodels)
library(ranger)
library(xgboost)
library(e1071)       #for calculating variable importance
library(rpart)       #for fitting decision trees
library(ipred)
library(SimDesign)


Attaching package: ‘SimDesign’


The following objects are masked from ‘package:caret’:

    MAE, RMSE




In [2]:
songs <- read.csv('processed_songs.csv', stringsAsFactors = TRUE)
head(songs, 10)

Unnamed: 0_level_0,instance_id,artist_name,track_name,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,obtained_date,valence,music_genre
Unnamed: 0_level_1,<int>,<fct>,<fct>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<fct>,<dbl>,<fct>
1,32894,Röyksopp,Röyksopp's Night Out,0,0.00468,0.652,-1,0.941,0.792,2,0.115,-5.201,1,0.0748,100.889,4-Apr,0.759,Electronic
2,46652,Thievery Corporation,The Shining Path,0,0.0127,0.622,218293,0.89,0.95,6,0.124,-7.043,1,0.03,115.002,4-Apr,0.531,Electronic
3,30097,Dillon Francis,Hurricane,0,0.00306,0.62,215613,0.755,0.0118,12,0.534,-4.617,0,0.0345,127.994,4-Apr,0.333,Electronic
4,62177,Dubloadz,Nitro,0,0.0254,0.774,166875,0.7,0.00253,5,0.157,-4.498,0,0.239,128.014,4-Apr,0.27,Electronic
5,24907,What So Not,Divide & Conquer,0,0.00465,0.638,222369,0.587,0.909,10,0.157,-6.266,0,0.0413,145.036,4-Apr,0.323,Electronic
6,43760,Jordan Comolli,Clash,0,0.0289,0.572,214408,0.803,7.74e-06,3,0.106,-4.294,0,0.351,149.995,4-Apr,0.23,Electronic
7,30738,Hraach,Delirio,0,0.0297,0.809,416132,0.706,0.903,11,0.0635,-9.339,1,0.0484,120.008,4-Apr,0.761,Electronic
8,84950,Kayzo,NEVER ALONE,0,0.00299,0.509,292800,0.921,0.000276,9,0.178,-3.175,1,0.268,149.948,4-Apr,0.273,Electronic
9,56950,Shlump,Lazer Beam,0,0.00934,0.578,204800,0.731,0.0112,1,0.111,-7.091,1,0.173,139.933,4-Apr,0.203,Electronic
10,49030,Chase & Status,Lost & Not Found - Acoustic,0,0.855,0.607,170463,0.158,0.0,10,0.106,-13.787,1,0.0345,57.528,4-Apr,0.307,Electronic


In [3]:
set.seed(11111)
feats <- names(songs)[c(5:11,13:15,17)]
train_songs <- songs %>%
  mutate_if(is.numeric, scale)

training_songs <- sample(1:nrow(train_songs), nrow(train_songs)*.75, replace = FALSE)
train_set <- train_songs[training_songs, c('music_genre', feats)] 
test_set <- train_songs[-training_songs, c('music_genre', feats)] 

# Random Forest 

In [4]:
songs_rf <- randomForest(music_genre~., data = train_set, mtry = 4)

pred_train <- predict(songs_rf)
pred_test <- predict(songs_rf, test_set)

In [5]:
confusionMatrix(pred_train, as.factor(train_set$music_genre))

Confusion Matrix and Statistics

             Reference
Prediction    Alternative Anime Blues Classical Country Electronic Hip-Hop Jazz
  Alternative         658   304   233        94     145        198     146   87
  Anime               348  1577   207       156     159        183      30  125
  Blues               292   178  1344        50     323        157      59  479
  Classical             9   381    39      2580       5         12       3  241
  Country             343   264   422        11    1878         63      75  122
  Electronic          280   223   151        55      32       2062     130  435
  Hip-Hop             328    31    85         4     136        152    1335  137
  Jazz                204   200   471       169     126        370      61 1625
  Rap                 304    38    53         0     136         86    1526   82
  Rock                559   184   363        19     427         97      33   55
             Reference
Prediction     Rap Rock
  Alternative  24

In [6]:
confusionMatrix(pred_test, as.factor(test_set$music_genre))

Confusion Matrix and Statistics

             Reference
Prediction    Alternative Anime Blues Classical Country Electronic Hip-Hop Jazz
  Alternative         247   100    60        33      50         62      60   33
  Anime               114   546    66        65      48         64       9   45
  Blues                87    62   410        15     111         48      14  159
  Classical             4    97    17       912       3          3       0   85
  Country             122    82   115         7     626         22      19   46
  Electronic          102    83    44        11       7        635      37  129
  Hip-Hop             131    12    33         0      48         49     453   55
  Jazz                 66    79   185        63      28        118      23  514
  Rap                 106     8    14         0      52         29     496   18
  Rock                189    39   121         5     146         38      10   15
             Reference
Prediction    Rap Rock
  Alternative  70 

In [7]:
var(as.numeric(pred_test), as.numeric(test_set$music_genre))

In [10]:
bias(as.numeric(pred_test), as.numeric(test_set$music_genre))

# Boosting

In [24]:
train_resp <- train_songs[training_songs, 'music_genre']
test_resp <- train_songs[-training_songs, 'music_genre']

In [25]:
matrix_train_gb <- xgb.DMatrix(data = as.matrix(train_set[,-1]), label = as.integer(as.factor(train_set[,1])))
matrix_test_gb <- xgb.DMatrix(data = as.matrix(test_set[,-1]), label = as.integer(as.factor(test_set[,1])))

model_gb <- xgboost(data = matrix_train_gb, 
                    nrounds = 50,
                    verbose = FALSE,
                    params = list(objective = "multi:softmax",
                                  num_class = 10 + 1))

predict_gb_one <- predict(model_gb, matrix_test_gb)
predict_gb <- levels(as.factor(test_set$music_genre))[predict_gb_one]


In [26]:
confusionMatrix(as.factor(predict_gb), as.factor(test_set$music_genre))

Confusion Matrix and Statistics

             Reference
Prediction    Alternative Anime Blues Classical Country Electronic Hip-Hop Jazz
  Alternative         218    96    68        25      40         59      30   20
  Anime               124   543    55        75      37         63       9   48
  Blues                92    51   432        15     124         48      15  168
  Classical            12   102    19       914       5          6       1   83
  Country             136    89   125         6     641         23      19   41
  Electronic          115    79    37        11       9        632      33  118
  Hip-Hop             118     4    28         0      48         54     509   48
  Jazz                 79    74   175        56      27        104      28  526
  Rap                 100    14    19         0      50         35     462   23
  Rock                174    56   107         9     138         44      15   24
             Reference
Prediction    Rap Rock
  Alternative  68 

In [27]:
var(as.numeric(predict_gb_one), as.numeric(test_set$music_genre))

In [28]:
bias(as.numeric(predict_gb_one), as.numeric(test_set$music_genre))

# Bagging

In [None]:
gbag <- bagging(music_genre ~ ., data = train_set, coob=TRUE)
predict_bag <- predict(gbag, newdata=test_set)

In [20]:
confusionMatrix(as.factor(predict_bag), as.factor(test_set$music_genre))

Confusion Matrix and Statistics

             Reference
Prediction    Alternative Anime Blues Classical Country Electronic Hip-Hop Jazz
  Alternative         214   100    67        28      71         78      66   37
  Anime               116   516    75        87      51         58      11   40
  Blues                85    59   363        23     122         55      16  157
  Classical             9    91    14       886       2          2       1   82
  Country             117    88   120         6     561         24      15   41
  Electronic          100    83    50        11      11        592      44  137
  Hip-Hop             133    17    31         0      50         49     390   52
  Jazz                 56    76   175        61      24        138      22  483
  Rap                  99     7    20         2      55         29     542   31
  Rock                239    71   150         7     172         43      14   39
             Reference
Prediction    Rap Rock
  Alternative  70 

In [21]:
var(as.numeric(predict_bag), as.numeric(test_set$music_genre))

In [22]:
bias(as.numeric(predict_bag), as.numeric(test_set$music_genre))