# NLP Methods on Music Reviews
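This notebook covers ingesting the preprocessed text of music reviews, retrieved from the Kaggle dataset [Pitchfork reviews](https://www.kaggle.com/nolanbconaway/pitchfork-data), counting the positive, negative, and neutral words in each review, and fitting a baseline model on those counts.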

In [30]:
# One-time setup, only needed if textblob is missing from the kernel's environment:
# %pip install textblob==0.17.1

Collecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
[K     |████████████████████████████████| 636 kB 1.4 MB/s eta 0:00:01
Installing collected packages: textblob
Successfully installed textblob-0.17.1


In [112]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.linear_model import LinearRegression

## Baseline Implementation

In [32]:
# Load the already-preprocessed review table; later cells read its
# 'preprocessed_review' and 'score' columns.
df = pd.read_parquet(path="datasets/processed_reviews.parquet")

#### Importing Lexicons/Counting Sentiments

In [33]:
 
def sentiment_counter(token_list, threshold=0.5, polarity_fn=None):
    """Bucket tokens into positive, negative and neutral lists by polarity.

    Every token is scored on its own, without sentence context. A token is
    positive when its polarity is >= ``threshold``, negative when it is
    <= ``-threshold`` and neutral otherwise. Input order and duplicates are
    preserved, so the length of each list is the count for that bucket.

    Parameters
    ----------
    token_list : iterable of str
        Tokens to classify, e.g. ``review.split()``.
    threshold : float, default 0.5
        Non-negative polarity cut-off applied symmetrically to both signs.
    polarity_fn : callable, optional
        Maps a single token to a numeric polarity. When omitted, the
        polarity reported by ``TextBlob(token).sentiment`` is used.

    Returns
    -------
    tuple of (list, list, list)
        ``(pos_word_list, neg_word_list, neu_word_list)``.

    Raises
    ------
    ValueError
        If ``threshold`` is negative; the positive and negative ranges would
        overlap and the buckets would no longer be meaningful.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    if polarity_fn is None:
        def _textblob_polarity(word):
            return TextBlob(word).sentiment.polarity
        score_word = _textblob_polarity
    else:
        score_word = polarity_fn

    pos_word_list = []
    neg_word_list = []
    neu_word_list = []

    for word in token_list:
        # Score each token exactly once and reuse the value for both checks.
        polarity = score_word(word)
        if polarity >= threshold:
            pos_word_list.append(word)
        elif polarity <= -threshold:
            neg_word_list.append(word)
        else:
            neu_word_list.append(word)
    return pos_word_list, neg_word_list, neu_word_list
        

In [37]:
# Attach the (positive, negative, neutral) word lists to every review.
# Each review is split on whitespace first, then scored token by token.

df["Sentiment List (Pos/Neg/Neu)"] = (
    df["preprocessed_review"]
    .map(lambda review_text: review_text.split())
    .map(sentiment_counter)
)
# df.to_parquet("datasets/reviews_w_sentiment.parquet")
df.head()

Unnamed: 0,id,review,track,artist,url,score,best_new_music,author,author_type,date,weekday,day,month,year,preprocessed_review,Sentiment List (Pos/Neg/Neu)
0,22703,"“Trip-hop” eventually became a ’90s punchline,...",mezzanine,massive attack,http://pitchfork.com/reviews/albums/22703-mezz...,9.3,0,nate patrin,contributor,2017-01-08,6,8,1,2017,trip-hop eventually became 90 punchline music-...,"([best, famous, love, beautiful, top, sure, cr..."
1,22721,"Eight years, five albums, and two EPs in, the ...",prelapsarian,krallice,http://pitchfork.com/reviews/albums/22721-prel...,7.9,0,zoe camp,contributor,2017-01-07,5,7,1,2017,eight year five album two eps new york-based o...,"([great, best, spontaneously, good], [grim, ha..."
2,22659,Minneapolis’ Uranium Club seem to revel in bei...,all of them naturals,uranium club,http://pitchfork.com/reviews/albums/22659-all-...,7.3,0,david glickman,contributor,2017-01-07,5,7,1,2017,minneapolis uranium club seem revel aggressive...,"([many, love, talented, perfect], [vaguely, ca..."
3,22661,Kleenex began with a crash. It transpired one ...,first songs,"kleenex, liliput",http://pitchfork.com/reviews/albums/22661-firs...,9.0,1,jenn pelly,associate reviews editor,2017-01-06,4,6,1,2017,kleenex began crash transpired one night long ...,"([nice, delighted, ok, greatest, perfect, amus..."
4,22725,It is impossible to consider a given release b...,new start,taso,http://pitchfork.com/reviews/albums/22725-new-...,8.1,0,kevin lozano,tracks coordinator,2017-01-06,4,6,1,2017,impossible consider given release footwork art...,"([remarkable, best, warm, perfect, perfect, ab..."


In [38]:
# Derive one count column per sentiment bucket: the stored tuple is
# (positive words, negative words, neutral words), so position i maps to column i.

sentiment_lists = df["Sentiment List (Pos/Neg/Neu)"]
for bucket_idx, count_col in enumerate(
    ("Positive Sentiment Count", "Negative Sentiment Count", "Neutral Sentiment Count")
):
    df[count_col] = sentiment_lists.map(lambda buckets, i=bucket_idx: len(buckets[i]))
df

Unnamed: 0,id,review,track,artist,url,score,best_new_music,author,author_type,date,weekday,day,month,year,preprocessed_review,Sentiment List (Pos/Neg/Neu),Positive Sentiment Count,Negative Sentiment Count,Neutral Sentiment Count
0,22703,"“Trip-hop” eventually became a ’90s punchline,...",mezzanine,massive attack,http://pitchfork.com/reviews/albums/22703-mezz...,9.3,0,nate patrin,contributor,2017-01-08,6,8,1,2017,trip-hop eventually became 90 punchline music-...,"([best, famous, love, beautiful, top, sure, cr...",11,3,846
1,22721,"Eight years, five albums, and two EPs in, the ...",prelapsarian,krallice,http://pitchfork.com/reviews/albums/22721-prel...,7.9,0,zoe camp,contributor,2017-01-07,5,7,1,2017,eight year five album two eps new york-based o...,"([great, best, spontaneously, good], [grim, ha...",4,3,269
2,22659,Minneapolis’ Uranium Club seem to revel in bei...,all of them naturals,uranium club,http://pitchfork.com/reviews/albums/22659-all-...,7.3,0,david glickman,contributor,2017-01-07,5,7,1,2017,minneapolis uranium club seem revel aggressive...,"([many, love, talented, perfect], [vaguely, ca...",4,3,319
3,22661,Kleenex began with a crash. It transpired one ...,first songs,"kleenex, liliput",http://pitchfork.com/reviews/albums/22661-firs...,9.0,1,jenn pelly,associate reviews editor,2017-01-06,4,6,1,2017,kleenex began crash transpired one night long ...,"([nice, delighted, ok, greatest, perfect, amus...",16,2,727
4,22725,It is impossible to consider a given release b...,new start,taso,http://pitchfork.com/reviews/albums/22725-new-...,8.1,0,kevin lozano,tracks coordinator,2017-01-06,4,6,1,2017,impossible consider given release footwork art...,"([remarkable, best, warm, perfect, perfect, ab...",11,1,297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18396,1535,The marketing guys of yer average modern megac...,let us replay!,coldcut,http://pitchfork.com/reviews/albums/1535-let-u...,8.9,0,james p. wisdom,,1999-01-26,1,26,1,1999,marketing guy yer average modern megaconglomer...,"([love, interesting, good, good, pleased], [va...",5,2,273
18397,1341,"Well, it's been two weeks now, and I guess it'...",1999,cassius,http://pitchfork.com/reviews/albums/1341-1999/,4.8,0,james p. wisdom,,1999-01-26,1,26,1,1999,well 's two week guess 's time tell guy someth...,"([plausible, sure, sure, sure, beloved, better...",6,2,126
18398,5376,"Out of Tune is a Steve Martin album. Yes, I'l...",out of tune,mojave 3,http://pitchfork.com/reviews/albums/5376-out-o...,6.3,0,jason josephes,contributor,1999-01-12,1,12,1,1999,tune steve martin album yes 'll explain upon t...,"([notably, catching, sure, good, good, happy, ...",9,9,439
18399,2413,"Well, kids, I just went back and re-read my re...","singles breaking up, vol. 1",don caballero,http://pitchfork.com/reviews/albums/2413-singl...,7.2,0,james p. wisdom,,1999-01-12,1,12,1,1999,well kid went back re-read review guy last alb...,"([love, sure, sure, love, perfect, enjoying], ...",6,2,235


In [74]:
# Binary target: 1 for reviews scored 7 or higher, 0 otherwise.
df["label"] = (df["score"] >= 7).astype("int64")
df.head()

Unnamed: 0,id,review,track,artist,url,score,best_new_music,author,author_type,date,weekday,day,month,year,preprocessed_review,Sentiment List (Pos/Neg/Neu),Positive Sentiment Count,Negative Sentiment Count,Neutral Sentiment Count,label
0,22703,"“Trip-hop” eventually became a ’90s punchline,...",mezzanine,massive attack,http://pitchfork.com/reviews/albums/22703-mezz...,9.3,0,nate patrin,contributor,2017-01-08,6,8,1,2017,trip-hop eventually became 90 punchline music-...,"([best, famous, love, beautiful, top, sure, cr...",11,3,846,1
1,22721,"Eight years, five albums, and two EPs in, the ...",prelapsarian,krallice,http://pitchfork.com/reviews/albums/22721-prel...,7.9,0,zoe camp,contributor,2017-01-07,5,7,1,2017,eight year five album two eps new york-based o...,"([great, best, spontaneously, good], [grim, ha...",4,3,269,1
2,22659,Minneapolis’ Uranium Club seem to revel in bei...,all of them naturals,uranium club,http://pitchfork.com/reviews/albums/22659-all-...,7.3,0,david glickman,contributor,2017-01-07,5,7,1,2017,minneapolis uranium club seem revel aggressive...,"([many, love, talented, perfect], [vaguely, ca...",4,3,319,1
3,22661,Kleenex began with a crash. It transpired one ...,first songs,"kleenex, liliput",http://pitchfork.com/reviews/albums/22661-firs...,9.0,1,jenn pelly,associate reviews editor,2017-01-06,4,6,1,2017,kleenex began crash transpired one night long ...,"([nice, delighted, ok, greatest, perfect, amus...",16,2,727,1
4,22725,It is impossible to consider a given release b...,new start,taso,http://pitchfork.com/reviews/albums/22725-new-...,8.1,0,kevin lozano,tracks coordinator,2017-01-06,4,6,1,2017,impossible consider given release footwork art...,"([remarkable, best, warm, perfect, perfect, ab...",11,1,297,1


## Modeling

In [94]:
# Keep only the two target columns and the three sentiment-count features.

df_model = df.loc[
    :,
    [
        "score",
        "label",
        "Positive Sentiment Count",
        "Negative Sentiment Count",
        "Neutral Sentiment Count",
    ],
]
df_model

Unnamed: 0,score,label,Positive Sentiment Count,Negative Sentiment Count,Neutral Sentiment Count
0,9.3,1,11,3,846
1,7.9,1,4,3,269
2,7.3,1,4,3,319
3,9.0,1,16,2,727
4,8.1,1,11,1,297
...,...,...,...,...,...
18396,8.9,1,5,2,273
18397,4.8,0,6,2,126
18398,6.3,0,9,9,439
18399,7.2,1,6,2,235


In [95]:
# Features are whatever remains after removing both targets; the target is the binary label.
X = df_model.drop(columns=["label", "score"])
y = df_model.loc[:, "label"]

In [96]:
# Split data 80/20. The fixed random_state pins the shuffle, so the split and
# every metric computed from it are reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [107]:
# Fit a linear regression on the training split, then predict for both
# splits so that training and testing error can be compared.
model = LinearRegression()
model.fit(x_train, y_train)

preds_train, preds_test = (model.predict(features) for features in (x_train, x_test))

In [109]:
# Multiply the predictions by 10, then round to one decimal place.
# NOTE(review): the model is fit on the 0/1 label, so the x10 presumably maps its
# output onto the 0-10 score scale used in the MSE evaluation — confirm intended.
# NOTE(review): this cell overwrites its own inputs, so running it twice scales
# the predictions by 10 a second time.
preds_train = list(np.round(np.asarray(preds_train) * 10, 1))
preds_test = list(np.round(np.asarray(preds_test) * 10, 1))


### Evaluation with Mean Squared Error

In [111]:
# Mean squared error of the scaled predictions against the true review scores,
# which are looked up through the row index of each split.
mse_train = mean_squared_error(
    y_true=df_model.loc[x_train.index, "score"], y_pred=preds_train
)
mse_test = mean_squared_error(
    y_true=df_model.loc[x_test.index, "score"], y_pred=preds_test
)

print("Training MSE:", mse_train)
print("Testing MSE:", mse_test)

Training MSE: 2.8311787098089867
Testing MSE: 2.7824225122349104
