# Sentiment Analysis

### Setup

In [142]:
import numpy as np 
import pandas as pd

In [143]:
train = pd.read_csv("train.csv")
val = pd.read_csv("val.csv")

In [144]:
train.head()

Unnamed: 0,phrase_id,label,phrase
0,211506,0.47222,should have been the vehicle for Chan that '' ...
1,173853,0.44444,the faith of the Tonga people is in every way ...
2,115950,0.43056,between sappy and sanguine
3,164954,0.61111,like it did
4,185019,0.25,"It 's a hellish , numbing experience to watch ,"


In [145]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer 

In [146]:
vectorizer = CountVectorizer(
    analyzer = 'word',
    max_features = 1000
)
tfidf_vectorizer = TfidfVectorizer(
    use_idf=True,
    max_features = 1000)

In [147]:
train_words = train.iloc[:,2]
train_labels = train.iloc[:,1]

In [148]:
train_bag = vectorizer.fit_transform(train_words)
train_bag_tfidf = tfidf_vectorizer.fit_transform(train_words)

In [149]:
val_words = val.iloc[:,2]
val_labels = val.iloc[:,1]

### Linear Regression

In [150]:
from sklearn.linear_model import LinearRegression

In [151]:
lin_model = LinearRegression()

In [152]:
lin_model = lin_model.fit(X=train_bag_tfidf, y=train_labels)

In [153]:
val_bag = vectorizer.fit_transform(val_words)
val_bag_tfidf = tfidf_vectorizer.fit_transform(val_words)

In [154]:
val_pred = lin_model.predict(val_bag_tfidf)

In [155]:
from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(val_pred, val_labels))  

Mean Absolute Error: 0.1527279657151534


In [156]:
for i in range (0, 100): 
    print(val_words[i])
    print("prediction:")
    print(val_pred[i])
    print("label:")
    print(val_labels[i])

and Mary-Louise Parker
prediction:
0.7232987847759793
label:
0.5
believe any viewer , young or old ,
prediction:
0.4549158815499396
label:
0.5
want a little more than this
prediction:
0.6653611647790632
label:
0.43056000000000005
, the documentary gives an especially poignant portrait of her friendship with the never flagging legal investigator David Presson .
prediction:
0.6923522928737783
label:
0.77778
back to Newcastle , the first half of Gangster No .
prediction:
0.48156717825543754
label:
0.5
listless , witless
prediction:
0.5056754478392266
label:
0.20833000000000002
as cool and crowd-pleasing as a documentary can get
prediction:
0.4992512024697715
label:
0.88889
the master of innuendo . It is not what you see
prediction:
0.3370911736568589
label:
0.52778
the hapless victims of the arrogant
prediction:
0.49296622186828837
label:
0.44443999999999995
first . Many of the effective horror elements
prediction:
0.5146611160270762
label:
0.52778
is forced and heavy-handed , and occasio

### Neural Networks

In [161]:
from sklearn.neural_network import MLPRegressor

In [162]:
neural_net = MLPRegressor()

In [163]:
neural_net.fit(train_bag_tfidf, train_labels)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=None, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [168]:
val_predict = neural_net.predict(val_bag_tfidf)
print(val_predict)

[0.73394904 0.38496237 0.62599397 ... 0.4969355  0.77070908 0.60290371]


In [167]:
from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(val_predict, val_labels))  

Mean Absolute Error: 0.15763156962158048


In [170]:
for i in range (0, 100): 
    print(val_words[i])
    print("prediction:")
    print(val_predict[i])
    print("label:")
    print(val_labels[i])

and Mary-Louise Parker
prediction:
0.7339490382710991
label:
0.5
believe any viewer , young or old ,
prediction:
0.3849623699730234
label:
0.5
want a little more than this
prediction:
0.6259939691097495
label:
0.43056000000000005
, the documentary gives an especially poignant portrait of her friendship with the never flagging legal investigator David Presson .
prediction:
0.6133965194487367
label:
0.77778
back to Newcastle , the first half of Gangster No .
prediction:
0.6085919877313032
label:
0.5
listless , witless
prediction:
0.49693550005041665
label:
0.20833000000000002
as cool and crowd-pleasing as a documentary can get
prediction:
0.3977316972653539
label:
0.88889
the master of innuendo . It is not what you see
prediction:
0.20713756268611447
label:
0.52778
the hapless victims of the arrogant
prediction:
0.5164148894103314
label:
0.44443999999999995
first . Many of the effective horror elements
prediction:
0.5763360043856052
label:
0.52778
is forced and heavy-handed , and occasio