In [35]:
import numpy as np
import pandas as pd
import requests
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

def calculatewordfreq(words):
    wordfreq = {}
    for word in words:
        wordfreq[word] = wordfreq.get(word, 0) + 1
    return wordfreq

def calculate_ngrams(docs, n):
    ngram_list = []
    for doc in docs:
        words = doc.split()
        doc_ngrams = [tuple(words[i:i+n]) for i in range(len(words)-n+1)]
        ngram_list.append(doc_ngrams)
    return ngram_list

def ngrams_to_vector(ngrams, vocabulary):
    vector = []
    for doc in ngrams:
        doc_counts = calculatewordfreq(doc)
        vector.append([doc_counts.get(term, 0) for term in vocabulary])
    return vector

In [36]:
data = pd.read_csv('Musical_instruments_reviews.csv')
x = data.iloc[:, 4].values
y = data.iloc[:, 5].values

# Preprocessing y values
for i in range(len(y)):
    if(y[i] == 4 or y[i] == 5):
        y[i] = 1
    elif(y[i] == 3):
        y[i] = 0
    else:
        y[i] = -1

# Splitting data for balanced classes
x2 = []
y2 = []
c = {-1: 0, 0: 0, 1: 0}
for z in range(len(x)):
    if(c[y[z]] <= 467):
        x2.append(x[z])
        c[y[z]] += 1
        y2.append(y[z])

x = x2
y = y2

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1, stratify=y)

n = int(input("Enter the value of n for n-grams: "))
xtrain_ngrams = calculate_ngrams(x_train, n)
xtest_ngrams = calculate_ngrams(x_test, n)

vocabulary = set(gram for doc in xtrain_ngrams for gram in doc)


xtrain_vector = ngrams_to_vector(xtrain_ngrams, vocabulary)
xtest_vector = ngrams_to_vector(xtest_ngrams, vocabulary)



Enter the value of n for n-grams:  1


In [37]:
from sklearn.naive_bayes import GaussianNB
multinomial_nb = LogisticRegression()
#model = LogisticRegression()
# Training the classifier
multinomial_nb.fit(xtrain_vector, y_train)

# Making predictions on the test set
y_pred = multinomial_nb.predict(xtest_vector)
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.594306049822064


In [38]:
review = "not great product but useful buy this"
review_ngrams = calculate_ngrams([review], n)
review_vector = ngrams_to_vector(review_ngrams, vocabulary)
res = multinomial_nb.predict(review_vector)
print(res[0])  # Prediction for the review

-1.0


In [39]:
review = "best product highly recommended"
review_ngrams = calculate_ngrams([review], n)
review_vector = ngrams_to_vector(review_ngrams, vocabulary)
res = multinomial_nb.predict(review_vector)
print(res[0])  # Prediction for the review

1.0


In [40]:
review = "Oh its a crybaby"
review_ngrams = calculate_ngrams([review], n)
review_vector = ngrams_to_vector(review_ngrams, vocabulary)
res = multinomial_nb.predict(review_vector)
print(res[0])  # Prediction for the review

0.0


In [41]:
x_test

['"Oh its a crybaby", "I have a crybaby wah", "You suck because you don\'t own a crybaby"...I\'m tired of hearing these statements from guitarists that are so fresh they couldn\'t even tell you how many strings their instrument has!  This wah pedal just doesn\'t cut it.  Maybe at one time, but not today, it hasn\'t been up to par in a LONG time.  As a young player I insisted on having a wah pedal.  My mom bought me this one.  I thought it was awesome until I learned better.  It took me about a week to learn how to turn it on and when I finally did I thought I broke it!The eq on this pedal isn\'t good, there is no smooth transition from heel to toe.  There is a big drop in the freq. that kills the effect.  You have to pump your foot really fast to avoid having that drop.  You have to pretty much push it toe foward and then literally stomp on it to turn it on and then follow the same steps to turn it off.  Very inconvenient for someone who is trying to rapidly move between effects on a b