In [37]:
import os

In [2]:
datafilepath = r'C:\Nilisha Workspace\Sentiment Analyzer\machine-learning-examples-master\imdb-sentiment-analysis\movie_data'

In [5]:
reviews_train = []
for line in open(os.path.join(datafilepath,'full_train.txt'), 'r', encoding='utf8'):
    reviews_train.append(line.strip())
    
reviews_test = []
for line in open(os.path.join(datafilepath,'full_train.txt'), 'r', encoding='utf8'):
    reviews_test.append(line.strip())

In [7]:
import re

REPLACE_NO_SPACE = re.compile("[.;:!\'?,\"()\[\]]")
REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")

def preprocess_reviews(reviews):
    reviews = [REPLACE_NO_SPACE.sub("", line.lower()) for line in reviews]
    reviews = [REPLACE_WITH_SPACE.sub(" ", line) for line in reviews]
    
    return reviews

reviews_train_clean = preprocess_reviews(reviews_train)
reviews_test_clean = preprocess_reviews(reviews_test)

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(binary=True)
cv.fit(reviews_train_clean)
X = cv.transform(reviews_train_clean)
X_test = cv.transform(reviews_test_clean)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

target = [1 if i < 12500 else 0 for i in range(25000)]

X_train, X_val, y_train, y_val = train_test_split(
    X, target, train_size = 0.75
)

for c in [0.01, 0.05, 0.25, 0.5, 1]:
    
    lr = LogisticRegression(C=c)
    lr.fit(X_train, y_train)
    print ("Accuracy for C=%s: %s" 
           % (c, accuracy_score(y_val, lr.predict(X_val))))



Accuracy for C=0.01: 0.8696
Accuracy for C=0.05: 0.87648
Accuracy for C=0.25: 0.8776
Accuracy for C=0.5: 0.87792
Accuracy for C=1: 0.87376


In [10]:

final_model = LogisticRegression(C=0.05)
final_model.fit(X, target)
print ("Final Accuracy: %s" 
       % accuracy_score(target, final_model.predict(X_test)))

Final Accuracy: 0.95148


In [11]:

feature_to_coef = {
    word: coef for word, coef in zip(
        cv.get_feature_names(), final_model.coef_[0]
    )
}
for best_positive in sorted(
    feature_to_coef.items(), 
    key=lambda x: x[1], 
    reverse=True)[:5]:
    print (best_positive)
    

for best_negative in sorted(
    feature_to_coef.items(), 
    key=lambda x: x[1])[:5]:
    print (best_negative)

('excellent', 0.9292549193423959)
('perfect', 0.7907005834710253)
('great', 0.6745323523875933)
('amazing', 0.6127040012519653)
('superb', 0.601936804694158)
('worst', -1.3645959239411622)
('waste', -1.166424223235376)
('awful', -1.0324189681376759)
('poorly', -0.8752018837121309)
('boring', -0.856354346809877)


In [12]:
import pandas as pd
ds_data = pd.read_csv(r'C:\Users\Hp\Downloads\DSWork_1575_02Jul2019\DSWork_1575_02Jul2019.csv')

In [15]:
Venue_Review = ds_data[~ds_data.OpenTextForVenueReview.isnull()]['OpenTextForVenueReview']

In [17]:
Venue_Test = cv.transform(Venue_Review.tolist())

In [21]:
Venue_Predictions = final_model.predict(Venue_Test)

In [29]:
Venue_Predictions = pd.Series(Venue_Predictions)

In [25]:
Venue_Predictions = Venue_Predictions.apply(lambda x: 'Positive' if x==1 else 'Negative')

In [None]:
Venue_Predictions = pd.DataFrame(Venue_Predictions, columns=['Sentiment'])

In [27]:
Venue_Review_DF = ds_data[~ds_data.OpenTextForVenueReview.isnull()]

In [31]:
Venue_Predictions.set_index(Venue_Review_DF.index, inplace=True)

In [35]:
Venue_Review_DF['Sentiments_Venue_Review'] = Venue_Predictions.Sentiment

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [38]:
Venue_Review_DF

Unnamed: 0,SurveyTitle,UserAgent,SurveyResponseId,EventRating,OpenTextForEventReview,VenueRating,ValueForMoneyRating,ValueForMoneyReview,OpenTextForVenueReview,GeneralComments,Sentiments_Venue_Review
5917,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like M...,1610910,5.0,"Professional, and exhilarating show which is g...",5.0,Great Value,,Warm and inviting staff and a great traditiona...,,Positive
5918,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X)...,1610918,5.0,Excellent show. Loved it.,4.0,Great Value,,It could do with a makeover,,Positive
5921,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like...,1610932,5.0,Absolutely brilliant Great dancers and great ...,2.0,OK,,Ok seats not so comfy.,,Negative
5923,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like...,1610979,5.0,An absolutely emotional experience. From an It...,5.0,Great Value,,Perfect viewing and phenomenal experience,,Positive
5926,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-A3...,1611001,5.0,Excellent show absolutely brilliant brilliant ...,5.0,Great Value,,Nice leg room and friendly staff!,,Positive
5930,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,1611239,5.0,Lively and funny. Very enjoyable.,5.0,Great Value,,It's a great theatre.,,Positive
5931,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like...,1611290,5.0,"Fantastic, best dance show we’ve ever seen. Gi...",5.0,Great Value,,It’s a good venue for me living locally. Being...,,Positive
5932,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X)...,1611305,5.0,A fantastic evenings entertainment. Excellent ...,5.0,Great Value,,"Great venue,only complaint is the bar was very...",,Positive
5936,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X)...,1611430,5.0,"Colourful , exciting and entertaining show. Re...",4.0,Great Value,,Bit of a bottleneck coming out of main theatre...,,Positive
5940,GIOVANNI PERNICE: DANCE IS LIFE,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.3...,1611578,5.0,Great entertainment and excellent everything,5.0,Great Value,Excellent,Grand good seats and view of the stage and the...,Great entertainment and great venue,Positive
