In [1]:
from bs4 import BeautifulSoup
import requests  
import numpy as np
import pandas as pd
from langdetect import detect
import re
import pickle
from string import punctuation 
import nltk
import nltk.data
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize,word_tokenize
from nltk.corpus import stopwords

In [2]:
#importing libraries for models and nlp tasks
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

In [5]:
tfidf_vectorizer=pickle.load(open('../tfidfvectors/tfidf_vect_emogoneutral.pkl','rb'))

In [6]:
test_model_lr=pickle.load(open('../models/lr_mn_emogoneutral.pkl','rb'))
test_model_lr_cw=pickle.load(open('../models/lr_mn_emogoneutral_cw.pkl','rb'))

In [7]:
emotion = pd.read_csv('../labels_prediction/emotions_googleneutral.csv')

dic_emotions=emotion.to_dict('series')

print(dic_emotions['emotion'])

0     sadness
1         joy
2        love
3       anger
4        fear
5    surprise
6     neutral
Name: emotion, dtype: object


#### Webscraping goodreads website for getting reviews of a book
##### To get the link for the required book 

In [25]:
data = {'q': "razor's edge"}
book_url = "https://www.goodreads.com/search"
req = requests.get(book_url, params=data)

book_soup = BeautifulSoup(req.text, 'html.parser')

titles=book_soup.find_all('a', class_ = 'bookTitle')
title=[]
link=[]
for bookname in titles:
    title.append(bookname.get_text())
    link.append(bookname['href'])

##### From all the links first link is the most closest search 

In [26]:
rev="http://goodreads.com"+link[0]
rev_url = requests.get(rev)
rev_soup=BeautifulSoup(rev_url.content, 'html.parser')

##### Getting reviews from the web page of the book

In [27]:
rev_list=[]
for x in rev_soup.find_all("section", {"class": "ReviewText"}):
    rev_list.append(x.text)

In [28]:
df=pd.DataFrame(rev_list, columns=['reviews'])
df

Unnamed: 0,reviews
0,\nIn all big cities there are self-contained g...
1,"Books like this, that I’ve read so long ago in..."
2,(Book 570 From 1001 Books) - The Razor’s Edge ...
3,The best novel I've read since joining Goodrea...
4,ASPRO IL CAMMINO VERSO LA SALVEZZAIl primo fil...
5,"Oh, Mr. Maugham, there are moments when I love..."
6,In 1919 war hero Larry (Laurence) Darrell retu...
7,Tracing the intimate lives of representative B...
8,This has to be the most endearing and accessib...
9,"A Timeless, stirring drama, scaling the height..."


##### From all the languages in the reviews, selecting the english language reviews

In [29]:
def detect_en(text):
    try:
        return detect(text) == 'en'
    except:
        return False

In [30]:
df = df[df['reviews'].apply(detect_en)]
df=df.reset_index()
df

Unnamed: 0,index,reviews
0,0,\nIn all big cities there are self-contained g...
1,1,"Books like this, that I’ve read so long ago in..."
2,3,The best novel I've read since joining Goodrea...
3,5,"Oh, Mr. Maugham, there are moments when I love..."
4,6,In 1919 war hero Larry (Laurence) Darrell retu...
5,7,Tracing the intimate lives of representative B...
6,8,This has to be the most endearing and accessib...
7,9,"A Timeless, stirring drama, scaling the height..."
8,10,"In Asian countries, the custom of “home leavin..."
9,11,"It took me a long time to read this book, this..."


In [31]:
#df.to_csv("razorsedge.csv",index=False,header=False)

##### Cleaning the text

In [32]:
def text_cleaning(text):
   
    text=re.sub("\(.*?\)","",text)

    text = re.sub(r"[^A-Za-z]", " ", str(text))
    
     #remove tags
    text=re.sub("&lt;/?.*?&gt;"," &lt;&gt; ",text)

    
    # remove special characters and digits
    text=re.sub("(\\d|\\W)+"," ",text)
    
    # Remove punctuation from text
    text = "".join([c for c in text if c not in punctuation])
    stopwords = nltk.corpus.stopwords.words('english')
    text = text.split()
    text = [w for w in text if not w in stopwords]
    text = " ".join(text)
        
    text = text.split()
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in text]
    text = " ".join(lemmatized_words)
    text=text.lower()
    
    return text 

In [33]:
df['cleaned_review'] = df['reviews'].apply(lambda x: text_cleaning(x))
df = df[df['cleaned_review'].map(len) > 0]


In [34]:
df

Unnamed: 0,index,reviews,cleaned_review
0,0,\nIn all big cities there are self-contained g...,in big city self contained group exist without...
1,1,"Books like this, that I’ve read so long ago in...",books like i read long ago past come back even...
2,3,The best novel I've read since joining Goodrea...,the best novel i read since joining goodreads ...
3,5,"Oh, Mr. Maugham, there are moments when I love...",oh mr maugham moment i love much i could burst...
4,6,In 1919 war hero Larry (Laurence) Darrell retu...,in war hero larry darrell return hometown chic...
5,7,Tracing the intimate lives of representative B...,tracing intimate life representative british a...
6,8,This has to be the most endearing and accessib...,this endearing accessible maugham book with ri...
7,9,"A Timeless, stirring drama, scaling the height...",a timeless stirring drama scaling height ecsta...
8,10,"In Asian countries, the custom of “home leavin...",in asian country custom home leaving common us...
9,11,"It took me a long time to read this book, this...",it took long time read book beautiful book exc...


##### Testing the reviews data for emotions using model

In [35]:
test_tfidf = tfidf_vectorizer.transform(df['cleaned_review'])

ytest_pred=test_model_lr.predict(test_tfidf)

ytest_pred_cw=test_model_lr_cw.predict(test_tfidf)

In [36]:
df['predicted_label']=ytest_pred

df['predicted_label_cw']=ytest_pred_cw

In [37]:
df['predicted_emotion'] = df['predicted_label'].map(dic_emotions['emotion'])
df['predicted_emotion_cw'] = df['predicted_label_cw'].map(dic_emotions['emotion'])


In [38]:
df

Unnamed: 0,index,reviews,cleaned_review,predicted_label,predicted_label_cw,predicted_emotion,predicted_emotion_cw
0,0,\nIn all big cities there are self-contained g...,in big city self contained group exist without...,6,6,neutral,neutral
1,1,"Books like this, that I’ve read so long ago in...",books like i read long ago past come back even...,6,6,neutral,neutral
2,3,The best novel I've read since joining Goodrea...,the best novel i read since joining goodreads ...,6,6,neutral,neutral
3,5,"Oh, Mr. Maugham, there are moments when I love...",oh mr maugham moment i love much i could burst...,6,6,neutral,neutral
4,6,In 1919 war hero Larry (Laurence) Darrell retu...,in war hero larry darrell return hometown chic...,6,6,neutral,neutral
5,7,Tracing the intimate lives of representative B...,tracing intimate life representative british a...,6,6,neutral,neutral
6,8,This has to be the most endearing and accessib...,this endearing accessible maugham book with ri...,6,6,neutral,neutral
7,9,"A Timeless, stirring drama, scaling the height...",a timeless stirring drama scaling height ecsta...,6,6,neutral,neutral
8,10,"In Asian countries, the custom of “home leavin...",in asian country custom home leaving common us...,6,5,neutral,surprise
9,11,"It took me a long time to read this book, this...",it took long time read book beautiful book exc...,6,6,neutral,neutral


In [22]:
df['reviews'][0]

'I read this every year at Christmas, and I always will do. Simply because of the atmosphere it evokes. This story is Christmas as far as I’m concerned. It wouldn’t be the same without it. It is perfectly festive and is also appropriately didactic. It is an allegory for what happens to those that are unnecessarily bitter and twisted, refusing to take part in a joyful occasion. It is a glimpse at what could happen to someone who rejects their family upon trivial grounds, and let’s themselves be set apart. It is also a suggestion that one shouldn’t be so concerned with money. Money isn’t everything; it certainly didn’t buy ol’ Scrooge happiness. But, Christmas did and will do so again.  ___________________________________You can connect with me on social media via My Linktree.__________________________________'

In [58]:
df['cleaned_review'][7]

'yet another read as amazing ever one dickens best i think re read still favorite christmas story charles dickens never cease make smile feel multitude emotion first read wonderful absolutely wonderful i know taken long read story i also know taken long read anything charles dickens it surpassed expectation ironically great the concept idea behind story brilliant it surprised cleverness wit even though i went knowing story well i fell love dickens storytelling especially would address reader make narrative voice known his commentary victorian life social criticism known key aspect book i attest fact i loved described different holiday scene made vivid life magic the supernatural element involving spirit pure genius overall i get chapter people call short story included many important element including greatest character development literature this story blessing right hand tiny tim ebenezer scrooge'

In [24]:
df['reviews'][19] 

"im usually not a seasonal reader, but this year i tried to make an effort to read a couple of holiday themed books and im so glad i saved this for last!i grew up very familiar with the story of ‘a christmas carol’ via multiple adaptations (shoutout to the flintstones version from my childhood!), but i cant believe i never read the actual book itself. dickens is such a well known author, so its difficult to not critique this as i normally would with a book. but i think the message of this story is so important and should be the focus of this review. i personally know how easy it is to get caught up in the hustle and bustle and materialism and stress that can surround the holiday season. we fixate so much on sales and good deals and buying things to make us happy, that we can forget a loving word or spending quality time with those we care about are really what should be a priority. we should remember that kindness is the best gift we can give. so let us follow scrooges (eventual) examp