In [1]:
from bs4 import BeautifulSoup
import requests  
import numpy as np
import pandas as pd
from langdetect import detect
import re
import pickle
from string import punctuation 
import nltk
import nltk.data
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize,word_tokenize
from nltk.corpus import stopwords

In [2]:
#importing libraries for models and nlp tasks
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the four pickled vectorizers (each is used later through .transform).
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# artifacts produced by this project, never files from an untrusted source.
# Each file is opened in a `with` block so its handle is closed as soon as the
# object is loaded instead of being left open for the garbage collector.
with open('../models/tfidf_vect.pkl', 'rb') as fh:
    tfidf_vectorizer = pickle.load(fh)
with open('../models/tfidf_vect_undersampling.pkl', 'rb') as fh:
    tfidf_vectorizer_under = pickle.load(fh)
with open('../models/tfidf_vect_imb.pkl', 'rb') as fh:
    tfidf_vectorizer_imb = pickle.load(fh)
with open('../models/tfidf_vect_classweights.pkl', 'rb') as fh:
    tfidf_vectorizer_cw = pickle.load(fh)

In [4]:
# Load the four pickled classifiers (each is used later through .predict).
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# artifacts produced by this project, never files from an untrusted source.
# Each file is opened in a `with` block so its handle is closed as soon as the
# object is loaded instead of being left open for the garbage collector.
with open('../models/lr_mn.pkl', 'rb') as fh:
    test_model_lr = pickle.load(fh)
# NOTE(review): this "_under" model comes from lr_mn_neutral.pkl while its
# vectorizer comes from tfidf_vect_undersampling.pkl -- confirm they are a pair.
with open('../models/lr_mn_neutral.pkl', 'rb') as fh:
    test_model_lr_under = pickle.load(fh)
with open('../models/lr_mn_imb.pkl', 'rb') as fh:
    test_model_lr_imb = pickle.load(fh)
with open('../models/lr_mn_classweights.pkl', 'rb') as fh:
    test_model_lr_cw = pickle.load(fh)

In [5]:
# Read the two label lookup tables; the printed output below shows one with six
# emotions and one that additionally contains "neutral".
emotion = pd.read_csv('../models/emotions.csv')
emotion_neutral = pd.read_csv('../models/emotions_neutral.csv')

# to_dict('series') gives {column name: Series}; the 'emotion' Series is what
# later cells pass to .map() to turn integer labels into emotion names.
dic_emotions, dic_emotions_neutral = (
    frame.to_dict('series') for frame in (emotion, emotion_neutral)
)

print(dic_emotions['emotion'], dic_emotions_neutral['emotion'], sep='\n')

0     sadness
1         joy
2        love
3       anger
4        fear
5    surprise
Name: emotion, dtype: object
0     sadness
1         joy
2        love
3       anger
4        fear
5    surprise
6     neutral
Name: emotion, dtype: object


#### Web scraping the Goodreads website to get the reviews of a book
##### Getting the link for the required book

In [6]:
# Query the Goodreads search page for the book title.
data = {'q': "The Razor's Edge"}
book_url = "https://www.goodreads.com/search"
# requests waits indefinitely by default, so an explicit timeout keeps the cell
# from hanging on a stalled connection. raise_for_status() surfaces an HTTP
# error here instead of silently parsing an error page further down.
req = requests.get(book_url, params=data, timeout=30)
req.raise_for_status()

book_soup = BeautifulSoup(req.text, 'html.parser')

# Result anchors are selected by the `bookTitle` class; keep their visible text
# and their href in two parallel lists (same order as on the page).
titles = book_soup.find_all('a', class_='bookTitle')
title = [bookname.get_text() for bookname in titles]
link = [bookname['href'] for bookname in titles]

##### Of all the returned links, the first one is the closest match to the search

In [7]:
# Fail with a clear message when the search produced no results, rather than
# with an opaque IndexError from link[0].
if not link:
    raise ValueError("No Goodreads search results were found for the requested book.")

# Build the book-page URL from the first (closest) hit. HTTPS on the canonical
# host is used so the request is not sent in plaintext and then redirected.
rev = "https://www.goodreads.com" + link[0]
# Explicit timeout: requests would otherwise wait indefinitely on a stalled
# connection. raise_for_status() stops here on an HTTP error response.
rev_url = requests.get(rev, timeout=30)
rev_url.raise_for_status()
rev_soup = BeautifulSoup(rev_url.content, 'html.parser')

##### Getting reviews from the web page of the book

In [8]:
# Gather the text of every <section class="ReviewText"> element on the book page.
rev_list = [
    section.text
    for section in rev_soup.find_all("section", {"class": "ReviewText"})
]

In [9]:
# One row per scraped review, in a single 'reviews' column; display the frame.
df = pd.DataFrame(data=rev_list, columns=['reviews'])
df

Unnamed: 0,reviews
0,\nIn all big cities there are self-contained g...
1,"Books like this, that I’ve read so long ago in..."
2,(Book 570 From 1001 Books) - The Razor’s Edge ...
3,The best novel I've read since joining Goodrea...
4,ASPRO IL CAMMINO VERSO LA SALVEZZAIl primo fil...
5,"Oh, Mr. Maugham, there are moments when I love..."
6,In 1919 war hero Larry (Laurence) Darrell retu...
7,Tracing the intimate lives of representative B...
8,This has to be the most endearing and accessib...
9,"A Timeless, stirring drama, scaling the height..."


##### From all the languages in the reviews, selecting only the English-language reviews

In [10]:
def detect_en(text):
    """Return True when ``text`` is detected as English, otherwise False.

    This is a best-effort filter: input that cannot be classified (a
    non-string value, an empty or whitespace-only string, or text for which
    ``detect`` raises) is reported as not English instead of raising.

    Parameters
    ----------
    text : object
        Candidate review text.

    Returns
    -------
    bool
        True only if ``detect(text)`` returns ``'en'``.
    """
    # Nothing to classify: skip the detector entirely.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # Narrower than a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still propagate and the cell can be interrupted.
        return False

In [11]:
# Keep only the rows whose review is detected as English. reset_index() without
# drop=True keeps the pre-filter row labels in a new 'index' column.
df = df[df['reviews'].apply(detect_en)].reset_index()
df

Unnamed: 0,index,reviews
0,0,\nIn all big cities there are self-contained g...
1,1,"Books like this, that I’ve read so long ago in..."
2,3,The best novel I've read since joining Goodrea...
3,5,"Oh, Mr. Maugham, there are moments when I love..."
4,6,In 1919 war hero Larry (Laurence) Darrell retu...
5,7,Tracing the intimate lives of representative B...
6,8,This has to be the most endearing and accessib...
7,9,"A Timeless, stirring drama, scaling the height..."
8,10,"In Asian countries, the custom of “home leavin..."
9,11,"It took me a long time to read this book, this..."


In [41]:
#df.to_csv("razorsedge.csv",index=False,header=False)

##### Cleaning the text

In [12]:
def text_cleaning(text):
    """Normalise a raw review into a lowercase, lemmatised token string.

    Steps, in order:
      1. coerce the input to ``str``;
      2. remove parenthesised spans (non-greedy; ``.`` does not cross newlines);
      3. replace every character outside ``A-Za-z`` with a space;
      4. drop tokens found in NLTK's English stopword list;
      5. lemmatise each remaining token with ``WordNetLemmatizer``;
      6. join the tokens with single spaces and lowercase the result.

    Parameters
    ----------
    text : object
        Raw review. It is converted with ``str()`` before any regex runs, so
        non-string values no longer raise ``TypeError`` in the first
        substitution.

    Returns
    -------
    str
        Space-separated cleaned tokens; an empty string if nothing is left.

    Notes
    -----
    For string input the result is identical to the previous implementation.
    The former tag, digit and punctuation passes were dropped: they ran after
    step 3, when only ASCII letters and spaces remain, so they could not
    affect the result.
    """
    text = str(text)

    # Raw strings: "\(" in a plain literal is an invalid escape sequence.
    text = re.sub(r"\(.*?\)", "", text)
    text = re.sub(r"[^A-Za-z]", " ", text)

    # A set gives O(1) membership tests; the local name no longer shadows the
    # imported `stopwords` corpus object.
    # NOTE(review): the comparison happens before lowercasing, so capitalised
    # stopwords ("The", "In", "I") survive and are lowercased afterwards. Left
    # unchanged because the pickled vectorizers were presumably fitted on text
    # cleaned this way -- confirm against the training notebook before fixing.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    ]
    return " ".join(tokens).lower()

In [13]:
# Clean every review (the function is passed directly; no lambda wrapper needed).
df['cleaned_review'] = df['reviews'].apply(text_cleaning)
# Keep only rows whose cleaned text is non-empty. .copy() makes the filtered
# frame independent of the original, so the column assignments in later cells
# do not trigger SettingWithCopyWarning.
df = df[df['cleaned_review'].map(len) > 0].copy()


In [14]:
df

Unnamed: 0,index,reviews,cleaned_review
0,0,\nIn all big cities there are self-contained g...,in big city self contained group exist without...
1,1,"Books like this, that I’ve read so long ago in...",books like i read long ago past come back even...
2,3,The best novel I've read since joining Goodrea...,the best novel i read since joining goodreads ...
3,5,"Oh, Mr. Maugham, there are moments when I love...",oh mr maugham moment i love much i could burst...
4,6,In 1919 war hero Larry (Laurence) Darrell retu...,in war hero larry darrell return hometown chic...
5,7,Tracing the intimate lives of representative B...,tracing intimate life representative british a...
6,8,This has to be the most endearing and accessib...,this endearing accessible maugham book with ri...
7,9,"A Timeless, stirring drama, scaling the height...",a timeless stirring drama scaling height ecsta...
8,10,"In Asian countries, the custom of “home leavin...",in asian country custom home leaving common us...
9,11,"It took me a long time to read this book, this...",it took long time read book beautiful book exc...


##### Testing the review data for emotions using the trained models

In [15]:
# For each of the four vectorizer/model pairs: vectorise the cleaned reviews
# with that pair's vectorizer, then predict with the matching model.

# Pair 1: tfidf_vectorizer + test_model_lr
test_tfidf = tfidf_vectorizer.transform(df['cleaned_review'])
ytest_pred = test_model_lr.predict(test_tfidf)

# Pair 2: the "_under" vectorizer and model
test_tfidf_under = tfidf_vectorizer_under.transform(df['cleaned_review'])
ytest_pred_under = test_model_lr_under.predict(test_tfidf_under)

# Pair 3: the "_imb" vectorizer and model
test_tfidf_imb = tfidf_vectorizer_imb.transform(df['cleaned_review'])
ytest_pred_imb = test_model_lr_imb.predict(test_tfidf_imb)

# Pair 4: the "_cw" vectorizer and model
test_tfidf_cw = tfidf_vectorizer_cw.transform(df['cleaned_review'])
ytest_pred_cw = test_model_lr_cw.predict(test_tfidf_cw)

In [16]:
# Attach each model's predicted integer label as a new column.
# assign() returns a new frame rather than writing into `df`, which at this
# point is a boolean-filtered slice; item assignment on it would trigger
# SettingWithCopyWarning. Column order is the keyword order below.
df = df.assign(
    predicted_label=ytest_pred,
    predicted_label_under=ytest_pred_under,
    predicted_label_imb=ytest_pred_imb,
    predicted_label_cw=ytest_pred_cw,
)

In [17]:
# Translate each integer label column into emotion names via the lookup Series
# (integer index -> emotion). The first model uses the table without "neutral";
# the other three use the table that includes it.
# assign() builds a new frame instead of writing into the filtered slice, which
# avoids SettingWithCopyWarning; column order is the keyword order below.
df = df.assign(
    predicted_emotion=df['predicted_label'].map(dic_emotions['emotion']),
    predicted_emotion_under=df['predicted_label_under'].map(dic_emotions_neutral['emotion']),
    predicted_emotion_imb=df['predicted_label_imb'].map(dic_emotions_neutral['emotion']),
    predicted_emotion_cw=df['predicted_label_cw'].map(dic_emotions_neutral['emotion']),
)


In [18]:
df

Unnamed: 0,index,reviews,cleaned_review,predicted_label,predicted_label_under,predicted_label_imb,predicted_label_cw,predicted_emotion,predicted_emotion_under,predicted_emotion_imb,predicted_emotion_cw
0,0,\nIn all big cities there are self-contained g...,in big city self contained group exist without...,1,6,6,6,joy,neutral,neutral,neutral
1,1,"Books like this, that I’ve read so long ago in...",books like i read long ago past come back even...,1,6,6,6,joy,neutral,neutral,neutral
2,3,The best novel I've read since joining Goodrea...,the best novel i read since joining goodreads ...,1,1,1,6,joy,joy,joy,neutral
3,5,"Oh, Mr. Maugham, there are moments when I love...",oh mr maugham moment i love much i could burst...,4,4,4,6,fear,fear,fear,neutral
4,6,In 1919 war hero Larry (Laurence) Darrell retu...,in war hero larry darrell return hometown chic...,1,6,1,2,joy,neutral,joy,love
5,7,Tracing the intimate lives of representative B...,tracing intimate life representative british a...,1,6,1,6,joy,neutral,joy,neutral
6,8,This has to be the most endearing and accessib...,this endearing accessible maugham book with ri...,1,6,6,6,joy,neutral,neutral,neutral
7,9,"A Timeless, stirring drama, scaling the height...",a timeless stirring drama scaling height ecsta...,0,6,0,0,sadness,neutral,sadness,sadness
8,10,"In Asian countries, the custom of “home leavin...",in asian country custom home leaving common us...,1,5,1,5,joy,surprise,joy,surprise
9,11,"It took me a long time to read this book, this...",it took long time read book beautiful book exc...,2,6,6,6,love,neutral,neutral,neutral


In [19]:
df.loc[4, 'reviews']  # predictions: joy, neutral, love

'In 1919 war hero Larry (Laurence) Darrell returns to his hometown of Chicago, wounded twice, the brave aviator, has a deeper injury which changes him considerably. A comrade saved his life but lost his, over France, dying on the cold ground. Isabel Bradley, Larry\'s faithful fiancee notices the alteration .When his best friend Gay Maturin, gets his millionaire father Henry to offer his pal a good job. Darrell turns it down, he doesn\'t want to sell bonds, who does ? Still you can make a lot of lovely money, in the roaringera of wealth, naturally some acceptable dissipation occurs by hungry men, for the mighty dollar . W.Somerset Maugham the famous British author is visiting the windy city ( yes, he the novelist puts himself in the story) . Another of his books, "The Moon and Sixpence", has just been published and is an unexpected bestseller. Doing interviews with the local newspapers, Maugham gets an invitation to have lunch with Elliott Templeton, an old friend and Isabel\'s rich unc

In [21]:
df.loc[7, 'reviews']  # sadness from 3 models

'A Timeless, stirring drama, scaling the heights of ecstasy to the dregs of utter despair."The sharp edge of a razor is difficult to pass over; thus the wise say the path to Salvation is hard." (Paraphrased from the Katha Upanishad)Larry Darrell is a likable fellow, engaged to young socialite, Isabel Bradley. Larry goes off to war, but returns a changed man. He breaks his engagement to Isabel and leaves his former life behind, and sets off on a series of spiritual quests. (My teen self fell in love with the ideal that was Larry Darrell!)Larry Darrell was as close to Nirvana as a human could be, according to the narrator in this story. I just saw Larry as a simple, decent person who took life as it came and made the best of things, refusing to be sucked into the sham that was success and social status.There were a few somewhat decent movie adaptations of this book, but I was totally upset when comedian Bill Murray was cast in the part of the luminous Larry Darrell. Talk about miscasting

In [23]:
df.loc[8, 'reviews']  # joy and surprise

'In Asian countries, the custom of “home leaving” is not as common as it used to be, but it is ingrained in the culture deeply enough that it’s not yet considered weird. Home-leaving essentially means literally leaving your home, but also your secular life, in order to go on a spiritual journey, maybe even go live in a monastery and take vows and be ordained. There is no equivalency to this custom in Western civilization: usually, people who leave everything behind to go look for the meaning of life are considered to be a little crazy (or cashing in on the trendiness of mindfulness and enlightenment). As a culture, we’ve actually gotten a bit cynical about it, and with reason. But that quest can certainly be genuine, and the cultural bias against it probably intimidates a lot of people who could greatly benefit from giving it all up for a while and trying to see beyond the world they were raised in.This simple yet astonishingly layered novel by Mr. Maugham is about a man who goes on su

In [39]:
df.loc[9, 'reviews']  # love from one model and neutral from 3 models

'It took me a long time to read this book, this beautiful book, this excellent book. I took time because every sentence deserves to would read carefully. It is indeed serving by subtle prose, sought after in its simplicity.The stories are complete. They demonstrate how each life carries a greater or lesser share of tragedy and ridicule; happiness cannot be an exact science. However, it is happy that each has its definition: it can hide in futility like the Absolute. They also allow you to position yourself facing each of the characters described in this beautiful book, this lovely book.'

In [40]:
df.loc[12, 'reviews']  # anger from one model and neutral from 3 models

"I didn't love it as much as I expected. The premise that Eastern philosophy has something to offer us in the West just isn't as novel as when this book was originally published. Maugham's description of upper crust society in Paris is bitchy and wonderfully astute at times. But, like most authors, he found it easier to describe the sinners than the saints. Larry Darrell, the saint of this book, just doesn't seem human or interesting. He and his quest for enlightenment and/or belief in God are one big yawnfest. All Maugham can do is describe Larry's scintillating eyes and his smile over and over and over again and by the end of the book, even Maugham is apologizing for that. Also, Maugham allows himself to be the first person narrator and, as such, does more than his fair share of self-aggrandizing in the book. He befriends prostitutes down on their luck, flies to the deathbed of people he's mildly acquainted with and even pays the funeral expenses of heroin-addicted nymphomaniacs. Wha

In [22]:
# Show the full text of the review stored under row label 13.
df.loc[13, 'reviews']

'"One of Maugham\'s three major novels ..." TIME. That\'s high praise coming from TIME magazine. This MUST be good.I’m sure some of you are familiar with a little American television drama series that aired on HBO from 2002-2008 called The Wire. I was way late to the party, but over the past 6 months or so, I’ve managed to watch all 5 glorious seasons back to back to back. Well, glorious to a point. But what the hell happened in season 5? I kept waiting for it to get good, kept waiting for something to happen. Waiting for the outlaw Omar Little to come along, whistling ‘The Farmer in the Dell,’ and take that bitch Marlo down. Man. Talk about anticlimactic. How you gonna do Omar like that, huh?I had a similar experience with this book. I was really digging it for about 200 pages. Then an unfortunate thing happened…I forgive the weirdness of Maugham interjecting himself as a minor character in the story and the weirdness of him being the narrator. I forgive the implausibility of every ch

In [24]:
# joy and surprise: joy for imbalanced models, surprise for undersampled and class-weights
df.loc[14, 'reviews']

"I am considering starting a project to find out why some books are infinitely better at curing a reader's block than others. This would be my first field study. I picked it up with the tired feeling of not being able to concentrate, and before I knew what had happened, I was completely immersed in the strange lives of upper class Chicago people - charmingly interacting with a fictional Maugham in all kinds of settings.Maybe the trick is that these people lose the direction of their lives over and over again, but still continue to look for meaning? Who could not relate to that?Maybe the trick is in the unspectacular development of ordinary life, eloquently narrated and observed? Or maybe it is just the brilliant spirit of the author shining on each page?Whatever it is, it's wonderful!"

In [26]:
df.loc[17, 'reviews']  # all models: surprise

'Back in the dark days of the mid-’80s, I read somewhere that Bill Murray was going to be in a movie called The Razor’s Edge, and that it was based on a book. Since this was long before the days where you could check IMDB to see what the movie was going to be about, I figured the book had to be hilarious since Murray was starring in it. So I found the book at the library and started reading. I was pretty shocked to find that it was a serious story about a guy who goes looking for the meaning of life. I was even more shocked that I loved it. And even though the movie version flopped and caused Murray to drop out of film making for years, I still want to say thanks to Bill because if it wasn’t for the movie, I probably never would have read this.Maugham engages in a bit of meta-fiction by incorporating himself into a story he claims at the beginning was true. (I guess there’s been a fair amount of debate on if it actually was based on fact, but I’m content to consider it fiction.) Maugha