In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
import string
punct = string.punctuation
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
stop_words=set(stopwords.words('english'))
lemma=WordNetLemmatizer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [3]:
def clean_review(text):
    text=re.sub(r'http\S+','',text)
    text=re.sub('[^a-zA-Z]',' ',text)
    text=word_tokenize(text)
    text=[i for i in text if i not in stop_words]
    text=[lemma.lemmatize(word=w,pos='v') for w in text]
    text=[i for i in text if len(i)>2]
    text=[w.lower() for w in text]
#     text=' '.join(text)
    return text

clean_review("    Hello how are you and are you 30? Like this video")

['hello', 'like', 'video']

In [4]:
def count(entry):
    charcount=[]
    for para in entry:
        if para!='\n':
            charcount.append(para)
    return len(charcount),len(entry.split())

count("Oh My God, this woman is so pretty")

(34, 8)

In [5]:
data_yelp = pd.read_csv('/content/drive/MyDrive/yelp_labelled.txt', sep='\t', header = None)
columns_name = ['Review', 'Sentiment']
data_yelp.columns = columns_name
print("Dataset size - yelp : ", data_yelp.shape)
data_amazon = pd.read_csv('/content/drive/MyDrive/amazon_cells_labelled.txt', sep = '\t', header = None)
data_amazon.columns = columns_name
print("Dataset size - amazon : ", data_amazon.shape)
data_imdb = pd.read_csv('/content/drive/MyDrive/imdb_labelled.txt', sep = '\t', header = None)
data_imdb.columns = columns_name
print("Dataset size - imdb : ", data_imdb.shape)
data = data_yelp.append([data_amazon, data_imdb], ignore_index=True)
data.shape

Dataset size - yelp :  (1000, 2)
Dataset size - amazon :  (1000, 2)
Dataset size - imdb :  (748, 2)


(2748, 2)

In [6]:
data['Sentiment'].value_counts()

1    1386
0    1362
Name: Sentiment, dtype: int64

In [7]:
from sklearn.svm import LinearSVC
tfidf = TfidfVectorizer(tokenizer = clean_review)
classifier = LinearSVC(max_iter=2000)

In [8]:
X = data['Review']
y = data['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.036, random_state = 42)
X_train.shape, X_test.shape

((2649,), (99,))

In [9]:
clf = Pipeline([('tfidf', tfidf), ('clf', classifier)])

In [10]:
clf.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('tfidf',
                 TfidfVectorizer(analyzer='word', binary=False,
                                 decode_error='strict',
                                 dtype=<class 'numpy.float64'>,
                                 encoding='utf-8', input='content',
                                 lowercase=True, max_df=1.0, max_features=None,
                                 min_df=1, ngram_range=(1, 1), norm='l2',
                                 preprocessor=None, smooth_idf=True,
                                 stop_words=None, strip_accents=None,
                                 sublinear_tf=False,
                                 token_pattern='(?u)\\b\\w\\w+\\b',
                                 tokenizer=<function clean_review at 0x7f4308480ea0>,
                                 use_idf=True, vocabulary=None)),
                ('clf',
                 LinearSVC(C=1.0, class_weight=None, dual=True,
                           fit_intercept=True,

In [11]:
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.84      0.85        57
           1       0.79      0.81      0.80        42

    accuracy                           0.83        99
   macro avg       0.82      0.83      0.82        99
weighted avg       0.83      0.83      0.83        99



In [12]:
def take_prediction():
  text=input()
  text=re.sub(r'http\S+','',text)
  text=re.sub('[^a-zA-Z]',' ',text)
  text=word_tokenize(text)
  text=[i for i in text if i not in stop_words]
  text=[lemma.lemmatize(word=w,pos='v') for w in text]
  text=[i for i in text if len(i)>2]
  text=[w.lower() for w in text]
  text=' '.join(text)
  make=clf.predict([text])
  return make[0]

In [13]:
take_prediction()

Dear Diary,         I did nothing much today. I woke up at 7 in the morning as usual. It was raining heavily, so I didn't go to college. I know it was just an excuse and I regret it. During the day time, I did nothing other than watching YouTube videos. At 5 in the evening, I went to the market to grab something to eat, and all of a sudden, I saw my professor sitting opposite me. I was really embarrassed as I already informed him that I wasn't feeling good and won't be able to come to class today. Sorry again, but see how Karma works. I will make sure to avoid such excuses from today. God, help me and forgive me. Lessons learned: Don't lie or make excuses.         Today was wonderful!         I am glad to tell you mom found the gift super adorable. She was amazed. It felt very nice seeing her so happy; we enjoyed it a lot. Then I gave some time to my ongoing project; later I explored concepts related to Robotic Process Automation (RPA); I found it fascinating the way our daily tasks ca

1

In [14]:
take_prediction()

Dear Diary,          I am sorry, I can hardly express my day in words. I just reached home visiting a doctor. Yeah, you have guessed it right, I've got a fever and headache. So nothing much today, and you know what how this happened...well because I forgot my umbrella while going to college and see this is the result. I just hate the rainy season...


0

In [15]:
def make_prediction(text):
  #text=input()
  text=re.sub(r'http\S+','',text)
  text=re.sub('[^a-zA-Z]',' ',text)
  text=word_tokenize(text)
  text=[i for i in text if i not in stop_words]
  text=[lemma.lemmatize(word=w,pos='v') for w in text]
  text=[i for i in text if len(i)>2]
  text=[w.lower() for w in text]
  text=' '.join(text)
  make=clf.predict([text])
  return make[0]

In [16]:
make_prediction("""Hey Diary,
        I know you are feeling sad because this is the last writing of the year for you. Still, it went great for me as I met my friends. We went to a party and had fun but left the party early as I wanted to end this year spending some quality time with family. And you know what...today, I made Chicken Biryani taking help from my mother. And everyone at home really liked it. This year brought some wonderful and cherishing moments into our lives along with COVID19 :P...Bye-bye Saayonara :)""")

1

In [None]:
# For this free service organisation needs to create their own account ON 

import requests

url = "https://kutip.p.rapidapi.com/api/quote/random"

querystring = {"lang":"en"}

headers = {
    'x-rapidapi-host': "kutip.p.rapidapi.com",
    'x-rapidapi-key': "b2aeaf8263mshf529f6a8b7e380dp19e40djsn6291f4f5XXXX" 
    }

response = requests.request("GET", url, headers=headers, params=querystring)

print(response.text)

In [18]:
import pickle
with open("analysis_model","wb") as f:
  pickle.dump(clf,f)

In [19]:
clf = pickle.load(open("analysis_model", 'rb'))

In [20]:
def make_prediction(text):
  #text=input()
  text=re.sub(r'http\S+','',text)
  text=re.sub('[^a-zA-Z]',' ',text)
  text=word_tokenize(text)
  text=[i for i in text if i not in stop_words]
  text=[lemma.lemmatize(word=w,pos='v') for w in text]
  text=[i for i in text if len(i)>2]
  text=[w.lower() for w in text]
  text=' '.join(text)
  make=clf.predict([text])
  return make[0]