In [2]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [3]:

! chmod 600 ~/.kaggle/kaggle.json

In [4]:
! kaggle datasets download -d praveengovi/emotions-dataset-for-nlp

Downloading emotions-dataset-for-nlp.zip to /content
  0% 0.00/721k [00:00<?, ?B/s]
100% 721k/721k [00:00<00:00, 23.6MB/s]


In [5]:
import zipfile as zp 
with zp.ZipFile('/content/emotions-dataset-for-nlp.zip', 'r') as zip_ref:
    zip_ref.extractall()

In [65]:
import pandas as pd

messages = pd.read_csv('/content/val.txt', sep=';',
                           names=["Text", "mood"])

In [66]:
messages.head()

Unnamed: 0,Text,mood
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy


In [67]:
messages.isnull().sum()

Text    0
mood    0
dtype: int64

In [68]:
import re
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [69]:
#nltk.download("stopwords") # in order to extract stopwords
lemma=nltk.WordNetLemmatizer()
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [70]:
text_list=[]
for text in messages.Text:
    text=re.sub("[^a-zA-Z]"," ",text) # extracting unnecesary characters
    text=text.lower() #makes characters lowercase
    text=nltk.word_tokenize(text) # splits all the words
    text=[word for word in text if not word in set(stopwords.words("english"))] # extract stopwords
    text=[lemma.lemmatize(word) for word in text] # Lemmatisation
    text=" ".join(text) 
    text_list.append(text)

#@title Default title text
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
corpus = []
for i in range(0, len(messages)):
    review = re.sub('[^a-zA-Z]', ' ', messages['Text'][i])
    review = review.lower()
    review = review.split()
    
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [71]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(text_list).toarray()

In [72]:
messages['mood'].unique()

array(['sadness', 'love', 'anger', 'joy', 'fear', 'surprise'],
      dtype=object)

In [73]:
messages=messages.replace({'mood': {'sadness': 0, 'love': 1,'anger': 2,'joy': 3,'fear': 4,'surprise': 5}})

In [74]:
y=messages.iloc[:,1].values

In [75]:
y

array([0, 0, 1, ..., 3, 3, 3])

In [76]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

In [77]:

from sklearn.naive_bayes import MultinomialNB
spam_detect_model = MultinomialNB().fit(X_train, y_train)

y_pred=spam_detect_model.predict(X_test)


In [78]:
from sklearn.metrics import r2_score
r=round(r2_score(y_test, y_pred),2)*100
print("accuracy of this model is :",int(r),"%")

accuracy of this model is : 7 %


array([3, 0, 3, 3, 0, 0, 3, 0, 3, 4, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3,
       3, 3, 0, 0, 2, 0, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 4, 3, 3, 3, 0,
       3, 3, 3, 0, 3, 0, 2, 1, 0, 1, 3, 0, 0, 3, 0, 0, 3, 0, 3, 3, 0, 2,
       0, 0, 3, 0, 3, 3, 0, 2, 3, 3, 3, 3, 3, 0, 3, 3, 4, 3, 3, 3, 3, 3,
       3, 3, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 0, 2, 0, 3, 3, 0, 0, 2,
       0, 1, 0, 3, 0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 3, 0, 2, 3, 0, 4, 0, 1,
       0, 3, 4, 3, 3, 0, 0, 3, 3, 3, 0, 3, 1, 3, 3, 2, 0, 0, 3, 3, 0, 1,
       3, 3, 0, 3, 0, 0, 0, 0, 3, 0, 3, 0, 2, 4, 0, 0, 3, 3, 0, 0, 0, 3,
       0, 0, 3, 3, 3, 3, 0, 3, 3, 3, 0, 3, 3, 4, 0, 0, 2, 0, 2, 1, 4, 0,
       3, 0, 0, 3, 3, 3, 3, 0, 4, 3, 3, 3, 2, 0, 3, 0, 0, 1, 3, 0, 0, 3,
       0, 0, 3, 4, 3, 1, 3, 0, 0, 3, 3, 0, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0,
       4, 0, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 2, 3, 3, 0, 0, 3, 1, 2, 3, 3,
       0, 0, 0, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 0, 4, 0, 3, 3, 1, 2, 3, 3,
       0, 3, 0, 3, 0, 0, 2, 2, 0, 0, 2, 3, 3, 2, 0,