# Sentiment analysis : word features

# Training process

### import required modules

In [1]:
import csv
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

import string
from nltk.corpus import stopwords as sw
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

### define some variables

In [2]:
max_features = 300
vectorizer = TfidfVectorizer(max_features=max_features)
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stopwords = sw.words('english')

### function to clean the word

In [3]:
def process_word(word):
    word = lemmatizer.lemmatize(word)
    word = stemmer.stem(word)
    return word

### function to process a sentence given in string format

In [4]:
def clean_sentence(sentence):
    sentence = sentence.translate(str.maketrans('', '', string.punctuation))
    sentence = sentence.lower()
    words = word_tokenize(sentence)
    words = list(filter(lambda word: word not in stopwords, words))
    words = list(map(process_word, words))
    return ' '.join(words)

### function to clean the sentence

In [5]:
def clean(row):
    sentence = row['content']
    return clean_sentence(sentence)

### function to get the data in the form of a dataframe

In [6]:
def get_dataframe(filename):
    raw_data = dict()
    reader = csv.reader(open('mini_data.csv'))
    headers = list(reader.__next__())
    for header in headers:
        raw_data[header] = list()
    for row in reader:
        raw_data['sentiment'].append(row[0])
        raw_data['content'].append(','.join(row[1:]))
    df = pd.DataFrame(raw_data)
    return df

### extract word features

In [7]:
data = get_dataframe('data.csv')
data['clean_content'] = data.apply(clean, axis=1)
feature = np.array(vectorizer.fit_transform(data['clean_content']).toarray())
print("length:", feature[0].size)
print(feature[0])

length: 300
[0.         0.         0.         0.         0.27296524 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.5110803  0.         0.
 0.         0.         0.         0.         0.         0.23098085
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.27296524 0.         0.         0.44328769
 0.         0.         0.         0.         0.         0.
 0.         0.         0.17965945 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.   

### sentiment one hot encoding function

In [8]:
def sentiment_encode(sentiment):
    def encode(row):
        return 1 if row['sentiment'] == sentiment else 0
    return encode

### one hot encode the data

In [40]:
for sentiment in set(data['sentiment']):
    encoder = sentiment_encode(sentiment)
    data[sentiment] = data.apply(encoder, axis=1)
data

Unnamed: 0,content,sentiment,clean_content,fear,disgust,guilt,anger,joy,shame,sadness
0,On days when I feel close to my partner and ot...,joy,day feel close partner friend feel peac also e...,0,0,0,0,1,0,0
1,Every time I imagine that someone I love or I ...,fear,everi time imagin someon love could contact se...,1,0,0,0,0,0,0
2,When I had been obviously unjustly treated and...,anger,obvious unjustli treat possibl elucid,0,0,0,1,0,0,0
3,When I think about the short time that we live...,sadness,think short time live relat period life think ...,0,0,0,0,0,0,1
4,At a gathering I found myself involuntarily si...,disgust,gather found involuntarili sit next two peopl ...,0,1,0,0,0,0,0
5,When I realized that I was directing the feeli...,shame,realiz direct feel discont partner way tri put...,0,0,0,0,0,1,0
6,I feel guilty when when I realize that I consi...,guilt,feel guilti realiz consid materi thing import ...,0,0,1,0,0,0,0
7,After my girlfriend had taken her exam we went...,joy,girlfriend taken exam went parent place,0,0,0,0,1,0,0
8,"When, for the first time I realized the meanin...",fear,first time realiz mean death,1,0,0,0,0,0,0
9,When a car is overtaking another and I am forc...,anger,car overtak anoth forc drive road,0,0,0,1,0,0,0


 # Training with just the word features

In [23]:
from keras.models import Sequential 
from keras.layers import LSTM, Dense
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

## Making the keras model picklable

In [25]:
### source : http://zachmoshe.com/2017/04/03/pickling-keras-models.html
import types
import tempfile
import keras.models
import h5py

def make_keras_picklable():
    def __getstate__(self):
        model_str = ""
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            import h5py
            keras.models.save_model(self, fd.name, overwrite=True)
            model_str = fd.read()
        d = { 'model_str': model_str }
        return d

    def __setstate__(self, state):
        with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
            fd.write(state['model_str'])
            fd.flush()
            model = keras.models.load_model(fd.name)
        self.__dict__ = model.__dict__


    cls = keras.models.Model
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__
make_keras_picklable()

In [26]:
def model_structure(label='default', random_seed=0, embedding_vector_length=128, max_features=300, 
                     n_units=60, activation='sigmoid', loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']):
    np.random.seed(0)
    model = Sequential()
    embedding_vector_length = embedding_vector_length
    model.add(Embedding(max_features, embedding_vector_length, input_length=max_features))
    model.add(LSTM(n_units))
    model.add(Dense(1, activation=activation))
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    print(label.upper())
    print(model.summary())
    print("-------------------------------------------------------------------------------------------------")
    return model

In [27]:
models = dict()
for sentiment in set(data['sentiment']):
    models[sentiment] = model_structure(sentiment)

FEAR
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 300, 128)          38400     
_________________________________________________________________
lstm_8 (LSTM)                (None, 60)                45360     
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 61        
Total params: 83,821
Trainable params: 83,821
Non-trainable params: 0
_________________________________________________________________
None
-------------------------------------------------------------------------------------------------
DISGUST
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 300, 128)          38400     
_________________________________________________________________
lstm_9 (LSTM)         

# THE TRAINING PHASE

In [28]:
models['disgust'].fit(feature, data['disgust'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x11eed8d30>

In [29]:
models['shame'].fit(feature, data['shame'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x12981c7f0>

In [30]:
models['fear'].fit(feature, data['fear'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x12a8f3518>

In [31]:
models['joy'].fit(feature, data['joy'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x1276deeb8>

In [32]:
models['sadness'].fit(feature, data['sadness'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x1298b9518>

In [33]:
models['anger'].fit(feature, data['anger'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x12a8ba898>

In [34]:
models['guilt'].fit(feature, data['guilt'], epochs=1, batch_size=64)

Epoch 1/1


<keras.callbacks.History at 0x12ceb2da0>

# SERIALIZE THEM INTO THE FILE

In [46]:
import pickle
import os
os.mkdir("objects")
for sentiment in set(data['sentiment']):
    pickle.dump(models[sentiment], open("objects/"+sentiment+"_model.pickle","wb"))
pickle.dump(vectorizer, open("objects/vectorizer.pickle", "wb"))