# Sentiment analysis: word features

### import required modules

In [35]:
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords as sw
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### define some variables

In [43]:
# Shared NLTK helpers used by process_word().
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

# English stopwords that process_sentence() drops from tokenized text.
stopwords = sw.words('english')

# Running token -> occurrence count, filled in by update_words_list().
frequency_of_words = {}

### function to get the data in the form of a dataframe

In [57]:
def get_dataframe(filename):
    """Load a sentiment CSV file into a pandas DataFrame.

    The first row is treated as the header and supplies the column names.
    In every following row the first field is stored under ``'sentiment'``
    and the remaining fields are re-joined with commas and stored under
    ``'content'``, so unquoted commas inside the text do not split it.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame with one row per non-blank data row, or an empty
        DataFrame when the file contains no header row.

    Raises:
        KeyError: If a data row is read but the header does not define a
            ``'sentiment'`` or ``'content'`` column.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(filename, newline='') as csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader, None)
        if headers is None:
            # Completely empty file: nothing to parse.
            return pd.DataFrame()
        raw_data = {header: [] for header in headers}
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines; skip them
                # instead of failing on row[0].
                continue
            raw_data['sentiment'].append(row[0])
            raw_data['content'].append(','.join(row[1:]))
    return pd.DataFrame(raw_data)

# Preview the DataFrame built from 'data.csv' (pandas abbreviates long output).
print(get_dataframe('data.csv'))

                                                content sentiment
0     On days when I feel close to my partner and ot...       joy
1     Every time I imagine that someone I love or I ...      fear
2     When I had been obviously unjustly treated and...     anger
3     When I think about the short time that we live...   sadness
4     At a gathering I found myself involuntarily si...   disgust
5     When I realized that I was directing the feeli...     shame
6     I feel guilty when when I realize that I consi...     guilt
7     After my girlfriend had taken her exam we went...       joy
8     When, for the first time I realized the meanin...      fear
9     When a car is overtaking another and I am forc...     anger
10    When I recently thought about the hard work it...   sadness
11      When I found a bristle in the liver paste tube.   disgust
12    When I was tired and unmotivated, I shouted at...     shame
13    When I think that I do not study enough.  Afte...     guilt
14    When

### function to process a word

In [45]:
def process_word(word):
    """Return *word* lemmatized and then stemmed.

    Uses the module-level ``lemmatizer`` first and feeds its result to the
    module-level ``stemmer``.
    """
    return stemmer.stem(lemmatizer.lemmatize(word))

### function to process a sentence

In [46]:
def process_sentence(sentence):
    """Turn a raw sentence into a list of normalized word tokens.

    Steps, in order: strip every character in ``string.punctuation``,
    lower-case the text, tokenize it with ``word_tokenize``, drop tokens
    found in the module-level ``stopwords``, and run each remaining token
    through ``process_word``.

    Returns:
        A list of processed tokens (possibly empty).
    """
    # NOTE(review): punctuation is removed before the stopword test, so
    # stopword entries that contain an apostrophe presumably never match;
    # confirm whether that is intended.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = sentence.translate(strip_punctuation).lower()
    return [
        process_word(token)
        for token in word_tokenize(cleaned)
        if token not in stopwords
    ]

### function to update the bag of words

In [47]:
def update_words_list(sentiment, words):
    """Add one occurrence to ``frequency_of_words`` for each item in *words*.

    Args:
        sentiment: Accepted for the caller's convenience; not used here.
        words: Iterable of tokens to count.
    """
    for token in words:
        # A token seen for the first time starts from zero.
        frequency_of_words[token] = frequency_of_words.get(token, 0) + 1

In [48]:
# Process every row once: keep its token list in `processed` and count the
# same tokens in the bag of words.
data = get_dataframe('data.csv')
processed = list()
for _, datum in data.iterrows():
    words = process_sentence(datum['content'])
    processed.append(words)
    # Pass the token list, not the raw string: iterating a str yields single
    # characters, which would make the bag of words count letters.
    update_words_list(datum['sentiment'], words)

In [49]:
processed

[['everi',
  'time',
  'imagin',
  'someon',
  'love',
  'could',
  'contact',
  'seriou',
  'ill'],
 ['obvious', 'unjustli', 'treat', 'possibl', 'elucid'],
 ['think',
  'short',
  'time',
  'live',
  'relat',
  'period',
  'life',
  'think',
  'use',
  'short',
  'time'],
 ['gather',
  'found',
  'involuntarili',
  'sit',
  'next',
  'two',
  'peopl',
  'express',
  'opinion',
  'consid',
  'low',
  'discrimin'],
 ['realiz',
  'direct',
  'feel',
  'discont',
  'partner',
  'way',
  'tri',
  'put',
  'blame',
  'instead',
  'sort',
  'feeli'],
 ['feel',
  'guilti',
  'realiz',
  'consid',
  'materi',
  'thing',
  'import',
  'care',
  'rel',
  'feel',
  'selfcent'],
 ['girlfriend', 'taken', 'exam', 'went', 'parent', 'place'],
 [],
 ['car', 'overtak', 'anoth', 'forc', 'drive', 'road'],
 ['recent', 'thought', 'hard', 'work', 'take', 'studi'],
 ['found', 'bristl', 'liver', 'past', 'tube'],
 ['tire', 'unmotiv'],
 ['think',
  'studi',
  'enough',
  'weekend',
  'think',
  'abl',
  'accompl