### Example 01

In [2]:
import numpy as np
from numpy import argmax
from tensorflow.keras.utils import to_categorical

data = [1, 3, 2, 0, 3, 2, 2, 1, 0, 1]
data = np.array(data)

encoded = to_categorical(data)

encoded

# 1 = [0., 1., 0., 0.]
# 3 = [0., 0., 0., 1.]
# 2 = [0., 0., 1., 0.]
# 0 = [1., 0., 0., 0.]



array([[0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)

### Example 02

In [5]:
from tensorflow.keras.utils import to_categorical

y = [1,2,3,4,5]

to_categorical(y, num_classes=10)

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], dtype=float32)

### Example 03

In [3]:
import pandas as pd

train = [("Thanks for an excellent report", "pos"),
         ("Your service is very quick and fast", "pos"),
        ("I am pleased with your service", "pos"),
        ("I did not know i was diabetic until you gave me this report", "neg"),
        ("Service - Little slow, probably because too many people.", "neg"),
        ("The place is not easy to locate", "neg"),
        ("The place is very easy to locate", "pos"),
        ("Not satisfied will take a second opinion", "neg"),
        ("No human contact everything is so robotic here", "neg")]

df = pd.DataFrame(train,columns=['review','sentiment'])

df.head()

Unnamed: 0,review,sentiment
0,Thanks for an excellent report,pos
1,Your service is very quick and fast,pos
2,I am pleased with your service,pos
3,I did not know i was diabetic until you gave m...,neg
4,"Service - Little slow, probably because too ma...",neg


In [8]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stopwords = stopwords.words('english')

In [9]:
data = list(df['review'].values)

data

['Thanks for an excellent report',
 'Your service is very quick and fast',
 'I am pleased with your service',
 'I did not know i was diabetic until you gave me this report',
 'Service - Little slow, probably because too many people.',
 'The place is not easy to locate',
 'The place is very easy to locate',
 'Not satisfied will take a second opinion',
 'No human contact everything is so robotic here']

In [10]:
data_token = [word_tokenize(x.lower()) for x in data]
clean_data = []

for sent in data_token:
    print([x for x in sent if (x not in stopwords and x not in "-.,")])
    clean_data.append([x for x in sent if (x not in stopwords and x not in "-.,")])

['thanks', 'excellent', 'report']
['service', 'quick', 'fast']
['pleased', 'service']
['know', 'diabetic', 'gave', 'report']
['service', 'little', 'slow', 'probably', 'many', 'people']
['place', 'easy', 'locate']
['place', 'easy', 'locate']
['satisfied', 'take', 'second', 'opinion']
['human', 'contact', 'everything', 'robotic']


In [14]:
clean_data

[['thanks', 'excellent', 'report'],
 ['service', 'quick', 'fast'],
 ['pleased', 'service'],
 ['know', 'diabetic', 'gave', 'report'],
 ['service', 'little', 'slow', 'probably', 'many', 'people'],
 ['place', 'easy', 'locate'],
 ['place', 'easy', 'locate'],
 ['satisfied', 'take', 'second', 'opinion'],
 ['human', 'contact', 'everything', 'robotic']]

In [53]:
vocabs = list(set([val for sublist in clean_data for val in sublist]))

# Get Word2Id
word2id={}

i = 1
for vocab in vocabs:
    case = {vocab:i}
    word2id.update(case)
    i= i+1
    
print(word2id)

id2word = {v:k for k, v in word2id.items()}

print("\n")
print(id2word)

print("\n")
wids = [[word2id[y.lower()] for y in x] for x in clean_data]

wids

{'quick': 1, 'excellent': 2, 'know': 3, 'probably': 4, 'report': 5, 'slow': 6, 'second': 7, 'diabetic': 8, 'fast': 9, 'locate': 10, 'satisfied': 11, 'thanks': 12, 'little': 13, 'service': 14, 'contact': 15, 'human': 16, 'easy': 17, 'place': 18, 'take': 19, 'robotic': 20, 'gave': 21, 'people': 22, 'everything': 23, 'many': 24, 'opinion': 25, 'pleased': 26}


{1: 'quick', 2: 'excellent', 3: 'know', 4: 'probably', 5: 'report', 6: 'slow', 7: 'second', 8: 'diabetic', 9: 'fast', 10: 'locate', 11: 'satisfied', 12: 'thanks', 13: 'little', 14: 'service', 15: 'contact', 16: 'human', 17: 'easy', 18: 'place', 19: 'take', 20: 'robotic', 21: 'gave', 22: 'people', 23: 'everything', 24: 'many', 25: 'opinion', 26: 'pleased'}




[[12, 2, 5],
 [14, 1, 9],
 [26, 14],
 [3, 8, 21, 5],
 [14, 13, 6, 4, 24, 22],
 [18, 17, 10],
 [18, 17, 10],
 [11, 19, 7, 25],
 [16, 15, 23, 20]]

In [72]:
from tensorflow.keras.preprocessing import sequence

x = sequence.pad_sequences(wids, maxlen=6)

print(x.shape)  # 9: Senetnces 6: max Lengt of Senetnce after cleanup

x

(9, 6)


array([[ 0,  0,  0, 12,  2,  5],
       [ 0,  0,  0, 14,  1,  9],
       [ 0,  0,  0,  0, 26, 14],
       [ 0,  0,  3,  8, 21,  5],
       [14, 13,  6,  4, 24, 22],
       [ 0,  0,  0, 18, 17, 10],
       [ 0,  0,  0, 18, 17, 10],
       [ 0,  0, 11, 19,  7, 25],
       [ 0,  0, 16, 15, 23, 20]])

In [69]:
print("\n")

one_hot_arry = to_categorical(x)

print("One Hot Encoding of input:")
print(one_hot_arry)



One Hot Encoding of input:
[[[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 1.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 1. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]
