# Sentiment Classification for Yelp Restaurant Reviews using CNN in PyTorch
- For article [Click Here](https://towardsdatascience.com/sentiment-classification-using-cnn-in-pytorch-fba3c6840430)

1. Generate Word2Vec model and save it plus KeyedVectors (weights)
2. Create input tensor which has the index from Word2Vec model as the representer of each word plus pad token index for empty places

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:87% !important; }</style>"))

In [2]:
import pandas as pd

In [3]:
top_data_df = pd.read_csv('yelp_review.csv')
print("Columns in the original dataset:\n")
print(top_data_df.columns)
len(top_data_df)

Columns in the original dataset:

Index(['review_id', 'user_id', 'business_id', 'stars', 'date', 'text',
       'useful', 'funny', 'cool'],
      dtype='object')


5261668

### After the data is available, mapping from stars to sentiment is done and distribution for each sentiment is plotted. 

In [4]:
import matplotlib.pyplot as plt 
plt.style.use('dark_background')

print("Number of rows per star rating:")
print(top_data_df['stars'].value_counts())

# Function to map stars to sentiment
def map_sentiment(stars_received):
    if stars_received <= 2:
        return -1
    elif stars_received == 3:
        return 0
    else:
        return 1
# Mapping stars to sentiment into three categories
top_data_df['sentiment'] = [ map_sentiment(x) for x in top_data_df['stars']]
# Plotting the sentiment distribution
plt.figure()
pd.value_counts(top_data_df['sentiment']).plot.bar(title="Sentiment distribution in df")
plt.xlabel("Sentiment")
plt.ylabel("No. of rows in df")
plt.show()

Number of rows per star rating:
5    2253347
4    1223316
1     731363
3     615481
2     438161
Name: stars, dtype: int64


<Figure size 640x480 with 1 Axes>

### Positive : 1
### Negative: -1
### Neutral: 0

### let's create a samller subsample of the original dataframe

In [5]:
top_data_df_small = pd.DataFrame.sample(top_data_df, frac = 0.1).reset_index()
top_data_df_small

Unnamed: 0,index,review_id,user_id,business_id,stars,date,text,useful,funny,cool,sentiment
0,2189500,3ZC0Bn3XvJYGs9FUJpt0og,7gL0foPFp3Nui3K7PTWPtw,2HDL09kYFltw77X2G-Kwyg,5,2016-11-15,Kirby and the guys treat you like family. Best...,1,1,1,1
1,2170885,KmPbtiESkhnSo6NItOiZig,QwPCdaMMEoxcqhN3DN_4WA,BcW7Z9lPmOB_8eS2lEuOqQ,1,2016-08-05,Too bad I cannot give a negative star because ...,1,2,0,-1
2,2366333,XTPmmCh4DzNgK3P8sDgCxQ,DzyBToexbfik0zt7ppky-Q,HutsUcVbARYK_ye7lGtKhg,3,2013-01-09,Since I am always trying to find the perfect D...,6,5,2,0
3,2951344,_gld1nMMzXJLllZFwz84-Q,fHR3NigJcqU03HX-z3WOFg,ciMCFExQ2_yoBsdkDSWOlw,5,2017-01-31,Excellent doctor! Excellent treatment plan and...,0,0,0,1
4,326772,4mYUxEWaLRGeexkDyVs7zg,YBJVuZ4gTMLV3DBjZ16s9A,KSUpHqi0pu-rpouVs-KVdA,3,2014-11-14,Rien de spectaculaire! Service courtois mais s...,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
526162,2782542,gaN4m3GV0c1cjl3Nb77qPQ,F3dKpfp0EpxkL-rDZSKzvA,0v8icS8wOOgEDiHDCOQkZQ,5,2015-11-17,Great Chili chicken\n\nHad the dry with noodle...,1,1,1,1
526163,5238642,m8_g2ApN2DUDs3_rDtawEQ,33DAuLLsKBlfGw7QsTHK5Q,T7yKffTuhiIZclsgjfgowA,5,2016-01-09,Great atmosphere and food. Try the meatball sa...,0,0,0,1
526164,1098467,vnhnvNxAT4lKosbQohyZOw,HKPdEatcYoEfOL8MEKsWOQ,QbbpMJiSU4M5g3x-q-lLuQ,5,2016-03-04,Just moved to Charlotte in September '15 and j...,0,0,0,1
526165,791452,cNThdWKI50ah46I58mLISw,mcq1qdkjI7M-E1BXeFXstg,JG9UpsFR6hrqQqsKh_RyaQ,3,2017-03-01,Been wanting to try Planta for a long time aft...,0,0,0,0


In [6]:
#top_data_df_small.reset_index(inplace = True, drop = True)
top_data_df_small.drop(columns = 'index', inplace=True)
top_data_df_small.head()

Unnamed: 0,review_id,user_id,business_id,stars,date,text,useful,funny,cool,sentiment
0,3ZC0Bn3XvJYGs9FUJpt0og,7gL0foPFp3Nui3K7PTWPtw,2HDL09kYFltw77X2G-Kwyg,5,2016-11-15,Kirby and the guys treat you like family. Best...,1,1,1,1
1,KmPbtiESkhnSo6NItOiZig,QwPCdaMMEoxcqhN3DN_4WA,BcW7Z9lPmOB_8eS2lEuOqQ,1,2016-08-05,Too bad I cannot give a negative star because ...,1,2,0,-1
2,XTPmmCh4DzNgK3P8sDgCxQ,DzyBToexbfik0zt7ppky-Q,HutsUcVbARYK_ye7lGtKhg,3,2013-01-09,Since I am always trying to find the perfect D...,6,5,2,0
3,_gld1nMMzXJLllZFwz84-Q,fHR3NigJcqU03HX-z3WOFg,ciMCFExQ2_yoBsdkDSWOlw,5,2017-01-31,Excellent doctor! Excellent treatment plan and...,0,0,0,1
4,4mYUxEWaLRGeexkDyVs7zg,YBJVuZ4gTMLV3DBjZ16s9A,KSUpHqi0pu-rpouVs-KVdA,3,2014-11-14,Rien de spectaculaire! Service courtois mais s...,1,0,0,0


### Preprocessing the data


In [7]:
from gensim.parsing.preprocessing import remove_stopwords

In [8]:
from gensim.utils import simple_preprocess
# Tokenize the text column to get the new column 'tokenized_text'
top_data_df_small['tokenized_text'] = [simple_preprocess(line, deacc=True) for line in top_data_df_small['text']] 
print(top_data_df_small['tokenized_text'].head(10))

0    [kirby, and, the, guys, treat, you, like, fami...
1    [too, bad, cannot, give, negative, star, becau...
2    [since, am, always, trying, to, find, the, per...
3    [excellent, doctor, excellent, treatment, plan...
4    [rien, de, spectaculaire, service, courtois, m...
5    [this, place, has, good, sushi, however, the, ...
6    [by, far, the, worst, boston, pizza, ever, bas...
7    [attended, young, professional, group, event, ...
8    [excellent, bbq, been, going, here, for, about...
9    [came, back, here, to, bring, my, sister, and,...
Name: tokenized_text, dtype: object


In [9]:
# now we have a new column: 'tokenized_text'
top_data_df_small.head()

Unnamed: 0,review_id,user_id,business_id,stars,date,text,useful,funny,cool,sentiment,tokenized_text
0,3ZC0Bn3XvJYGs9FUJpt0og,7gL0foPFp3Nui3K7PTWPtw,2HDL09kYFltw77X2G-Kwyg,5,2016-11-15,Kirby and the guys treat you like family. Best...,1,1,1,1,"[kirby, and, the, guys, treat, you, like, fami..."
1,KmPbtiESkhnSo6NItOiZig,QwPCdaMMEoxcqhN3DN_4WA,BcW7Z9lPmOB_8eS2lEuOqQ,1,2016-08-05,Too bad I cannot give a negative star because ...,1,2,0,-1,"[too, bad, cannot, give, negative, star, becau..."
2,XTPmmCh4DzNgK3P8sDgCxQ,DzyBToexbfik0zt7ppky-Q,HutsUcVbARYK_ye7lGtKhg,3,2013-01-09,Since I am always trying to find the perfect D...,6,5,2,0,"[since, am, always, trying, to, find, the, per..."
3,_gld1nMMzXJLllZFwz84-Q,fHR3NigJcqU03HX-z3WOFg,ciMCFExQ2_yoBsdkDSWOlw,5,2017-01-31,Excellent doctor! Excellent treatment plan and...,0,0,0,1,"[excellent, doctor, excellent, treatment, plan..."
4,4mYUxEWaLRGeexkDyVs7zg,YBJVuZ4gTMLV3DBjZ16s9A,KSUpHqi0pu-rpouVs-KVdA,3,2014-11-14,Rien de spectaculaire! Service courtois mais s...,1,0,0,0,"[rien, de, spectaculaire, service, courtois, m..."


In [10]:
from gensim.parsing.porter import PorterStemmer
porter_stemmer = PorterStemmer()
# Get the stemmed_tokens
top_data_df_small['stemmed_tokens'] = [[porter_stemmer.stem(word) for word in tokens] for tokens in top_data_df_small['tokenized_text'] ]
top_data_df_small['stemmed_tokens'].head(10)


0    [kirbi, and, the, gui, treat, you, like, famil...
1    [too, bad, cannot, give, neg, star, becaus, th...
2    [sinc, am, alwai, try, to, find, the, perfect,...
3    [excel, doctor, excel, treatment, plan, and, v...
4    [rien, de, spectaculair, servic, courtoi, mai,...
5    [thi, place, ha, good, sushi, howev, the, serv...
6    [by, far, the, worst, boston, pizza, ever, bas...
7    [attend, young, profession, group, event, here...
8    [excel, bbq, been, go, here, for, about, year,...
9    [came, back, here, to, bring, my, sister, and,...
Name: stemmed_tokens, dtype: object

In [11]:
# now we have a new column: 'stemmed_tokens'
top_data_df_small.head()

Unnamed: 0,review_id,user_id,business_id,stars,date,text,useful,funny,cool,sentiment,tokenized_text,stemmed_tokens
0,3ZC0Bn3XvJYGs9FUJpt0og,7gL0foPFp3Nui3K7PTWPtw,2HDL09kYFltw77X2G-Kwyg,5,2016-11-15,Kirby and the guys treat you like family. Best...,1,1,1,1,"[kirby, and, the, guys, treat, you, like, fami...","[kirbi, and, the, gui, treat, you, like, famil..."
1,KmPbtiESkhnSo6NItOiZig,QwPCdaMMEoxcqhN3DN_4WA,BcW7Z9lPmOB_8eS2lEuOqQ,1,2016-08-05,Too bad I cannot give a negative star because ...,1,2,0,-1,"[too, bad, cannot, give, negative, star, becau...","[too, bad, cannot, give, neg, star, becaus, th..."
2,XTPmmCh4DzNgK3P8sDgCxQ,DzyBToexbfik0zt7ppky-Q,HutsUcVbARYK_ye7lGtKhg,3,2013-01-09,Since I am always trying to find the perfect D...,6,5,2,0,"[since, am, always, trying, to, find, the, per...","[sinc, am, alwai, try, to, find, the, perfect,..."
3,_gld1nMMzXJLllZFwz84-Q,fHR3NigJcqU03HX-z3WOFg,ciMCFExQ2_yoBsdkDSWOlw,5,2017-01-31,Excellent doctor! Excellent treatment plan and...,0,0,0,1,"[excellent, doctor, excellent, treatment, plan...","[excel, doctor, excel, treatment, plan, and, v..."
4,4mYUxEWaLRGeexkDyVs7zg,YBJVuZ4gTMLV3DBjZ16s9A,KSUpHqi0pu-rpouVs-KVdA,3,2014-11-14,Rien de spectaculaire! Service courtois mais s...,1,0,0,0,"[rien, de, spectaculaire, service, courtois, m...","[rien, de, spectaculair, servic, courtoi, mai,..."


In [12]:
top_data_df_small.to_csv('top_data_df_small')

### Splitting into Train and Test Sets

In [13]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(top_data_df_small, stratify = top_data_df_small.stars, test_size = 0.3, random_state = 42)

In [14]:
# set new indices for both dataframes and drop the previus indices
train.reset_index(drop=True), test.reset_index(drop = True)
len(train), len(test)

(368316, 157851)

In [15]:
train.to_csv('yelp_reviews_train.csv', index = False)
test.to_csv('yelp_reviews_test.csv', index = False)

## Start from here

In [16]:
# top_data_df_small = pd.read_csv('top_data_df_small')
top_data_df_small = pd.read_pickle("./top_data_df_small.pkl")
top_data_df_small.head()

Unnamed: 0,review_id,user_id,business_id,stars,date,text,useful,funny,cool,sentiment,stemmed_tokens
0,m5jjU8KhAPmDSa5BIopIqw,F9vYcUknd9JY2lxsaEObQQ,T6ihfy4SYiF4PvuE6Y0VPA,3,2015-01-29,Airport Wendy's. You curbed my hunger. That wa...,1,2,2,0,"[airport, wendy, curbed, hunger, needed, fries..."
1,pCURaqs8o9kCOl6fEVcsKA,H5d_nFqzwrREE-YduK2ABg,fPpO5751xJI78__uTU2q7g,5,2008-01-13,I stumbled across this store on my way to Nest...,19,6,13,1,"[stumbled, across, store, way, nest, right, ne..."
2,2C8Gr_EX_gVTlJsobcey6w,xycmBfvZtDX9Bao9kwNQCw,sdE4iWulUozJXOxzQ5Bjhw,3,2016-05-22,Pizza was decent. Very disappointed in the del...,0,3,0,0,"[pizza, decent, disappointed, delivery, told, ..."
3,JWwPv1cIS0YfiQrKtcL9nA,dccateTjyakPfsWd5U0wsQ,K6fYrrTorlpXmqutRcrHzg,3,2010-01-15,My first time: the bartenders were so cute [an...,3,1,1,0,"[first, time, bartenders, cute, happy, second,..."
4,xW3umQlqu00xiu9UgkBDHw,OvpTIjhGpg2y2kklHa47NQ,Jt28TYWanzKrJYYr0Tf1MQ,3,2014-12-11,I was in las vegas staying at the Paris hotel ...,0,0,2,0,"[las, vegas, staying, paris, hotel, sisters, b..."


In [17]:
len(top_data_df_small)

52617

In [18]:
train = pd.read_pickle("./yelp_reviews_train.pkl")
test = pd.read_pickle("./yelp_reviews_test.pkl")

In [19]:
len(train), len(test)

(36831, 15786)

### Convolutional Neural Network for Text Classification

- These layers are used to find patterns by sliding small kernel window over input. Instead of multiplying the filters on the small regions of the images, it slides through embedding vectors of few words as mentioned by window size. 
- For looking at sequences of word embeddings, the window has to look at multiple word embeddings in a sequence. They will be rectangular with size window_size * embedding_size. For example, in our case if window size is 3 and embedding size is 500, then kernel will be 3*500. This essentially represents n-grams in the model.
- The kernel weights (filter) are multiplied to word embeddings in pairs and summed up to get output values. As the network is being learned, these kernel weights are also being learned

![Conv Filter](https://miro.medium.com/max/626/1*A094Vuq3OiLFVD2ogxUS7Q.gif "chess")

### Input and output channels for Convolutional

 - We are feeding only one feature i.e. word embedding so the first parameter for conv2d is 1 (like grayscale images) and output_channels is total number of features which will be NUM_FILTERS.

### Maxpooing

 - Once we have the feature vector and it has extracted the significant features, it is enough to know that it exists in sentence like some positive phrase “great food” and it does not matter where it appears in the sentence. 
 
- Maxpooling is used to just get that information and discard the rest of it. For example, in the above animation the feature vector we had, after applying maxpooling, the max value will be chosen. In the above case it shows max when very and delicious are in the phrase, which makes sense.

In [20]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torch
# Use cuda if present
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device available for running: ")
print(device)

Device available for running: 
cuda


### Generating input and output tensor

- We're going to use `Doc2Vec` by gensim
- All review will be converted to a tensor of fixed length


In [21]:
%%capture
!pip install sent2vec
import nltk
nltk.download('punkt')
from scipy import spatial
from sent2vec.vectorizer import Vectorizer
from sent2vec.splitter import Splitter
from nltk.tokenize import word_tokenize

### Doc2Vec

In [22]:
top_data_df_small.stemmed_tokens[:10]

0    [airport, wendy, curbed, hunger, needed, fries...
1    [stumbled, across, store, way, nest, right, ne...
2    [pizza, decent, disappointed, delivery, told, ...
3    [first, time, bartenders, cute, happy, second,...
4    [las, vegas, staying, paris, hotel, sisters, b...
5    [friend, stopped, around, saturday, 5th, waiti...
6    [looking, rolex, stolen, spa, sunday, july, ma...
7    [awesome, office, friendly, staff, come, see, ...
8    [lunch, meeting, took, find, place, still, mor...
9    [awesome, one, favorite, stops, work, came, hu...
Name: stemmed_tokens, dtype: object

In [24]:
from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

#words_list is a list of list (list of all reveiews where each review is convetred into tokens) + 'pad' token as a seperate review
words_list = [x for x in top_data_df_small['stemmed_tokens']]
docs = [TaggedDocument(doc, [i]) for i, doc in enumerate(words_list)] # tagged Documents
print(f"number of ducuments in the data is: {len(docs)}")
docs[:5]

number of ducuments in the data is: 52617


[TaggedDocument(words=['airport', 'wendy', 'curbed', 'hunger', 'needed', 'fries', 'tuuuurible', 'loaded', 'salt', 'pepper', 'dipped', 'away', 'ketchup', 'wendy', 'probably', 'tables', 'get', 'tight', 'place', 'go', 'eat', 'gate', 'tables', 'gate', 'became', 'awkward', 'eating', 'fries', 'bag', 'ground', 'balancing', 'soda', 'legs', 'holding', 'book', 'one', 'hand', 'burger', 'yes', 'would', 'like', 'tables', 'airport', 'problem', 'wendy', 'also', 'reminded', 'always', 'better', 'mcdonald', 'burger', 'king', 'okay', 'book', 'wendy'], tags=[0]),
 TaggedDocument(words=['stumbled', 'across', 'store', 'way', 'nest', 'right', 'next', 'door', 'love', 'love', 'love', 'furniture', 'expensive', 'high', 'quality', 'traditional', 'store', 'small', 'jammed', 'packed', 'full', 'furniture', 'fine', 'accessories', 'looking', 'classic', 'design', 'floor', 'lamp', 'real', 'impressive', 'designs', 'also', 'liked', 'several', 'wall', 'mirrors', 'well', 'buffet', 'lamps', 'nice', 'artwork', 'also', 'employ

In [27]:
import gensim 

model = gensim.models.doc2vec.Doc2Vec(docs, vector_size = 300, min_count=1, epoch = 30)
type(model)

gensim.models.doc2vec.Doc2Vec

In [28]:
model.train(docs, total_examples=model.corpus_count, epochs=model.epochs)

In [29]:
#Doc vector: note here we use the original document tokens, not the tagged version
rev = words_list[0]
print(rev)
print()
print(model.infer_vector(rev).shape)
model.infer_vector(rev)

['airport', 'wendy', 'curbed', 'hunger', 'needed', 'fries', 'tuuuurible', 'loaded', 'salt', 'pepper', 'dipped', 'away', 'ketchup', 'wendy', 'probably', 'tables', 'get', 'tight', 'place', 'go', 'eat', 'gate', 'tables', 'gate', 'became', 'awkward', 'eating', 'fries', 'bag', 'ground', 'balancing', 'soda', 'legs', 'holding', 'book', 'one', 'hand', 'burger', 'yes', 'would', 'like', 'tables', 'airport', 'problem', 'wendy', 'also', 'reminded', 'always', 'better', 'mcdonald', 'burger', 'king', 'okay', 'book', 'wendy']

(300,)


array([-1.70102865e-02, -3.28667648e-02, -7.58062378e-02,  1.86999977e-01,
       -5.71061596e-02, -1.41477957e-01, -4.36102338e-02,  5.08234650e-03,
       -7.32778981e-02,  3.09163854e-02,  1.22178808e-01,  5.15694544e-02,
       -4.01053727e-02, -1.39332920e-01,  1.43719926e-01,  7.37304911e-02,
       -5.61312586e-02, -2.10011913e-03, -2.19460186e-02, -5.31443954e-02,
       -2.36852542e-01,  7.19503239e-02, -6.75014630e-02, -5.50109968e-02,
        7.50751719e-02,  7.23573416e-02,  5.41276596e-02, -4.79805432e-02,
        2.23088823e-03, -3.04220300e-02, -1.80115420e-02,  1.11837462e-01,
        1.99755386e-01,  4.86937836e-02, -3.46923135e-02, -7.88292810e-02,
       -9.83174052e-03,  9.74044576e-03,  7.49345170e-03, -9.11588073e-02,
       -1.23039231e-01,  7.27808774e-02,  4.60539088e-02, -4.37726341e-02,
       -1.85607076e-02, -7.55400443e-03, -2.00974628e-01, -4.20418270e-02,
       -1.12139275e-02,  8.33669119e-03, -6.04116358e-02, -1.22287534e-02,
        1.08174138e-01,  

### For creating the output tensor, mapping from label to positive values has to be done. 
- Currently we had -1 for negative, this is not possible in neural network. 
- Three neurons in the output layer will give probabilities for each label so we just need mapping to positive numbers

In [30]:
# Function to get the output tensor
def make_target(label):
    if label == -1:
        return torch.tensor([0], dtype=torch.long, device=device)
    elif label == 0:
        return torch.tensor([1], dtype=torch.long, device=device)
    else:
        return torch.tensor([2], dtype=torch.long, device=device)


In [31]:
from torch.utils.data import Dataset

class make_dataset(Dataset):
    def __init__(self, dataframe):
        if type(dataframe) == str: # when input is the name of a csv file
            df = pd.read_csv(dataframe)
        else: # when a dataframe is directly given
            df = dataframe
                    
        X = df['stemmed_tokens'].apply(model.infer_vector)
        self.X = torch.tensor(X, dtype = torch.float32, device=device) # these are decimals
        
        self.y = df['sentiment'].apply(make_target) # returns 0, 1 or 2 as label 
        #self.y = torch.tensor(y, dtype = torch.float32) # these are 0 or 1 floats
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):    
        return self.X[idx], self.y[idx]

In [32]:
train_data = make_dataset(train)
test_data = make_dataset(test)
len(train_data), len(test_data)

(36831, 15786)

In [33]:
train_data[5][0]

tensor([ 0.0186,  0.0169, -0.0513,  0.0302,  0.0508, -0.0257,  0.0389, -0.0472,
        -0.0114,  0.0031,  0.0255, -0.0093, -0.0074, -0.0480,  0.0434, -0.0787,
        -0.0195,  0.0429, -0.0399,  0.0239, -0.0276,  0.0547, -0.0262, -0.0795,
         0.0265,  0.0285,  0.0206, -0.0630,  0.0356, -0.0010,  0.0004, -0.0140,
        -0.0276,  0.0728,  0.0096, -0.0340,  0.0558, -0.0370, -0.0209, -0.0395,
         0.0102,  0.0165,  0.0554,  0.0047, -0.0166,  0.0174, -0.0612,  0.0281,
         0.0304,  0.0100, -0.0361, -0.0001, -0.0189,  0.0525, -0.0028,  0.0044,
        -0.0313,  0.0800, -0.0042, -0.0072,  0.0123,  0.0363,  0.0365,  0.0398,
         0.0385,  0.0511, -0.0119,  0.0061,  0.0010,  0.0592, -0.0174, -0.0083,
         0.0473,  0.0600, -0.0158, -0.0016,  0.0320,  0.0541, -0.0035, -0.0539,
         0.0362, -0.0800, -0.0216,  0.0040,  0.0059, -0.0522, -0.0534,  0.0105,
        -0.0293,  0.0113, -0.0528,  0.0490, -0.0040, -0.0415, -0.0959, -0.0348,
        -0.0083,  0.0402,  0.0138, -0.00

In [34]:
train_data[32][1]

tensor([2], device='cuda:0')

In [35]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 32, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = 32, drop_last=True )

In [36]:
data_iter = iter(train_loader)

predictors, target = next(data_iter)
print(predictors.squeeze(1).shape, target.shape) # each train batch has 2 elements in it

torch.Size([32, 300]) torch.Size([32, 1])


In [37]:
target

tensor([[2],
        [1],
        [2],
        [0],
        [2],
        [1],
        [2],
        [2],
        [2],
        [1],
        [2],
        [2],
        [1],
        [1],
        [0],
        [0],
        [2],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [2],
        [2],
        [1],
        [0],
        [1],
        [2],
        [2],
        [2],
        [2]], device='cuda:0')

In [38]:
tensor = torch.randn(32,100,1) # 32 is batch_size, 100 is dimnesion of input vector, 1 is added to work with conv1d
output = nn.Conv1d(in_channels =100,out_channels=1,kernel_size=1,stride=1)(tensor)
output.shape 

import torch
from torch import nn

a = torch.randn(32, 100, 1) 
a = a.permute(0, 2, 1) # (32, 1, 100)
m = nn.Conv1d(1, 50, 3, padding = 1) 
out = m(a) 
print(out.size())# (32, 50, 100)
print(m)

torch.Size([32, 50, 100])
Conv1d(1, 50, kernel_size=(3,), stride=(1,), padding=(1,))


In [39]:
# EMBEDDING_SIZE = 500
NUM_FILTERS = 100
import gensim

class CnnTextClassifier(nn.Module):
    def __init__(self, num_classes, window_sizes=(1,2,3,5)):
        super(CnnTextClassifier, self).__init__()

        # for each window size, 1 conv layer
        self.convs = nn.ModuleList([ 
                               nn.Conv1d(1, NUM_FILTERS, window_size, padding=(window_size - 1))
                               for window_size in window_sizes
        ])

        self.fc = nn.Linear(NUM_FILTERS * len(window_sizes), num_classes)

    def forward(self, x):


        # Apply a convolution + max_pool layer for each window size
        x = torch.unsqueeze(x, 1) # (32, 1 ,300)
        xs = []
        for conv in self.convs:
            x2 = torch.tanh(conv(x)) #[32, 100, 300] > >[32, 100, 301] >> [32, 100, 302] >> [32, 100, 304]
            # print(x2.shape) = torch.squeeze(x2, -1)
            
            x2 = F.max_pool1d(x2, x2.size(2))  
            # print(x2.shape) # [32, 100, 1]
            xs.append(x2) # combines all these matricies to from one final matrix of all detected features
            # print("xs: ", len(xs)) # 4, a list of 4 matricies eaxch is [32, 100, 1]
        x = torch.cat(xs, 2) # [32, 100, 4])
        
        # FC, x.size(0) is the batch_size
        x = x.view(x.size(0), -1) # flatten the feature matrix into a vector [32, 400]
        #print(x.shape)
        logits = self.fc(x)

        return logits


In [40]:
### criterion = nn.NLLLoss()
NUM_CLASSES = 3
#VOCAB_SIZE = len(w2v_model.wv.vocab)

cnn_model = CnnTextClassifier( num_classes=NUM_CLASSES)
cnn_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

In [46]:
import torch
from tqdm import tqdm
def check_accuracy(data_loader, model):
    model.to(device)
    with torch.no_grad():
        val_epoch_loss = 0
        val_epoch_acc = 0
        cnn_model.eval()
        #for data, targets in test_loader:
        for e in range(3):
            loop = tqdm(enumerate(data_loader), total = len(data_loader), leave = False)
            for batch_idx, (data, targets) in loop:
             #X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                data = data.to(device) 
                targets = targets.squeeze(1).to(device) # converts [32, 1] to [32]
                
                predictions = model(data)
                val_loss = criterion(predictions, targets)

                _, preds = predictions.max(1)
                corrects = (preds == targets).float()
                test_acc = corrects.sum() / len(corrects) 

                val_epoch_loss += val_loss.item()
                val_epoch_acc += test_acc.item()
            print(f"""Epoch {e+1}: | Train Loss: {val_epoch_loss/len(data_loader):.5f} | Train Acc: {val_epoch_acc/len(data_loader):.5f}   """) 

In [47]:
check_accuracy(train_loader, cnn_model)

  1%|▏         | 16/1150 [00:00<00:07, 158.84it/s]  

Epoch 1: | Train Loss: 1.08220 | Train Acc: 0.17092   


  2%|▏         | 20/1150 [00:00<00:05, 190.99it/s]  

Epoch 2: | Train Loss: 2.16439 | Train Acc: 0.34185   


                                                    

Epoch 3: | Train Loss: 3.24659 | Train Acc: 0.51277   




In [48]:
from tqdm import tqdm
    
loop = tqdm(enumerate(train_loader), total = len(train_loader). leave = False)
for batch_idx, (data, targets) in loop:
    
    
    loop.set_description(epoch/num_epochs)

SyntaxError: invalid syntax (<ipython-input-48-1dba38e5a72f>, line 3)

In [None]:
from tqdm import trange

print("Begin training.")
EPOCHS = 10
for e in trange(1, EPOCHS+1):
    num_correct = 0
    num_samples = 0
    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    cnn_model.train()
    for inputs, labels in train_loader:
        #print("inp: ", inputs.shape, labels.shape)
        #inputs = inputs.squeeze(1).to(device) # converts [32, 1, 774] to [32, 774]
        labels = labels.squeeze(1).to(device) # converts [32, 1] to [32]
        # print("inp: ", inputs.shape, labels.shape)
        #X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        optimizer.zero_grad()
        
        predictions = cnn_model(inputs)
        
        train_loss = criterion(predictions, labels)
        #train_acc = multi_acc(predictions, labels)
        #train_acc = check_accuracy(train_loader, cnn_model)
        _, preds = predictions.max(1) # _ is the max value, predictions is the max_indx
        correct = (preds == labels).float()
        #num_samples += preds.size(0)
        train_acc = correct.sum() / len(correct) # calculate the accutace for each batch in train_iterator
        assert len(correct) ==  32
        #round(float(num_correct)/float(num_samples) * 100, 2)
        
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
        
    
    # VALIDATION    
    with torch.no_grad():
        
        num_correct = 0
        num_samples = 0
        val_epoch_loss = 0
        val_epoch_acc = 0

        cnn_model.eval()
        for data, targets in test_loader:
        
            targets = targets.squeeze(1).to(device) # converts [32, 1] to [32]
            predictions = cnn_model(data)

            val_loss = criterion(predictions, targets)

            _, preds = predictions.max(1)
            corrects = (preds == targets).float()
            test_acc = corrects.sum() / len(corrects) 

            val_epoch_loss += val_loss.item()
            val_epoch_acc += test_acc.item()
                              

    print(f"""Epoch {e+0:03}: 
| Train Loss: {train_epoch_loss/len(train_loader):.5f} | Val Loss: {val_epoch_loss/len(test_loader):.5f} 
| Train Acc: {train_epoch_acc/len(train_loader):.5f}   | Val Acc: {val_epoch_acc/len(test_loader):.5f}""") 