# Transfer Learning MNIST

* Train a simple convnet on the MNIST dataset the first 5 digits [0..4].
* Freeze convolutional layers and fine-tune dense layers for the classification of digits [5..9].

## 1. Import necessary libraries for the model

In [1]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense,Dropout,Activation,Flatten
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils import np_utils
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


## 2. Import MNIST data and create 2 datasets with one dataset having digits from 0 to 4 and other from 5 to 9 

In [0]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train_0to4 = x_train[y_train < 5]
y_train_0to4 = y_train[y_train < 5]
x_test_0to4 = x_test[y_test < 5]
y_test_0to4 = y_test[y_test < 5]

x_train_5to9 = x_train[y_train >= 5]
y_train_5to9 = y_train[y_train >= 5] -5
x_test_5to9 = x_test[y_test >= 5]
y_test_5to9 = y_test[y_test >= 5] -5

## 3. Print x_train, y_train, x_test and y_test for both the datasets

In [3]:
print (x_train_0to4[5], y_train_0to4[5], x_test_0to4[5], y_test_0to4[5])
print (x_train_5to9[5], y_train_5to9[5], x_test_5to9[5], y_test_5to9[5])


(array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  38,  43,
        105, 255, 253, 253, 253, 253, 253, 174,   6,   0,   0,   0,   0,
          0,   0],
       [ 

## ** 4. Let us take only the dataset (x_train, y_train, x_test, y_test) for Integers 0 to 4 in MNIST **
## Reshape x_train and x_test to a 4 Dimensional array (channel = 1) to pass it into a Conv2D layer

In [0]:
x_train = x_train_0to4
y_train = y_train_0to4
x_test = x_test_0to4
y_test = y_test_0to4

In [5]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(30596, 28, 28)
(30596,)
(5139, 28, 28)
(5139,)


In [0]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

## 5. Normalize x_train and x_test by dividing it by 255

In [7]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

#Normalizing the input
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

('x_train shape:', (30596, 28, 28, 1))
(30596, 'train samples')
(5139, 'test samples')


## 6. Use One-hot encoding to divide y_train and y_test into required no of output classes

In [0]:
import keras
batch_size = 128
num_classes = 5
epochs = 12
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


In [9]:
print(y_train[0])

[1. 0. 0. 0. 0.]


## 7. Build a sequential model with 2 Convolutional layers with 32 kernels of size (3,3) followed by a Max pooling layer of size (2,2) followed by a drop out layer to be trained for classification of digits 0-4  

In [10]:
input_shape = (28, 28, 1)

#Initialize the model
model = Sequential()

#Add a Convolutional Layer with 32 filters of size 3X3 and activation function as 'ReLU' 
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape,name='conv_1'))

#Add a MaxPooling Layer of size 2X2 
model.add(MaxPooling2D(pool_size=(2, 2),name='max_1'))

#Apply Dropout with 0.25 probability 
model.add(Dropout(0.25,name='drop_1'))


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


## 8. Post that flatten the data and add 2 Dense layers with 128 neurons and neurons = output classes with activation = 'relu' and 'softmax' respectively. Add dropout layer inbetween if necessary  

In [0]:
#Flatten the layer
model.add(Flatten())

#Add Fully Connected Layer with 128 units and activation function as 'ReLU'
model.add(Dense(128, activation='relu',name='dense_1'))

#Apply Dropout with 0.5 probability 
model.add(Dropout(0.5,name='drop_2'))

#Add Fully Connected Layer with 10 units and activation function as 'softmax'
model.add(Dense(num_classes, activation='softmax',name='dense_2'))

## 9. Print the training and test accuracy

In [12]:
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.fit(x_train,y_train,batch_size=200,nb_epoch=12,verbose=1,validation_data=(x_test,y_test))

Instructions for updating:
Use tf.cast instead.


  


Train on 30596 samples, validate on 5139 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f23acd2d290>

In [13]:
#Testing the model on test and train set
score = model.evaluate(x_test, y_test)
print('Test accuracy:', score[1])
score = model.evaluate(x_train, y_train)
print('Train accuracy:', score[1])


('Test accuracy:', 0.9980540961276513)
('Train accuracy:', 0.9992155837364362)


## 10. Make only the dense layers to be trainable and convolutional layers to be non-trainable

In [14]:
#Freezing layers in the model which don't have 'dense' in their name
for layer in model.layers:
  if('dense' not in layer.name): #prefix detection to freeze layers which does not have dense
    #Freezing a layer
    layer.trainable = False

#Module to print colourful statements
from termcolor import colored

#Check which layers have been frozen 
for layer in model.layers:
  print (colored(layer.name, 'blue'))
  print (colored(layer.trainable, 'red'))

[34mconv_1[0m
[31mFalse[0m
[34mmax_1[0m
[31mFalse[0m
[34mdrop_1[0m
[31mFalse[0m
[34mflatten_1[0m
[31mFalse[0m
[34mdense_1[0m
[31mTrue[0m
[34mdrop_2[0m
[31mFalse[0m
[34mdense_2[0m
[31mTrue[0m


## 11. Use the model trained on 0 to 4 digit classification and train it on the dataset which has digits 5 to 9  (Using Transfer learning keeping only the dense layers to be trainable)

In [18]:
x_train_5to9 = x_train_5to9.reshape(x_train_5to9.shape[0], 28, 28, 1)
x_test_5to9 = x_test_5to9.reshape(x_test_5to9.shape[0], 28, 28, 1)
x_train_5to9 = x_train_5to9.astype('float32')
x_test_5to9 = x_test_5to9.astype('float32')

#Normalizing the input
x_train_5to9 /= 255
x_test_5to9 /= 255
print('x_train shape:', x_train_5to9.shape)
print(x_train_5to9.shape[0], 'train samples')
print(x_test_5to9.shape[0], 'test samples')

import keras
batch_size = 128
num_classes = 5
epochs = 12
# convert class vectors to binary class matrices
y_train_5to9 = keras.utils.to_categorical(y_train_5to9, num_classes)
y_test_5to9 = keras.utils.to_categorical(y_test_5to9, num_classes)


model.fit(x_train_5to9,y_train_5to9,batch_size=200,nb_epoch=12,verbose=1,validation_data=(x_test_5to9,y_test_5to9))

('x_train shape:', (29404, 28, 28, 1))
(29404, 'train samples')
(4861, 'test samples')
Train on 29404 samples, validate on 4861 samples
Epoch 1/12
 4200/29404 [===>..........................] - ETA: 0s - loss: 1.2158 - acc: 0.6771

  'Discrepancy between trainable weights and collected trainable'


Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f23a0730b10>

## 12. Print the accuracy for classification of digits 5 to 9

In [20]:
#Testing the model on test and train set
score = model.evaluate(x_test_5to9, y_test_5to9)
print('Test accuracy:', score[1])
score = model.evaluate(x_train_5to9, y_train_5to9)
print('Train accuracy:', score[1])


('Test accuracy:', 0.9907426454358277)
('Train accuracy:', 0.9975853625357094)


## Sentiment analysis <br> 

The objective of the second problem is to perform Sentiment analysis from the tweets data collected from the users targeted at various mobile devices.
Based on the tweet posted by a user (text), we will classify if the sentiment of the user targeted at a particular mobile device is positive or not.

### 13. Read the dataset (tweets.csv) and drop the NA's while reading the dataset

In [22]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


In [0]:
data = pd.read_csv('/gdrive/My Drive/tweets.csv')

In [28]:
data.shape

(9093, 3)

In [27]:
data.sample(5)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
2033,#sxsw #evaporation @mention @mention Business ...,iPad or iPhone App,Positive emotion
494,I think I fell a bit more in love with #google...,Google,Positive emotion
2683,Is #Google launching a new social network toda...,,No emotion toward brand or product
2578,"#sxsw hardware. iPad #rickshaw bag, #lunatik, ...",,No emotion toward brand or product
5097,RT @mention @mention @mention at #sxsw: &quot;...,Apple,Positive emotion


In [0]:
data = data.dropna()

In [31]:
data.shape

(3291, 3)

In [32]:
data.sample(5)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
5546,"RT @mention Before It Even Begins, #Apple Wins...",Apple,Positive emotion
467,"Before it even begins, Apple wins #SXSW {link}",Apple,Positive emotion
6956,RT @mention With 150 million mobile users on G...,Other Google product or service,Positive emotion
7950,Hey Marissa Mayer. Please tell us something ne...,Google,Negative emotion
8162,"&quot;At SXSW, Apple schools the marketing exp...",Apple,Positive emotion


### 14. Preprocess the text and add the preprocessed text in a column with name `text` in the dataframe.

In [0]:
def preprocess(text):
    try:
        return text.decode('ascii')
    except Exception as e:
        return ""

In [0]:
data['text'] = [preprocess(text) for text in data.tweet_text]

In [35]:
data.sample(5)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text
5620,So @mention claims Android phones were everywh...,iPhone,Positive emotion,So @mention claims Android phones were everywh...
4580,@mention New iPad Apps For Speech Therapy And ...,iPad or iPhone App,Positive emotion,@mention New iPad Apps For Speech Therapy And ...
4721,"Anybody know whether I can nab white, 3G, 64GB...",iPad,Positive emotion,"Anybody know whether I can nab white, 3G, 64GB..."
1604,Woo hoo! @mention is finally back on the iPhon...,iPhone,Positive emotion,Woo hoo! @mention is finally back on the iPhon...
2432,#tech iPad 2 Gets Temporary #Apple_Store for #...,iPad,Positive emotion,#tech iPad 2 Gets Temporary #Apple_Store for #...


### 15. Consider only rows having Positive emotion and Negative emotion and remove other rows from the dataframe.

In [0]:
data = data[ (data["is_there_an_emotion_directed_at_a_brand_or_product"] == "Positive emotion") | (data["is_there_an_emotion_directed_at_a_brand_or_product"] == "Negative emotion")]

In [57]:
data.shape

(3191, 4)

### 16. Represent text as numerical data using `CountVectorizer` and get the document term frequency matrix

#### Use `vect` as the variable name for initialising CountVectorizer.

In [0]:
from sklearn.feature_extraction.text import CountVectorizer

vect = CountVectorizer(analyzer = "word",   \
                             tokenizer = None,    \
                             preprocessor = None, \
                             stop_words = None   ) 


train_data_features = vect.fit_transform(data.text)

In [78]:
train_data_features.shape

(3191, 5482)

In [0]:
# Numpy arrays are easy to work with, so convert the result to an 
# array
train_data_features = train_data_features.toarray()

### 17. Find number of different words in vocabulary

In [0]:
terms = vect.get_feature_names()

In [83]:
len(terms)

5482

#### Tip: To see all available functions for an Object use dir

### 18. Find out how many Positive and Negative emotions are there.

Hint: Use value_counts on that column

In [88]:
data["is_there_an_emotion_directed_at_a_brand_or_product"].value_counts()

Positive emotion    2672
Negative emotion     519
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

### 19. Change the labels for Positive and Negative emotions as 1 and 0 respectively and store in a different column in the same dataframe named 'Label'

Hint: use map on that column and give labels

In [0]:
data['Label'] = data["is_there_an_emotion_directed_at_a_brand_or_product"].map({'Positive emotion': '1', 'Negative emotion': '0'})

In [94]:
data.sample(5)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text,Label
4975,@mention oh hey i remember that :) didn't even...,iPad,Positive emotion,@mention oh hey i remember that :) didn't even...,1
2105,Android party #sxsw (@mention Lustre Pearl Bar...,Android,Positive emotion,Android party #sxsw (@mention Lustre Pearl Bar...,1
2322,@mention Can not wait for #iPad 2 also. They s...,iPad,Positive emotion,@mention Can not wait for #iPad 2 also. They s...,1
7370,"Verizon IPhone at #SXSW = 5 bars, baby. Suck ...",iPhone,Positive emotion,"Verizon IPhone at #SXSW = 5 bars, baby. Suck ...",1
2632,One of the best photo apps for the iPhone ��� ...,iPad or iPhone App,Positive emotion,,1


### 20. Define the feature set (independent variable or X) to be `text` column and `labels` as target (or dependent variable)  and divide into train and test datasets

In [0]:
X = train_data_features
Y = data['Label']
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state=7)

## 21. **Predicting the sentiment:**


### Use Naive Bayes and Logistic Regression and their accuracy scores for predicting the sentiment of the given text

In [0]:
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.linear_model import LogisticRegression


In [104]:
#Fit the model
model = GaussianNB()
model.fit(x_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [0]:
y_predictTest=model.predict(x_test)

In [106]:
metrics.accuracy_score(y_test,y_predictTest)


0.7433489827856025

In [110]:
#Fit the model
model = LogisticRegression()
model.fit(x_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [112]:
y_predictTest=model.predict(x_test)
metrics.accuracy_score(y_test,y_predictTest)


0.863849765258216

## 22. Create a function called `tokenize_predict` which can take count vectorizer object as input and prints the accuracy for x (text) and y (labels)

In [0]:
def tokenize_predict(vect):
   X =  vect.fit_transform(data.text)
   x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=7)
   print('Features: ', x_train.shape[1])
   nb = MultinomialNB()
   nb.fit(x_train, y_train)
   y_pred_class = nb.predict(x_test)
   print('Accuracy: ', metrics.accuracy_score(y_test, y_pred_class))

### Create a count vectorizer function which includes n_grams = 1,2  and pass it to tokenize_predict function to print the accuracy score

In [0]:

from sklearn.feature_extraction.text import CountVectorizer

# Initialize the "CountVectorizer" object, which is scikit-learn's
# bag of words tool.  
vectorizer = CountVectorizer(analyzer = "word",   \
                             ngram_range=(1,2)) 

### Create a count vectorizer function with stopwords = 'english'  and pass it to tokenize_predict function to print the accuracy score

In [131]:
tokenize_predict(vectorizer)


('Features: ', 28958)
('Accuracy: ', 0.8652751423149905)


### Create a count vectorizer function with stopwords = 'english' and max_features =300  and pass it to tokenize_predict function to print the accuracy score

In [132]:
vectorizer = CountVectorizer(analyzer = "word",   \
                             stop_words = "english",   \
                             max_features = 300) 
tokenize_predict(vectorizer)


('Features: ', 300)
('Accuracy: ', 0.8358633776091081)


### Create a count vectorizer function with n_grams = 1,2  and max_features = 15000  and pass it to tokenize_predict function to print the accuracy score

In [133]:
vectorizer = CountVectorizer(analyzer = "word",   \
                             ngram_range=(1,2),   \
                             max_features = 15000) 
tokenize_predict(vectorizer)


('Features: ', 15000)
('Accuracy: ', 0.8595825426944972)


### Create a count vectorizer function with n_grams = 1,2  and include terms that appear at least 2 times (min_df = 2)  and pass it to tokenize_predict function to print the accuracy score

In [134]:
vectorizer = CountVectorizer(analyzer = "word",   \
                             ngram_range=(1,2),   \
                             min_df = 2) 
tokenize_predict(vectorizer)


('Features: ', 9914)
('Accuracy: ', 0.8425047438330171)
