# Transfer Learning MNIST

* Train a simple convnet on the MNIST dataset the first 5 digits [0..4].
* Freeze convolutional layers and fine-tune dense layers for the classification of digits [5..9].

## 1. Import necessary libraries for the model

In [13]:
#Importing important modules
import keras
import numpy as np
import keras.utils as np_utils
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Convolution2D, MaxPooling2D,Activation

## 2. Import MNIST data and create 2 datasets with one dataset having digits from 0 to 4 and other from 5 to 9 

In [14]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train.shape , y_train.shape

((60000, 28, 28), (60000,))

In [15]:
train_mask = np.isin(y_train, [0,1,2,3,4])
test_mask = np.isin(y_test, [0,1,2,3,4])

x_train_mask04, y_train04 = x_train[train_mask], y_train[train_mask]
x_test_mask04, y_test04 = x_test[test_mask], y_test[test_mask]

train_mask1 = np.isin(y_train, [5,6,7,8,9])
test_mask1 = np.isin(y_test, [5,6,7,8,9])

x_train_mask59, y_train59 = x_train[train_mask1], y_train[train_mask1]
x_test_mask59, y_test59 = x_test[test_mask1], y_test[test_mask1]


## 3. Print x_train, y_train, x_test and y_test for both the datasets

In [16]:
print(x_train_mask04.shape , y_train04.shape,x_test_mask04.shape,y_test04.shape)

(30596, 28, 28) (30596,) (5139, 28, 28) (5139,)


In [17]:
print(x_train_mask59.shape , y_train59.shape,x_test_mask59.shape,y_test59.shape)

(29404, 28, 28) (29404,) (4861, 28, 28) (4861,)


## ** 4. Let us take only the dataset (x_train, y_train, x_test, y_test) for Integers 0 to 4 in MNIST **
## Reshape x_train and x_test to a 4 Dimensional array (channel = 1) to pass it into a Conv2D layer

In [18]:
x_train_mask04 = x_train_mask04.reshape(x_train_mask04.shape[0], 28, 28, 1).astype('float32')
x_test_mask04 = x_test_mask04.reshape(x_test_mask04.shape[0], 28, 28, 1).astype('float32')

x_train_mask59 = x_train_mask59.reshape(x_train_mask59.shape[0], 28, 28, 1).astype('float32')
x_test_mask59 = x_test_mask59.reshape(x_test_mask59.shape[0], 28, 28, 1).astype('float32')

## 5. Normalize x_train and x_test by dividing it by 255

In [19]:
x_train_mask04 /= 255
x_test_mask04 /= 255

x_train_mask59 /= 255
x_test_mask59 /= 255


## 6. Use One-hot encoding to divide y_train and y_test into required no of output classes

In [20]:
ytrain04 = keras.utils.to_categorical(y_train04,5)
ytest04 = keras.utils.to_categorical(y_test04, 5)


In [21]:
y_train59= y_train59-5
y_test59= y_test59-5

y_train59 = keras.utils.to_categorical(y_train59, 5)

y_test59 = keras.utils.to_categorical(y_test59,5)

## 7. Build a sequential model with 2 Convolutional layers with 32 kernels of size (3,3) followed by a Max pooling layer of size (2,2) followed by a drop out layer to be trained for classification of digits 0-4  

In [22]:
  # Define model
model = Sequential()
    # 1st Conv Layer
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
    # 2nd Conv Layer
model.add(Convolution2D(32, (3, 3), activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

## 8. Post that flatten the data and add 2 Dense layers with 128 neurons and neurons = output classes with activation = 'relu' and 'softmax' respectively. Add dropout layer inbetween if necessary  

In [23]:
 # Fully Connected Layer
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))

model.add(Dense(128))
model.add(Activation('relu'))
   
# Prediction Layer
model.add(Dense(5))
model.add(Activation('softmax'))

# Loss and Optimizer
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Store Training Results
early_stopping = keras.callbacks.EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')
callback_list = [early_stopping]

    

## 9. Print the training and test accuracy

In [25]:
history=model.fit(x_train_mask04, ytrain04, batch_size=32, epochs=3, 
              validation_data=(x_test_mask04, ytest04), callbacks=callback_list)

Train on 30596 samples, validate on 5139 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [26]:
print('\n', 'Training accuracy:', history.history['acc'])
print('\n', 'validation accuracy:', history.history['val_acc'])


 Training accuracy: [0.9952935024186168, 0.9963393907700353, 0.9971238070335992]

 validation accuracy: [0.9976649153531816, 0.9982486865148862, 0.9966919634170072]


In [28]:
# Evaluate the model on test set
score = model.evaluate(x_test_mask04, ytest04, verbose=0)
# Print test accuracy
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.9966919634170072


## 10. Make only the dense layers to be trainable and convolutional layers to be non-trainable

In [29]:
#Freezing layers in the model which don't have 'dense' in their name
for layer in model.layers:
  if('dense' not in layer.name): #prefix detection to freeze layers which does not have dense
    #Freezing a layer
    layer.trainable = False

In [30]:
# model.save_weights('MNIST.h5')

## 11. Use the model trained on 0 to 4 digit classification and train it on the dataset which has digits 5 to 9  (Using Transfer learning keeping only the dense layers to be trainable)

In [31]:
# model.load_weights('MNIST.h5')

In [32]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

## 12. Print the accuracy for classification of digits 5 to 9

In [34]:
#Training on the dataset
history1=model.fit(x_train_mask59, y_train59,
          batch_size=32,
          epochs=3,
          verbose=1,
          validation_data=(x_test_mask59, y_test59))

Train on 29404 samples, validate on 4861 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [37]:
print('\n', 'Training accuracy:', history1.history['acc'])
print('\n', 'validation accuracy:', history1.history['val_acc'])


 Training accuracy: [0.9920078900586567, 0.9939464018500884, 0.9947966263093456]

 validation accuracy: [0.99362271137626, 0.9944455873277104, 0.9938284303641226]


## Sentiment analysis <br> 

The objective of the second problem is to perform Sentiment analysis from the tweets data collected from the users targeted at various mobile devices.
Based on the tweet posted by a user (text), we will classify if the sentiment of the user targeted at a particular mobile device is positive or not.

### 13. Read the dataset (tweets.csv) and drop the NA's while reading the dataset

In [38]:
import pandas as pd
import numpy as np

In [39]:
tweets=pd.read_csv("tweets.csv",encoding="ISO-8859-1")
tweets=tweets.dropna()
tweets.head(10)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion
10,Excited to meet the @samsungmobileus at #sxsw ...,Android,Positive emotion
11,Find &amp; Start Impromptu Parties at #SXSW Wi...,Android App,Positive emotion


In [40]:
tweets.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3291 entries, 0 to 9088
Data columns (total 3 columns):
tweet_text                                            3291 non-null object
emotion_in_tweet_is_directed_at                       3291 non-null object
is_there_an_emotion_directed_at_a_brand_or_product    3291 non-null object
dtypes: object(3)
memory usage: 102.8+ KB


### 14. Preprocess the text and add the preprocessed text in a column with name `text` in the dataframe.

In [41]:
def preprocess(text):
    try:
        return text.encode().decode('ascii')
    except Exception as e:
        return ""

In [42]:
tweets['text'] = [preprocess(text) for text in tweets.tweet_text]

In [43]:
tweets.head(10)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,.@wesley83 I have a 3G iPhone. After 3 hrs twe...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,@jessedee Know about @fludapp ? Awesome iPad/i...
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,@swonderlin Can not wait for #iPad 2 also. The...
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,@sxsw I hope this year's festival isn't as cra...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,@sxtxstate great stuff on Fri #SXSW: Marissa M...
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion,"#SXSW is just starting, #CTIA is around the co..."
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion,Beautifully smart and simple idea RT @madebyma...
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion,Counting down the days to #sxsw plus strong Ca...
10,Excited to meet the @samsungmobileus at #sxsw ...,Android,Positive emotion,Excited to meet the @samsungmobileus at #sxsw ...
11,Find &amp; Start Impromptu Parties at #SXSW Wi...,Android App,Positive emotion,Find &amp; Start Impromptu Parties at #SXSW Wi...


### 15. Consider only rows having Positive emotion and Negative emotion and remove other rows from the dataframe.

In [44]:
tweets['is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

Positive emotion                      2672
Negative emotion                       519
No emotion toward brand or product      91
I can't tell                             9
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

In [45]:
tweets1= tweets[tweets['is_there_an_emotion_directed_at_a_brand_or_product'] != 'No emotion toward brand or product' ]

In [46]:
tweets1= tweets1[tweets1['is_there_an_emotion_directed_at_a_brand_or_product'] != "I can't tell" ]

In [47]:
tweets1['is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

Positive emotion    2672
Negative emotion     519
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

In [48]:
tweets1.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,.@wesley83 I have a 3G iPhone. After 3 hrs twe...
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,@jessedee Know about @fludapp ? Awesome iPad/i...
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,@swonderlin Can not wait for #iPad 2 also. The...
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,@sxsw I hope this year's festival isn't as cra...
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,@sxtxstate great stuff on Fri #SXSW: Marissa M...


### 16. Represent text as numerical data using `CountVectorizer` and get the document term frequency matrix

#### Use `vect` as the variable name for initialising CountVectorizer.

In [49]:
from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer()

In [51]:
# learn the 'vocabulary' of the training data
vect.fit(tweets.text)

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)

### 17. Find number of different words in vocabulary

In [136]:
len(vect.get_feature_names())

4769

#### Tip: To see all available functions for an Object use dir

In [52]:
dir(vect)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_char_ngrams',
 '_char_wb_ngrams',
 '_check_vocabulary',
 '_count_vocab',
 '_get_param_names',
 '_limit_features',
 '_sort_features',
 '_validate_vocabulary',
 '_white_spaces',
 '_word_ngrams',
 'analyzer',
 'binary',
 'build_analyzer',
 'build_preprocessor',
 'build_tokenizer',
 'decode',
 'decode_error',
 'dtype',
 'encoding',
 'fit',
 'fit_transform',
 'fixed_vocabulary_',
 'get_feature_names',
 'get_params',
 'get_stop_words',
 'input',
 'inverse_transform',
 'lowercase',
 'max_df',
 'max_features',
 'min_df',
 'ngram_range',
 'preprocessor',
 'set_params',
 'stop_words',
 'stop_word

### 18. Find out how many Positive and Negative emotions are there.

Hint: Use value_counts on that column

In [53]:
tweets1['is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

Positive emotion    2672
Negative emotion     519
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

### 19. Change the labels for Positive and Negative emotions as 1 and 0 respectively and store in a different column in the same dataframe named 'Label'

Hint: use map on that column and give labels

In [54]:
tweets1['label'] = tweets.is_there_an_emotion_directed_at_a_brand_or_product.map({'Positive emotion':1, 'Negative emotion':0})

In [55]:
tweets1.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text,label
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,0.0
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,@jessedee Know about @fludapp ? Awesome iPad/i...,1.0
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,@swonderlin Can not wait for #iPad 2 also. The...,1.0
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,@sxsw I hope this year's festival isn't as cra...,0.0
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,@sxtxstate great stuff on Fri #SXSW: Marissa M...,1.0


### 20. Define the feature set (independent variable or X) to be `text` column and `labels` as target (or dependent variable)  and divide into train and test datasets

In [56]:
X = tweets1.text
y = tweets1.label
print(X.shape)
print(y.shape)

(3191,)
(3191,)


In [60]:
vect = CountVectorizer(ngram_range=(1, 1))
Xdata = vect.fit_transform(X)


# split the new DataFrame into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(Xdata, y, random_state=1)

In [61]:

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(2393, 5482)
(798, 5482)
(2393,)
(798,)


## 21. **Predicting the sentiment:**


### Use Naive Bayes and Logistic Regression and their accuracy scores for predicting the sentiment of the given text

In [62]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# use Naive Bayes to predict the star rating
nb = MultinomialNB()
nb.fit(X_train, y_train)
y_pred_class = nb.predict(X_test)

# calculate accuracy
print(metrics.accuracy_score(y_test, y_pred_class))

0.8295739348370927


In [63]:
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred_class = clf.predict(X_test)
# calculate accuracy
print(metrics.accuracy_score(y_test, y_pred_class))

0.8634085213032582


## 22. Create a function called `tokenize_predict` which can take count vectorizer object as input and prints the accuracy for x (text) and y (labels)

In [64]:
def tokenize_test(vect):
    x_train_dtm = vect.fit_transform(x_train)
    print('Features: ', x_train_dtm.shape[1])
    x_test_dtm = vect.transform(x_test)
    nb = MultinomialNB()
    nb.fit(x_train_dtm, y_train)
    y_pred_class = nb.predict(x_test_dtm)
    print('Accuracy: ', metrics.accuracy_score(y_test, y_pred_class))

### Create a count vectorizer function which includes n_grams = 1,2  and pass it to tokenize_predict function to print the accuracy score

In [66]:

X = tweets1['text']
Y = tweets1['label']

x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=1)

vect1 = CountVectorizer(ngram_range=(1, 2))

tokenize_test(vect1)

Features:  24066
Accuracy:  0.8508771929824561


### Create a count vectorizer function with stopwords = 'english'  and pass it to tokenize_predict function to print the accuracy score

In [68]:
vect2 = CountVectorizer(ngram_range=(1, 2),stop_words='english')

tokenize_test(vect2)

Features:  18453
Accuracy:  0.8521303258145363


### Create a count vectorizer function with stopwords = 'english' and max_features =300  and pass it to tokenize_predict function to print the accuracy score

In [73]:
vect2 = CountVectorizer(ngram_range=(1, 2),stop_words='english',max_features =300)

tokenize_test(vect2)

Features:  300
Accuracy:  0.7819548872180451


### Create a count vectorizer function with n_grams = 1,2  and max_features = 15000  and pass it to tokenize_predict function to print the accuracy score

In [72]:
vect2 = CountVectorizer(ngram_range=(1, 2),max_features =15000)

tokenize_test(vect2)

Features:  15000
Accuracy:  0.8521303258145363


### Create a count vectorizer function with n_grams = 1,2  and include terms that appear at least 2 times (min_df = 2)  and pass it to tokenize_predict function to print the accuracy score

In [71]:
vect2 = CountVectorizer(ngram_range=(1, 2),min_df=2)

tokenize_test(vect2)

Features:  7478
Accuracy:  0.8546365914786967
