# Transfer Learning MNIST

* Train a simple convnet on the first 5 digits [0..4] of the MNIST dataset.
* Freeze convolutional layers and fine-tune dense layers for the classification of digits [5..9].

## 1. Import necessary libraries for the model

In [0]:
import tensorflow as tf
import pandas as pd
import keras
from keras.datasets import mnist

## 2. Import MNIST data and create 2 datasets, one having digits from 0 to 4 and the other having digits from 5 to 9

In [0]:
# Load the MNIST train/test arrays through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Boolean masks marking the samples whose label is one of the digits 0..4;
# their complement marks the digits 5..9.
train_is_low = y_train < 5
test_is_low = y_test < 5

# Subset 1: digits 0..4, labels kept as they are.
x_train_lt5, y_train_lt5 = x_train[train_is_low], y_train[train_is_low]
x_test_lt5, y_test_lt5 = x_test[test_is_low], y_test[test_is_low]

# Subset 2: digits 5..9, labels shifted down by 5 so they also run 0..4.
x_train_gte5, y_train_gte5 = x_train[~train_is_low], y_train[~train_is_low] - 5
x_test_gte5, y_test_gte5 = x_test[~test_is_low], y_test[~test_is_low] - 5

## 3. Print x_train, y_train, x_test and y_test for both the datasets

In [59]:
x_train_lt5

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [60]:
y_train_lt5

array([0, 4, 1, ..., 2, 1, 3], dtype=uint8)

In [61]:
x_test_lt5

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [62]:
y_test_lt5

array([2, 1, 0, ..., 2, 3, 4], dtype=uint8)

In [24]:
x_train_gte5

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [25]:
y_train_gte5

array([0, 4, 0, ..., 0, 1, 3], dtype=uint8)

In [26]:
x_test_gte5

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [27]:
y_test_gte5

array([2, 4, 0, ..., 4, 0, 1], dtype=uint8)

## 4. Let us take only the dataset (x_train, y_train, x_test, y_test) for digits 0 to 4 in MNIST
## Reshape x_train and x_test to a 4 Dimensional array (channel = 1) to pass it into a Conv2D layer

In [0]:
# Append a single channel axis so each image becomes 28 x 28 x 1 for Conv2D.
x_train_lt5_conv = x_train_lt5.reshape((len(x_train_lt5), 28, 28, 1))
x_test_lt5_conv = x_test_lt5.reshape((len(x_test_lt5), 28, 28, 1))

In [55]:
x_train_lt5_conv.shape

(30596, 28, 28, 1)

## 5. Normalize x_train and x_test by dividing it by 255

In [0]:
# Cast to float32 (astype returns a new array) and scale the copy in place by 255.
x_train_lt5_conv_norm = x_train_lt5_conv.astype("float32")
x_train_lt5_conv_norm /= 255
x_test_lt5_conv_norm = x_test_lt5_conv.astype("float32")
x_test_lt5_conv_norm /= 255


## 6. Use one-hot encoding to convert y_train and y_test into the required number of output classes

In [0]:
# One-hot encode the 0..4 labels into 10-wide vectors (the width passed to
# to_categorical), for both the training and the test labels.
y_train_class, y_test_class = (
    keras.utils.to_categorical(digits, 10) for digits in (y_train_lt5, y_test_lt5)
)


## 7. Build a sequential model with 2 convolutional layers, each with 32 kernels of size (3,3), followed by a max-pooling layer of size (2,2) and then a dropout layer, to be trained for classification of digits 0-4

In [0]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout

In [0]:
# Convolutional feature extractor: two 3x3 conv layers with 32 filters each,
# then 2x2 max pooling and 25% dropout. Every layer is named so it can be
# selected by name later on.
model = Sequential()
# Only the first layer declares the input shape (28 x 28 pixels, 1 channel).
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1), name='conv1'))
# conv2 consumes conv1's output, so it takes no input_shape of its own; the
# (28, 28, 1) value previously passed here did not describe its real input.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', name='conv2'))
model.add(MaxPooling2D(pool_size=(2, 2), name='max1'))
model.add(Dropout(0.25, name='drop1'))

## 8. After that, flatten the data and add 2 Dense layers: one with 128 neurons and 'relu' activation, and one with neurons = output classes and 'softmax' activation. Add a dropout layer in between if necessary

In [0]:
# Classification head: flatten the feature maps, then a 128-unit ReLU layer
# and a 10-unit softmax layer, appended to the model in this order.
head_layers = (
    Flatten(),
    Dense(128, activation='relu', name='dense1'),
    Dense(10, activation='softmax', name='dense2'),
)
for head_layer in head_layers:
    model.add(head_layer)

## 9. Print the training and test accuracy

In [71]:
# Compile with Adam and categorical cross-entropy, then train for 10 epochs on
# the digits 0..4, reporting metrics on the held-out 0..4 test set each epoch.
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
output_simple_conv = model.fit(
    x_train_lt5_conv_norm,
    y_train_class,
    batch_size=512,
    epochs=10,
    verbose=2,
    validation_data=(x_test_lt5_conv_norm, y_test_class),
)

Train on 30596 samples, validate on 5139 samples
Epoch 1/10
 - 1s - loss: 0.3396 - acc: 0.9020 - val_loss: 0.0784 - val_acc: 0.9805
Epoch 2/10
 - 1s - loss: 0.0670 - acc: 0.9792 - val_loss: 0.0299 - val_acc: 0.9907
Epoch 3/10
 - 1s - loss: 0.0386 - acc: 0.9886 - val_loss: 0.0224 - val_acc: 0.9934
Epoch 4/10
 - 1s - loss: 0.0281 - acc: 0.9913 - val_loss: 0.0170 - val_acc: 0.9947
Epoch 5/10
 - 1s - loss: 0.0235 - acc: 0.9927 - val_loss: 0.0242 - val_acc: 0.9922
Epoch 6/10
 - 1s - loss: 0.0201 - acc: 0.9936 - val_loss: 0.0124 - val_acc: 0.9953
Epoch 7/10
 - 1s - loss: 0.0160 - acc: 0.9954 - val_loss: 0.0101 - val_acc: 0.9959
Epoch 8/10
 - 1s - loss: 0.0139 - acc: 0.9961 - val_loss: 0.0095 - val_acc: 0.9965
Epoch 9/10
 - 1s - loss: 0.0122 - acc: 0.9963 - val_loss: 0.0091 - val_acc: 0.9965
Epoch 10/10
 - 1s - loss: 0.0096 - acc: 0.9971 - val_loss: 0.0080 - val_acc: 0.9971


In [57]:
# Evaluate on the 0..4 test set; the first two entries of `score` are printed
# as the test score (loss) and the test accuracy.
score = model.evaluate(x_test_lt5_conv_norm, y_test_class, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

('Test score:', 0.0076241770370868)
('Test accuracy:', 0.9972757345787118)


## 10. Make only the dense layers to be trainable and convolutional layers to be non-trainable

In [0]:
for layer in model.layers:
  # Layers whose name contains 'dense' stay trainable; every other layer
  # (e.g. conv1, conv2) is frozen.
  if 'dense' in layer.name:
    continue
  layer.trainable = False


## 11. Use the model trained on 0 to 4 digit classification and train it on the dataset which has digits 5 to 9  (Using Transfer learning keeping only the dense layers to be trainable)

In [0]:
# Rebind the one-hot targets to the digits 5..9 (already shifted to 0..4),
# again encoded as 10-wide vectors.
y_train_class, y_test_class = (
    keras.utils.to_categorical(digits, 10) for digits in (y_train_gte5, y_test_gte5)
)

In [0]:
# Append a single channel axis so each 5..9 image becomes 28 x 28 x 1.
x_train_gte5_conv = x_train_gte5.reshape((len(x_train_gte5), 28, 28, 1))
x_test_gte5_conv = x_test_gte5.reshape((len(x_test_gte5), 28, 28, 1))

In [0]:
# Cast to float32 (astype returns a new array) and scale the copy in place by 255.
x_train_gte5_conv_norm = x_train_gte5_conv.astype("float32")
x_train_gte5_conv_norm /= 255
x_test_gte5_conv_norm = x_test_gte5_conv.astype("float32")
x_test_gte5_conv_norm /= 255

In [80]:
# Compile again after the `trainable` flags were changed, then fine-tune for
# 10 epochs on the digits 5..9, validating on the 5..9 test set.
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])
output_simple_conv = model.fit(
    x_train_gte5_conv_norm,
    y_train_class,
    batch_size=512,
    epochs=10,
    verbose=2,
    validation_data=(x_test_gte5_conv_norm, y_test_class),
)

Train on 29404 samples, validate on 4861 samples
Epoch 1/10
 - 1s - loss: 2.4428 - acc: 0.6897 - val_loss: 0.1941 - val_acc: 0.9475
Epoch 2/10
 - 0s - loss: 0.1359 - acc: 0.9609 - val_loss: 0.1070 - val_acc: 0.9712
Epoch 3/10
 - 0s - loss: 0.0903 - acc: 0.9750 - val_loss: 0.0771 - val_acc: 0.9774
Epoch 4/10
 - 0s - loss: 0.0701 - acc: 0.9806 - val_loss: 0.0612 - val_acc: 0.9819
Epoch 5/10
 - 0s - loss: 0.0584 - acc: 0.9836 - val_loss: 0.0516 - val_acc: 0.9850
Epoch 6/10
 - 0s - loss: 0.0508 - acc: 0.9852 - val_loss: 0.0453 - val_acc: 0.9864
Epoch 7/10
 - 0s - loss: 0.0450 - acc: 0.9872 - val_loss: 0.0405 - val_acc: 0.9883
Epoch 8/10
 - 0s - loss: 0.0405 - acc: 0.9890 - val_loss: 0.0384 - val_acc: 0.9885
Epoch 9/10
 - 0s - loss: 0.0370 - acc: 0.9893 - val_loss: 0.0353 - val_acc: 0.9891
Epoch 10/10
 - 0s - loss: 0.0346 - acc: 0.9899 - val_loss: 0.0338 - val_acc: 0.9895


## 12. Print the accuracy for classification of digits 5 to 9

In [82]:
# Evaluate on the 5..9 test set; the first two entries of `score` are printed
# as the test score (loss) and the test accuracy.
score = model.evaluate(x_test_gte5_conv_norm, y_test_class, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

('Test score:', 0.03376136876192849)
('Test accuracy:', 0.9895083315086521)


## Sentiment analysis <br> 

The objective of the second problem is to perform Sentiment analysis from the tweets data collected from the users targeted at various mobile devices.
Based on the tweet posted by a user (text), we will classify if the sentiment of the user targeted at a particular mobile device is positive or not.

### 13. Read the dataset (tweets.csv) and drop the NA's while reading the dataset

In [92]:
# Colab-only step: mount Google Drive at /gdrive so the tweets CSV stored in
# "My Drive" can be read by path.
from google.colab import drive
drive.mount('/gdrive')


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# Location of the tweets file inside the mounted Google Drive.
tweets_csv = "/gdrive/My Drive/tweets.csv"
df = pd.read_csv(tweets_csv)

In [0]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how="any")

In [190]:
df.shape

(3291, 3)

### 14. Preprocess the text and add the preprocessed text in a column with name `text` in the dataframe.

In [0]:
def preprocess(text):
    """Return *text* as a text string if it is pure ASCII, else ``""``.

    Byte strings are decoded as ASCII. Text strings are validated by
    encoding them as ASCII, because ``str`` has no ``decode`` method on
    Python 3; calling it there made every tweet fall into the error branch
    and come back empty.

    Anything that is not a string (``None``, a NaN float, ...) and any
    string containing non-ASCII characters yields an empty string.
    """
    try:
        if isinstance(text, (bytes, bytearray)):
            return text.decode('ascii')
        # Raises UnicodeEncodeError for non-ASCII text and AttributeError
        # for non-string values.
        text.encode('ascii')
        return text
    except (UnicodeError, AttributeError):
        return ""

In [0]:
# Run every raw tweet through preprocess() and store the result as `text`.
df['text'] = df.tweet_text.apply(preprocess)

### 15. Consider only rows having Positive emotion and Negative emotion and remove other rows from the dataframe.

In [0]:
# Keep only the rows labelled with a positive or a negative emotion.
# .copy() makes the result an independent frame, so adding columns to it
# later does not raise pandas' SettingWithCopyWarning.
emotion_column = "is_there_an_emotion_directed_at_a_brand_or_product"
df = df[df[emotion_column].isin(["Positive emotion", "Negative emotion"])].copy()

In [195]:
df.shape

(3191, 4)

### 16. Represent text as numerical data using `CountVectorizer` and get the document term frequency matrix

#### Use `vect` as the variable name for initialising CountVectorizer.

In [0]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer that builds its features from word tokens.
vect = CountVectorizer(analyzer="word")

In [0]:
# Learn the vocabulary from the preprocessed tweets and build the
# document-term matrix in one step.
X = vect.fit_transform(df["text"])

In [296]:
X.shape

(3191, 5482)

### 17. Find number of different words in vocabulary

In [297]:
# Shows the vocabulary terms learned by `vect`; the number of different words
# is the length of this list, which equals the column count of X.
vect.get_feature_names()

[u'000',
 u'02',
 u'03',
 u'08',
 u'10',
 u'100',
 u'100s',
 u'100tc',
 u'101',
 u'10am',
 u'10k',
 u'10mins',
 u'10pm',
 u'10x',
 u'11',
 u'11ntc',
 u'11th',
 u'12',
 u'12b',
 u'12th',
 u'13',
 u'130',
 u'14',
 u'1406',
 u'1413',
 u'1415',
 u'15',
 u'150',
 u'1500',
 u'150m',
 u'157',
 u'15am',
 u'15k',
 u'16162',
 u'16gb',
 u'16mins',
 u'17',
 u'188',
 u'1986',
 u'1990style',
 u'1m',
 u'1pm',
 u'1st',
 u'20',
 u'200',
 u'2010',
 u'2011',
 u'2012',
 u'20s',
 u'21',
 u'22',
 u'23',
 u'24',
 u'25',
 u'250k',
 u'25th',
 u'2am',
 u'2day',
 u'2honor',
 u'2moro',
 u'2nd',
 u'2nite',
 u'2s',
 u'2yrs',
 u'30',
 u'300',
 u'3000',
 u'30a',
 u'30am',
 u'30p',
 u'30pm',
 u'32',
 u'32gb',
 u'35',
 u'36',
 u'37',
 u'3d',
 u'3g',
 u'3gs',
 u'3k',
 u'3rd',
 u'3x',
 u'40',
 u'400',
 u'40min',
 u'41',
 u'45',
 u'45am',
 u'47',
 u'48',
 u'4android',
 u'4chan',
 u'4g',
 u'4nqv92l',
 u'4sq',
 u'4sq3',
 u'4square',
 u'50',
 u'54',
 u'55',
 u'58',
 u'59',
 u'59pm',
 u'5pm',
 u'5th',
 u'60',
 u'64g',
 u'64gb

#### Tip: To see all available functions for an Object use dir

### 18. Find out how many Positive and Negative emotions are there.

Hint: Use value_counts on that column

In [298]:
df.is_there_an_emotion_directed_at_a_brand_or_product.value_counts()

Positive emotion    2672
Negative emotion     519
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

### 19. Change the labels for Positive and Negative emotions to 1 and 0 respectively and store them in a new column named `label` in the same dataframe

Hint: use map on that column and give labels

In [0]:
# Integer class labels: 1 for a positive emotion, 0 for a negative one.
# The task asks for the numbers 1 and 0, not the strings '1' and '0'.
label_map = {'Positive emotion': 1, 'Negative emotion': 0}

In [0]:
# Translate each emotion string into its class label via label_map.
df["label"] = df["is_there_an_emotion_directed_at_a_brand_or_product"].map(label_map)

### 20. Define the feature set (independent variable or X) to be the vectorized `text` column and the `label` column as target (or dependent variable), and divide them into train and test datasets

In [301]:
df.sample()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,text,label
1178,So I went the whole day w/out my laptop &amp; ...,iPad,Negative emotion,So I went the whole day w/out my laptop &amp; ...,0


In [0]:
# Target as a 1-D Series. Selecting with [["label"]] produced a
# single-column DataFrame, i.e. a 2-D column vector, whereas scikit-learn
# estimators expect a 1-D target.
y = df["label"]

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=7,
)

## 21. **Predicting the sentiment:**


### Use Naive Bayes and Logistic Regression to predict the sentiment of the given text, and report their accuracy scores

In [0]:
from sklearn.linear_model import LogisticRegression

In [0]:
# Logistic regression with the library's default settings.
# NOTE: this rebinds `model`, which earlier in this notebook held the Keras network.
model =LogisticRegression()

In [306]:
# Train the logistic regression on the training split.
model.fit(X=x_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [0]:
# Predicted labels for the held-out tweets.
pred = model.predict(X=x_test)

In [308]:
from sklearn import metrics
# Share of test tweets whose predicted label matches the true label.
metrics.accuracy_score(y_true=y_test, y_pred=pred)

0.8785578747628083

In [0]:
from sklearn.naive_bayes import MultinomialNB

In [0]:
# Multinomial Naive Bayes with the library's default settings.
model1 = MultinomialNB()

In [311]:
# MultinomialNB accepts sparse input directly, so the document-term matrix is
# passed as is instead of being expanded into a dense array with .toarray().
model1.fit(x_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [0]:
# Predict straight from the sparse test matrix; no dense copy is needed.
pred1 = model1.predict(x_test)

In [313]:
# Share of test tweets whose Naive Bayes prediction matches the true label.
metrics.accuracy_score(y_true=y_test, y_pred=pred1)

0.8671726755218216

## 22. Create a function called `tokenize_predict` which can take count vectorizer object as input and prints the accuracy for x (text) and y (labels)

In [0]:
def tokenize_predict(vect):
    """Vectorize ``df.text`` with *vect*, fit Naive Bayes and print the accuracy.

    Reads the module-level ``df`` (tweets) and ``y`` (labels). The data is
    split with ``test_size=0.33`` and ``random_state=7``. The feature count
    of the training matrix is printed first, then the test accuracy of a
    ``MultinomialNB`` classifier. Nothing is returned.
    """
    doc_term = vect.fit_transform(df.text)
    train_docs, test_docs, train_labels, test_labels = train_test_split(
        doc_term, y, test_size=0.33, random_state=7)
    print('Features: ', train_docs.shape[1])

    classifier = MultinomialNB()
    classifier.fit(train_docs, train_labels)
    predicted = classifier.predict(test_docs)
    print('Accuracy: ', metrics.accuracy_score(test_labels, predicted))

### Create a count vectorizer function which includes n_grams = 1,2  and pass it to tokenize_predict function to print the accuracy score

In [324]:
# Features are unigrams and bigrams (ngram_range from 1 to 2).
vect = CountVectorizer(ngram_range=(1,2))
tokenize_predict(vect)

('Features: ', 28958)
('Accuracy: ', 0.8652751423149905)


### Create a count vectorizer function with stopwords = 'english'  and pass it to tokenize_predict function to print the accuracy score

In [325]:
# Same word features, with scikit-learn's built-in English stop words removed.
vect = CountVectorizer(stop_words='english')
tokenize_predict(vect)

('Features: ', 5239)
('Accuracy: ', 0.8652751423149905)


### Create a count vectorizer function with stopwords = 'english' and max_features =300  and pass it to tokenize_predict function to print the accuracy score

In [326]:
# English stop words removed and the vocabulary capped at 300 features.
vect = CountVectorizer(stop_words='english',max_features=300)
tokenize_predict(vect)

('Features: ', 300)
('Accuracy: ', 0.8358633776091081)


### Create a count vectorizer function with n_grams = 1,2  and max_features = 15000  and pass it to tokenize_predict function to print the accuracy score

In [327]:
# Unigrams and bigrams, with the vocabulary capped at 15000 features.
vect = CountVectorizer(ngram_range=(1,2),max_features=15000)
tokenize_predict(vect)

('Features: ', 15000)
('Accuracy: ', 0.8595825426944972)


### Create a count vectorizer with n-grams of size 1 and 2 that keeps only terms appearing in at least 2 documents (min_df = 2), and pass it to the tokenize_predict function to print the accuracy score

In [328]:
# Unigrams and bigrams, keeping only terms found in at least 2 documents.
vect = CountVectorizer(ngram_range=(1,2),min_df=2)
tokenize_predict(vect)

('Features: ', 9914)
('Accuracy: ', 0.8425047438330171)
