# Transfer Learning CIFAR10

* Train a simple convnet on the CIFAR dataset the first 5 output classes [0..4].
* Freeze convolutional layers and fine-tune dense layers for the last 5 ouput classes [5..9].


### 1. Import CIFAR10 data and create 2 datasets with one dataset having classes from 0 to 4 and other having classes from 5 to 9 

In [1]:
import pandas as pd
import numpy as np
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split
from keras import applications
from keras.models import Sequential, Model 
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
import vis
import keras as keras

Using TensorFlow backend.


In [2]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
y_train_new = y_train[:, 0]
y_test_new = y_test[:, 0]
print(y_train_new.shape)
print(y_test_new.shape)

(50000,)
(10000,)


In [0]:
# create two datasets one with classes from 0 to 4 and one with 5 to 9
x_train_lt5 = x_train[y_train_new < 5]
y_train_lt5 = y_train_new[y_train_new < 5]
x_test_lt5 = x_test[y_test_new < 5]
y_test_lt5 = y_test_new[y_test_new < 5]

x_train_gt5 = x_train[y_train_new >= 5]
y_train_gt5 = y_train_new[y_train_new >= 5]
x_test_gt5 = x_test[y_test_new >= 5]
y_test_gt5 = y_test_new[y_test_new >= 5]

### 2. Use One-hot encoding to divide y_train and y_test into required no of output classes

In [0]:
# number of classes
num_classes = 10
# convert class vectors to binary class matrices
train_labels_lt5 = keras.utils.to_categorical(y_train_lt5, 10)
test_labels_lt5 = keras.utils.to_categorical(y_test_lt5, 10)
#Changing into float and Normalizing the input
train_features_lt5 = x_train_lt5.astype('float32')/255
test_features_lt5 = x_test_lt5.astype('float32')/255



### 3. Build a sequential neural network model which can classify the classes 0 to 4 of CIFAR10 dataset with at least 80% accuracy on test data

In [8]:
# input image dimensions
IMG_SIZE = 32
filters = 32
pool_size = 2
# convolution kernel size
kernel_size = 3


conv_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    keras.layers.BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size = pool_size),
    Conv2D(64, kernel_size),
    keras.layers.BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size = pool_size),
    Conv2D(96, kernel_size),
    keras.layers.BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size = pool_size),
    Flatten(),
]

output_layers = [
    Dense(128),
    keras.layers.BatchNormalization(),
    Activation('relu'),
    Dropout(0.25),
    Dense(num_classes),
    Activation('softmax')
]




In [0]:
from keras.preprocessing.image import ImageDataGenerator
#Attempting to use Data Augmentation
data = ImageDataGenerator(samplewise_center=False, # set input mean to 0 over the sample
                          samplewise_std_normalization=False,  # divide inputs by std of the sample
                          rotation_range=90,       # randomly rotate images in the range (degrees, 0 to 180)
                          width_shift_range=0.2,   # randomly shift images horizontally (fraction of total width)
                          height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
                          fill_mode='reflect',     # filling the area outside
                          zoom_range=0.4,          # random zoom
                          horizontal_flip=True,    # randomly flip images
                          vertical_flip=True)      # randomly flip images

In [10]:
# create complete model
model = Sequential(conv_layers + output_layers)
# Save the model 
checkpoint = ModelCheckpoint("init_model_upto_4.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='auto')
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [11]:

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit_generator(data.flow(train_features_lt5, train_labels_lt5, batch_size = 512),
          steps_per_epoch = train_features_lt5.shape[0]/350, epochs = 5,
          verbose = 1,
          callbacks = [checkpoint, early],
          validation_data= (test_features_lt5, test_labels_lt5))



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/5

Epoch 00001: val_acc improved from -inf to 0.89908, saving model to init_model_upto_4.h5
Epoch 2/5

Epoch 00002: val_acc improved from 0.89908 to 0.92394, saving model to init_model_upto_4.h5
Epoch 3/5

Epoch 00003: val_acc did not improve from 0.92394
Epoch 4/5

Epoch 00004: val_acc improved from 0.92394 to 0.92514, saving model to init_model_upto_4.h5
Epoch 5/5

Epoch 00005: val_acc improved from 0.92514 to 0.92822, saving model to init_model_upto_4.h5


In [12]:
output_model_train = model.evaluate(train_features_lt5, train_labels_lt5)
output_model_test = model.evaluate(test_features_lt5, test_labels_lt5)



In [13]:
print('Model Train Accuracy:', output_model_train[1] * 100, "%")
print('Model Test accuracy:', output_model_test[1] * 100, "%")

Model Train Accuracy: 92.89479954528808 %
Model Test accuracy: 92.82199912071228 %


### 4. In the model which was built above (for classification of classes 0-4 in CIFAR10), make only the dense layers to be trainable and conv layers to be non-trainable

In [0]:
for layer in model.layers:
    if('dense' not in layer.name): #prefix detection to freeze layers which does not have dense
    #Freezing a layer
        layer.trainable = False

### 5. Utilize the the model trained on CIFAR 10 (classes 0 to 4) to classify the classes 5 to 9 of CIFAR 10  (Use Transfer Learning) <br>
Achieve an accuracy of more than 85% on test data

In [0]:
# convert class vectors to binary class matrices
train_labels_gt5 = keras.utils.to_categorical(y_train_gt5, 10)
test_labels_gt5 = keras.utils.to_categorical(y_test_gt5, 10)
#Changing into float and Normalizing the input
train_features_gt5 = x_train_gt5.astype('float32')/255
test_features_gt5 = x_test_gt5.astype('float32')/255

In [22]:
model.layers

[<keras.layers.convolutional.Conv2D at 0x7f427f53ca90>,
 <keras.layers.normalization.BatchNormalization at 0x7f427f53cd30>,
 <keras.layers.core.Activation at 0x7f427f53ce48>,
 <keras.layers.pooling.MaxPooling2D at 0x7f427f53ce80>,
 <keras.layers.convolutional.Conv2D at 0x7f427f53cf28>,
 <keras.layers.normalization.BatchNormalization at 0x7f427f5a70f0>,
 <keras.layers.core.Activation at 0x7f427f5a7208>,
 <keras.layers.pooling.MaxPooling2D at 0x7f427f5a7240>,
 <keras.layers.convolutional.Conv2D at 0x7f427f5a72e8>,
 <keras.layers.normalization.BatchNormalization at 0x7f427f5a7470>,
 <keras.layers.core.Activation at 0x7f427f5a7588>,
 <keras.layers.pooling.MaxPooling2D at 0x7f427f5a75c0>,
 <keras.layers.core.Flatten at 0x7f427f5a7668>,
 <keras.layers.core.Dense at 0x7f427f5a7710>,
 <keras.layers.normalization.BatchNormalization at 0x7f427f5a7898>,
 <keras.layers.core.Activation at 0x7f427f5a79b0>,
 <keras.layers.core.Dropout at 0x7f427f5a79e8>,
 <keras.layers.core.Dense at 0x7f427f5a7a20>,


In [0]:
trans_model = Sequential(model.layers[:13])

In [24]:
trans_model.add(Dense(256))
trans_model.add(Activation('relu'))
trans_model.add(Dropout(0.25))
trans_model.add(Dense(10))
trans_model.add(Activation('softmax'))
trans_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
batch_normalization_1 (Batch (None, 30, 30, 32)        128       
_________________________________________________________________
activation_1 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 13, 13, 64)        256       
_________________________________________________________________
activation_2 (Activation)    (None, 13, 13, 64)       

In [26]:
trans_model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
trans_model.fit(train_features_gt5, train_labels_gt5, batch_size = 256, epochs = 5,verbose = 2,
          validation_data= (test_features_gt5, test_labels_gt5))

Train on 25000 samples, validate on 5000 samples
Epoch 1/5
 - 21s - loss: 0.7917 - acc: 0.7046 - val_loss: 0.9535 - val_acc: 0.6394
Epoch 2/5
 - 20s - loss: 0.6612 - acc: 0.7537 - val_loss: 1.0637 - val_acc: 0.6314
Epoch 3/5
 - 20s - loss: 0.6137 - acc: 0.7744 - val_loss: 1.0541 - val_acc: 0.6374
Epoch 4/5
 - 20s - loss: 0.5712 - acc: 0.7909 - val_loss: 1.2151 - val_acc: 0.6060
Epoch 5/5
 - 20s - loss: 0.5467 - acc: 0.7986 - val_loss: 1.1067 - val_acc: 0.6328


<keras.callbacks.History at 0x7f4272d62b38>

## Sentiment analysis <br> 

The objective of the second problem is to perform Sentiment analysis from the tweets data collected from the users targeted at various mobile devices.
Based on the tweet posted by a user (text), we will classify if the sentiment of the user targeted at a particular mobile device is positive or not.

### 6. Read the dataset (tweets.csv) and drop the NA's while reading the dataset

In [0]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [0]:
import pandas as pd
data = pd.read_csv('./tweets.csv', encoding = "ISO-8859-1").dropna()

In [32]:
data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


### Consider only rows having Positive emotion and Negative emotion and remove other rows from the dataframe.

In [0]:
data = data[(data['is_there_an_emotion_directed_at_a_brand_or_product'] == 'Positive emotion') | (data['is_there_an_emotion_directed_at_a_brand_or_product'] == 'Negative emotion')]

In [34]:
data.shape

(3191, 3)

### 7. Represent text as numerical data using `CountVectorizer` and get the document term frequency matrix

#### Use `vect` as the variable name for initialising CountVectorizer.

In [0]:
# Term Frequency
vect = CountVectorizer()
tf = vect.fit_transform(data['tweet_text'])

In [36]:
tf.shape

(3191, 5648)

### 8. Find number of different words in vocabulary

In [37]:
dir(tf)

['__abs__',
 '__add__',
 '__array_priority__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__idiv__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmatmul__',
 '__rmul__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_add_dense',
 '_add_sparse',
 '_arg_min_or_max',
 '_arg_min_or_max_axis',
 '_asindices',
 '_binopt',
 '_cs_matrix__get_has_canonical_format',
 '_cs_matrix__get_sorted',
 '_cs_matrix__set_has_canonical_format',
 '_cs_matrix__set_sorted

#### Tip: To see all available functions for an Object use dir

In [40]:
dir(tf)

['__abs__',
 '__add__',
 '__array_priority__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__idiv__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmatmul__',
 '__rmul__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_add_dense',
 '_add_sparse',
 '_arg_min_or_max',
 '_arg_min_or_max_axis',
 '_asindices',
 '_binopt',
 '_cs_matrix__get_has_canonical_format',
 '_cs_matrix__get_sorted',
 '_cs_matrix__set_has_canonical_format',
 '_cs_matrix__set_sorted

### Find out how many Positive and Negative emotions are there.

Hint: Use value_counts on that column

In [41]:
pd.value_counts(data['is_there_an_emotion_directed_at_a_brand_or_product'])

Positive emotion    2672
Negative emotion     519
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

###  Change the labels for Positive and Negative emotions as 1 and 0 respectively and store in a different column in the same dataframe named 'label'

Hint: use map on that column and give labels

In [0]:
data['label'] = data.is_there_an_emotion_directed_at_a_brand_or_product.map({'Positive emotion':1, 'Negative emotion':0})

### 9. Define the feature set (independent variable or X) to be `text` column and `labels` as target (or dependent variable)  and divide into train and test datasets

In [0]:
from sklearn.model_selection import train_test_split

X = data.tweet_text
y = data.label

In [0]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

## 10. **Predicting the sentiment:**


### Use Naive Bayes and Logistic Regression and their accuracy scores for predicting the sentiment of the given text

In [47]:
# create document-term matrices
X_train_dtm = vect.fit_transform(X_train)
X_test_dtm = vect.transform(X_test)

# use Naive Bayes to predict the star rating
nb = MultinomialNB()
nb.fit(X_train_dtm, y_train)
y_pred_class = nb.predict(X_test_dtm)

# calculate accuracy
print (metrics.accuracy_score(y_test, y_pred_class))



0.8471177944862155


In [0]:

def logistic_reg(vect):
    X_train_dtm = vect.fit_transform(X_train)
    print('Features: ', X_train_dtm.shape[1])
    X_test_dtm = vect.transform(X_test)
    logreg = LogisticRegression()
    logreg.fit(X_train_dtm, y_train)
    y_pred_class = logreg.predict(X_test_dtm)
    print('Accuracy: ', metrics.accuracy_score(y_test, y_pred_class))

## 11. Create a function called `tokenize_predict` which can take count vectorizer object as input and prints the accuracy for x (text) and y (labels)

In [0]:
def tokenize_test(vect):
    x_train_dtm = vect.fit_transform(x_train)
    print('Features: ', x_train_dtm.shape[1])
    x_test_dtm = vect.transform(x_test)
    nb = MultinomialNB()
    nb.fit(x_train_dtm, y_train)
    y_pred_class = nb.predict(x_test_dtm)
    print('Accuracy: ', metrics.accuracy_score(y_test, y_pred_class))

### Create a count vectorizer function which includes n_grams = 1,2  and pass it to tokenize_predict function to print the accuracy score

In [0]:
# include 1-grams and 2-grams
vect = CountVectorizer(ngram_range=(1, 2))
tokenize_test(vect)

### 12. Create a count vectorizer function with stopwords = 'english'  and pass it to tokenize_predict function to print the accuracy score

In [0]:
vect = CountVectorizer(ngram_range=(1, 2), stop_words= 'english')
tokenize_test(vect)
logistic_reg(vect)

### 13. Create a count vectorizer function with stopwords = 'english' and max_features =300  and pass it to tokenize_predict function to print the accuracy score

### 14. Create a count vectorizer function with n_grams = 1,2  and max_features = 15000  and pass it to tokenize_predict function to print the accuracy score

### 15. Create a count vectorizer function with n_grams = 1,2  and include terms that appear at least 2 times (min_df = 2)  and pass it to tokenize_predict function to print the accuracy score