# Transfer Learning CIFAR10

* Train a simple convnet on the first 5 output classes [0..4] of the CIFAR-10 dataset.
* Freeze convolutional layers and fine-tune dense layers for the last 5 output classes [5..9].


In [0]:
import random

import numpy as np
import tensorflow as tf

# Start from a fresh default graph so re-running the notebook does not keep
# adding ops to a graph left over from a previous run.
tf.reset_default_graph()

# Seed every random number generator, not only TensorFlow's: the Keras FAQ on
# reproducibility recommends seeding Python's `random`, NumPy and TensorFlow
# together.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)

In [2]:
from __future__ import absolute_import, division, print_function
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Reshape
from keras.layers import Convolution2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
import pickle
from matplotlib import pyplot as plt
import seaborn as sns

Using TensorFlow backend.


### 1. Import CIFAR10 data and create 2 datasets, one containing classes 0 to 4 and the other containing classes 5 to 9

In [0]:
from keras.datasets import cifar10

# Load the CIFAR-10 train and test splits; each split is an (images, labels) pair.
# NOTE(review): load_data() presumably downloads and caches the dataset on
# first use - confirm before running in an offline environment.
(trainX, trainY),(testX, testY) = cifar10.load_data()

In [0]:
# Split the training data into two tasks by label:
#   set1 -> classes 0-4 (used to train the base convnet)
#   set2 -> classes 5-9 (everything else)
# A boolean mask replaces the per-sample Python loop. ravel() flattens the
# label array so the mask is 1-D and selects whole samples along axis 0.
# The results are NumPy arrays rather than lists; len() and np.array() behave
# the same on them.
train_in_set1 = np.isin(trainY.ravel(), [0, 1, 2, 3, 4])
trainXset1, trainYset1 = trainX[train_in_set1], trainY[train_in_set1]
trainXset2, trainYset2 = trainX[~train_in_set1], trainY[~train_in_set1]
  

In [0]:
# Split the test data into two tasks by label:
#   set1 -> classes 0-4
#   set2 -> classes 5-9 (everything else)
# A boolean mask replaces the per-sample Python loop. ravel() flattens the
# label array so the mask is 1-D and selects whole samples along axis 0.
# The results are NumPy arrays rather than lists; len() and np.array() behave
# the same on them.
test_in_set1 = np.isin(testY.ravel(), [0, 1, 2, 3, 4])
testXset1, testYset1 = testX[test_in_set1], testY[test_in_set1]
testXset2, testYset2 = testX[~test_in_set1], testY[~test_in_set1]

In [6]:
# Sanity check: report the sample counts of the class 0-4 subset
# (image and label counts should match within each split).
set1_sizes = (
    ('length of X trainset 0-4 :', trainXset1),
    ('length of Y trainset 0-4 :', trainYset1),
    ('length of X testset 0-4 :', testXset1),
    ('length of Y testset 0-4 :', testYset1),
)
for caption, subset in set1_sizes:
    print(caption, len(subset))

length of X trainset 0-4 : 25000
length of Y trainset 0-4 : 25000
length of X testset 0-4 : 5000
length of Y testset 0-4 : 5000


In [7]:
# Sanity check: report the sample counts of the class 5-9 subset
# (image and label counts should match within each split).
set2_sizes = (
    ('length of X trainset 5-9 :', trainXset2),
    ('length of Y trainset 5-9 :', trainYset2),
    ('length of X testset 5-9 :', testXset2),
    ('length of Y testset 5-9 :', testYset2),
)
for caption, subset in set2_sizes:
    print(caption, len(subset))

length of X trainset 5-9 : 25000
length of Y trainset 5-9 : 25000
length of X testset 5-9 : 5000
length of Y testset 5-9 : 5000


### 2. Use one-hot encoding to convert y_train and y_test into the required number of output classes

In [0]:
# Materialise the class 0-4 subset as NumPy arrays (np.array copies the data
# if a value already is an array).
trainXset1, trainYset1, testXset1, testYset1 = map(
    np.array, (trainXset1, trainYset1, testXset1, testYset1))

In [0]:
# Materialise the class 5-9 subset as NumPy arrays (np.array copies the data
# if a value already is an array).
trainXset2, trainYset2, testXset2, testYset2 = map(
    np.array, (trainXset2, trainYset2, testXset2, testYset2))

In [0]:
print('train shape:', trainXset1.shape)
print('test  shape:', testXset1.shape)
# The image arrays are rank-4 (samples, height, width, channels), so unpacking
# the full shape into three names raises ValueError. Take only the leading
# three dimensions; for rank-3 (grayscale) input this equals the full shape.
x1, y1, z1 = trainXset1.shape[:3]
x2, y2, z2 = testXset1.shape[:3]

In [0]:
# Guarantee an explicit trailing channel axis. A hard-coded channel count of 1
# cannot hold 3-channel RGB images (the element counts differ, so reshape
# raises ValueError). Using -1 lets NumPy infer the channel count from the
# data: 3 for RGB input that is already rank-4, 1 for rank-3 grayscale input.
# Reading the leading dimensions straight from each array also keeps this
# cell idempotent when re-run.
trainXset1 = np.reshape(trainXset1, trainXset1.shape[:3] + (-1,))
testXset1  = np.reshape(testXset1, testXset1.shape[:3] + (-1,))

In [12]:
# Show the shapes of the class 0-4 image arrays after the reshape step.
for caption, images in (('train shape:', trainXset1), ('test  shape:', testXset1)):
    print(caption, images.shape)

train shape: (25000, 32, 32, 3)
test  shape: (5000, 32, 32, 3)


In [0]:
# Scale the pixel values by 1/255.
# NOTE(review): assumes 8-bit images in [0, 255]; re-running this cell scales
# the data again, so run it once per kernel session.
trainXset1, testXset1 = (images / 255 for images in (trainXset1, testXset1))

In [0]:
# One-hot encode the class 0-4 labels into 5-way target vectors.
trainYset1, testYset1 = (
    tf.keras.utils.to_categorical(labels, num_classes=5)
    for labels in (trainYset1, testYset1)
)

### 3. Build a sequential neural network model which can classify the classes 0 to 4 of CIFAR10 dataset with at least 80% accuracy on test data

In [0]:
# Per-sample input shape (height, width, channels), read from the data itself.
# The earlier (y1, z1, 1) declared a single channel, which does not match the
# 3-channel images reported by the notebook's printed train shape
# (25000, 32, 32, 3).
input_shape = trainXset1.shape[1:]
model = Sequential()

#Add a Convolutional Layer with 32 filters of size 3X3
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',input_shape=input_shape,name='conv_1'))

#Add a second Convolutional Layer with 32 filters of size 3X3
#(input_shape is only needed on the first layer of a Sequential model)
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',name='conv_2'))

#Add a MaxPooling Layer of size 2X2 
model.add(MaxPooling2D(pool_size=(2, 2),name='max_1'))

#Apply Dropout with 0.25 probability 
model.add(Dropout(0.25,name='drop_1'))

In [0]:
#Flatten the convolutional output into a vector for the dense layers
model.add(Flatten())

#Add Fully Connected Layer with 128 units and activation function as 'ReLU'
model.add(Dense(128, activation='relu',name='dense_1'))

#Add the output layer: 5 units (one per class 0-4) with 'softmax' activation
model.add(Dense(5, activation='softmax',name='dense_2'))

In [0]:
from keras.losses import categorical_crossentropy

#Set the loss function and optimizer for the model training
# Categorical cross-entropy matches the one-hot encoded 5-class targets.
# Stray pasted line numbers (2-5) that sat between these lines, including
# inside the compile(...) call, made the cell a SyntaxError and are removed.
model.compile(loss=categorical_crossentropy,
              optimizer='adam',
              metrics=['accuracy'])

In [0]:
# Train on the class 0-4 subset for 20 epochs with mini-batches of 32.
# NOTE(review): the test split is passed as validation_data, so the validation
# metrics printed per epoch are test-set metrics; there is no separate
# validation split.
model.fit(trainXset1, trainYset1,
          batch_size=32,
          epochs=20,
          verbose=1,
          validation_data=(testXset1, testYset1))

### 4. In the model which was built above (for classification of classes 0-4 in CIFAR10), make only the dense layers to be trainable and conv layers to be non-trainable

### 5. Utilize the model trained on CIFAR 10 (classes 0 to 4) to classify the classes 5 to 9 of CIFAR 10 (Use Transfer Learning) <br>
Achieve an accuracy of more than 85% on test data

# Text classification using TF-IDF

### 6. Load the dataset from sklearn.datasets

In [0]:
from sklearn.datasets import fetch_20newsgroups

In [0]:
# Newsgroup categories to load; passed as `categories` to fetch_20newsgroups.
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

### 7. Training data

In [0]:
# Load the training subset for the selected categories; shuffle=True with a
# fixed random_state keeps the shuffled order the same on every run.
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)

### 8. Test data

In [0]:
# Load the test subset for the same categories, with the same fixed shuffle seed.
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

###  a.  You can access the values for the target variable using .target attribute 
###  b. You can access the name of the class in the target variable with .target_names


In [0]:
# Display the target variable values of the training set.
twenty_train.target

In [0]:
# Display the class names of the target variable.
twenty_train.target_names

In [0]:
# Preview the first five entries of the training data.
twenty_train.data[0:5]

### 9. Now with dependent and independent data available for both train and test datasets, use TfidfVectorizer to fit and transform the training data, then transform the test data with the same fitted vectorizer, to get the TF-IDF features for both

### 10. Use LogisticRegression with the TF-IDF features as input and the targets as output, train the model, and report the train and test accuracy scores