# Transfer Learning CIFAR10

* Train a simple convnet on the CIFAR dataset the first 5 output classes [0..4].
* Freeze convolutional layers and fine-tune dense layers for the last 5 ouput classes [5..9].


### 1. Import CIFAR10 data and create 2 datasets with one dataset having classes from 0 to 4 and other having classes from 5 to 9 

In [1]:
import numpy as np
from keras.datasets import cifar10

#Load the dataset:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Using TensorFlow backend.


In [21]:
print ('X_train:',X_train.shape)
print ('y_train:',y_train.shape)

X_train: (50000, 32, 32, 3)
y_train: (50000, 1)


In [4]:
# Function for creating two data sets having classes from 0 to 4 and other having classes from 5 to 9
def prepare_dataset_class0to4(X_train, y_train, X_test, y_test):
    idx = np.where(y_train[:]<=4)[0]
    X_train_new = X_train[idx,::]
    y_train_new = y_train[idx,::]

    idx = np.where(y_test[:]<=4)[0]
    X_test_new = X_test[idx,::]
    y_test_new = y_test[idx,::]
    return X_train_new, y_train_new, X_test_new, y_test_new
    
def prepare_dataset_class5to9(X_train, y_train, X_test, y_test):
    idx = np.where(y_train[:]>=5)[0]
    X_train_new = X_train[idx,::]
    y_train_new = y_train[idx,::]

    idx = np.where(y_test[:]>=5)[0]
    X_test_new = X_test[idx,::]
    y_test_new = y_test[idx,::]
    return X_train_new, y_train_new, X_test_new, y_test_new

In [5]:
X_train_new1, y_train_new1, X_test_new1, y_test_new1 = prepare_dataset_class0to4(X_train, y_train, X_test, y_test)
X_train_new2, y_train_new2, X_test_new2, y_test_new2 = prepare_dataset_class5to9(X_train, y_train, X_test, y_test)

In [22]:
print ('X_train_new1:',X_train_new1.shape)
print ('X_train_new2:',X_train_new2.shape)
print ('y_train_new1:',y_train_new1.shape)
print ('y_train_new2:',y_train_new2.shape)
print ('X_test_new1:',X_test_new1.shape)
print ('X_test_new2:',X_test_new2.shape)
print ('y_test_new1:',y_test_new1.shape)
print ('y_test_new2:',y_test_new2.shape)

X_train_new1: (25000, 32, 32, 3)
X_train_new2: (25000, 32, 32, 3)
y_train_new1: (25000, 10)
y_train_new2: (25000, 10)
X_test_new1: (5000, 32, 32, 3)
X_test_new2: (5000, 32, 32, 3)
y_test_new1: (5000, 10)
y_test_new2: (5000, 10)


### 2. Use One-hot encoding to divide y_train and y_test into required no of output classes

In [15]:
from keras.utils import np_utils
num_classes = 10

y_train_new1 = np_utils.to_categorical(y_train_new1, num_classes)
y_train_new2 = np_utils.to_categorical(y_train_new2, num_classes)

y_test_new1 = np_utils.to_categorical(y_test_new1, num_classes)
y_test_new2 = np_utils.to_categorical(y_test_new2, num_classes)

In [16]:
print (y_train_new1.shape)
print (y_train_new2.shape)
print (y_test_new1.shape)
print (y_test_new2.shape)

(25000, 10)
(25000, 10)
(5000, 10)
(5000, 10)


In [25]:
indices,data = np.unique(y_train_new1, return_index=True)
print (indices)
print (data)

[0. 1.]
[0 3]


In [26]:
indices,data = np.unique(y_train_new2, return_index=True)
print (indices)
print (data)

[0. 1.]
[0 6]


In [27]:
indices,data = np.unique(y_test_new1, return_index=True)
print (indices)
print (data)

[0. 1.]
[0 3]


In [28]:
indices,data = np.unique(y_test_new2, return_index=True)
print (indices)
print (data)

[0. 1.]
[0 8]


### 3. Build a sequential neural network model which can classify the classes 0 to 4 of CIFAR10 dataset with at least 80% accuracy on test data

In [29]:
#Importing the necessary libraries 
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, GlobalAveragePooling2D

#Building up a Sequential model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu',input_shape = X_train_new1.shape[1:]))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(GlobalAveragePooling2D())
model.add(Dense(10, activation='softmax'))
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 32)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 4, 4, 64)          18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 2, 2, 64)          0         
_________________________________________________________________
glob

In [30]:
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

In [31]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='scratchmodel.best.hdf5', 
                               verbose=1,save_best_only=True)

In [32]:
#Fitting the model on the train data and labels.
model.fit(X_train_new1, y_train_new1, batch_size=32, epochs=10, 
          verbose=1, callbacks=[checkpointer], validation_split=0.2, shuffle=True)

Instructions for updating:
Use tf.cast instead.
Train on 20000 samples, validate on 5000 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 3.20605, saving model to scratchmodel.best.hdf5
Epoch 2/10

Epoch 00002: val_loss did not improve from 3.20605
Epoch 3/10

Epoch 00003: val_loss did not improve from 3.20605
Epoch 4/10

Epoch 00004: val_loss did not improve from 3.20605
Epoch 5/10

Epoch 00005: val_loss did not improve from 3.20605
Epoch 6/10

Epoch 00006: val_loss did not improve from 3.20605
Epoch 7/10

Epoch 00007: val_loss did not improve from 3.20605
Epoch 8/10

Epoch 00008: val_loss did not improve from 3.20605
Epoch 9/10

Epoch 00009: val_loss did not improve from 3.20605
Epoch 10/10

Epoch 00010: val_loss did not improve from 3.20605


<keras.callbacks.History at 0x1d380ff6780>

In [33]:
initial_weights = model.get_weights()

In [34]:
#Evaluate the model on the test data
score = model.evaluate(X_test_new1, y_test_new1)

#Accuracy on test data
print('Accuracy on the Test Images: ', score[1])

Accuracy on the Test Images:  0.8000000714302063


### 4. In the model which was built above (for classification of classes 0-4 in CIFAR10), make only the dense layers to be trainable and conv layers to be non-trainable

In [35]:
transf_model = model

In [36]:
transf_model.set_weights(initial_weights)

In [37]:
transf_model.layers

[<keras.layers.convolutional.Conv2D at 0x1d380ecaeb8>,
 <keras.layers.pooling.MaxPooling2D at 0x1d3ec0c8d68>,
 <keras.layers.convolutional.Conv2D at 0x1d3ec0c88d0>,
 <keras.layers.pooling.MaxPooling2D at 0x1d380ece6d8>,
 <keras.layers.convolutional.Conv2D at 0x1d380ece4e0>,
 <keras.layers.pooling.MaxPooling2D at 0x1d380ef6b00>,
 <keras.layers.pooling.GlobalAveragePooling2D at 0x1d380f1aa58>,
 <keras.layers.core.Dense at 0x1d380f34898>]

In [38]:
transf_model.layers[7]

<keras.layers.core.Dense at 0x1d380f34898>

In [39]:
#Set all layers trainable to False (except dense layer)
for layer in transf_model.layers:
    layer.trainable = False
transf_model.layers[7].trainable = True
print(transf_model.layers[7])

<keras.layers.core.Dense object at 0x000001D380F34898>


In [40]:
#Compile model
transf_model.compile(loss="categorical_crossentropy", optimizer='adam',\
    metrics=["accuracy"])

In [41]:
#Train model on second part of the data
transf_model.fit(X_train_new1, y_train_new1, batch_size=32, epochs=20, \
          verbose=1, callbacks=[checkpointer], validation_split=0.2, shuffle=True)
#Store transfer model weights
transf_weights = transf_model.get_weights()

Train on 20000 samples, validate on 5000 samples
Epoch 1/20

Epoch 00001: val_loss did not improve from 3.20605
Epoch 2/20

Epoch 00002: val_loss did not improve from 3.20605
Epoch 3/20

Epoch 00003: val_loss did not improve from 3.20605
Epoch 4/20

Epoch 00004: val_loss did not improve from 3.20605
Epoch 5/20

Epoch 00005: val_loss did not improve from 3.20605
Epoch 6/20

Epoch 00006: val_loss did not improve from 3.20605
Epoch 7/20

Epoch 00007: val_loss did not improve from 3.20605
Epoch 8/20

Epoch 00008: val_loss did not improve from 3.20605
Epoch 9/20

Epoch 00009: val_loss did not improve from 3.20605
Epoch 10/20

Epoch 00010: val_loss did not improve from 3.20605
Epoch 11/20

Epoch 00011: val_loss did not improve from 3.20605
Epoch 12/20

Epoch 00012: val_loss did not improve from 3.20605
Epoch 13/20

Epoch 00013: val_loss did not improve from 3.20605
Epoch 14/20

Epoch 00014: val_loss did not improve from 3.20605
Epoch 15/20

Epoch 00015: val_loss did not improve from 3.20605


In [42]:
#Check where the weights have changed
for i in range(len(initial_weights)):
    update_w = np.sum(initial_weights[i] != transf_weights[i])
    if update_w != 0:
        print(str(update_w)+' updated weights for layer '+str(transf_model.layers[i]))

In [43]:
#Evaluate the model on the test data
score  = transf_model.evaluate(X_test_new1, y_test_new1)

#Accuracy on test data
print('Accuracy on the Test Images: ', score[1])


Accuracy on the Test Images:  0.0


### 5. Utilize the the model trained on CIFAR 10 (classes 0 to 4) to classify the classes 5 to 9 of CIFAR 10  (Use Transfer Learning) <br>
Achieve an accuracy of more than 85% on test data

In [44]:
transf_model.fit(X_test_new2, y_test_new2, batch_size=32, epochs=10,
          validation_split=0.2, callbacks=[checkpointer], verbose=1, shuffle=True)

Train on 4000 samples, validate on 1000 samples
Epoch 1/10

Epoch 00001: val_loss did not improve from 3.20605
Epoch 2/10

Epoch 00002: val_loss did not improve from 3.20605
Epoch 3/10

Epoch 00003: val_loss did not improve from 3.20605
Epoch 4/10

Epoch 00004: val_loss did not improve from 3.20605
Epoch 5/10

Epoch 00005: val_loss did not improve from 3.20605
Epoch 6/10

Epoch 00006: val_loss did not improve from 3.20605
Epoch 7/10

Epoch 00007: val_loss did not improve from 3.20605
Epoch 8/10

Epoch 00008: val_loss did not improve from 3.20605
Epoch 9/10

Epoch 00009: val_loss did not improve from 3.20605
Epoch 10/10

Epoch 00010: val_loss did not improve from 3.20605


<keras.callbacks.History at 0x1d39873e390>

In [45]:
#Evaluate the model on the test data
score  = transf_model.evaluate(X_test_new2, y_test_new2)

#Accuracy on test data
print('Accuracy on the Test Images: ', score[1])

Accuracy on the Test Images:  0.2


# Text classification using TF-IDF

### 6. Load the dataset from sklearn.datasets

In [76]:
from sklearn.datasets import fetch_20newsgroups
import pandas as pd
import re
import numpy as np

In [77]:
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']

### 7. Training data

In [78]:
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)

### 8. Test data

In [79]:
twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

###  a.  You can access the values for the target variable using .target attribute 
###  b. You can access the name of the class in the target variable with .target_names


In [80]:
twenty_train.target

array([1, 1, 3, ..., 2, 2, 2], dtype=int64)

In [81]:
twenty_train.target_names

['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']

In [82]:
twenty_train.data[0:5]

['From: sd345@city.ac.uk (Michael Collier)\nSubject: Converting images to HP LaserJet III?\nNntp-Posting-Host: hampton\nOrganization: The City University\nLines: 14\n\nDoes anyone know of a good way (standard PC application/PD utility) to\nconvert tif/img/tga files into LaserJet III format.  We would also like to\ndo the same, converting to HPGL (HP plotter) files.\n\nPlease email any response.\n\nIs this the correct group?\n\nThanks in advance.  Michael.\n-- \nMichael Collier (Programmer)                 The Computer Unit,\nEmail: M.P.Collier@uk.ac.city                The City University,\nTel: 071 477-8000 x3769                      London,\nFax: 071 477-8565                            EC1V 0HB.\n',
 "From: ani@ms.uky.edu (Aniruddha B. Deglurkar)\nSubject: help: Splitting a trimming region along a mesh \nOrganization: University Of Kentucky, Dept. of Math Sciences\nLines: 28\n\n\n\n\tHi,\n\n\tI have a problem, I hope some of the 'gurus' can help me solve.\n\n\tBackground of the probl

In [83]:
df_twenty_train = pd.DataFrame(data= np.c_[twenty_train['data'], twenty_train['target']], columns= ['feature_names','target'])

df_twenty_test = pd.DataFrame(data= np.c_[twenty_test['data'], twenty_test['target']], columns= ['feature_names','target'])

In [84]:
df_twenty_train.head()

Unnamed: 0,feature_names,target
0,From: sd345@city.ac.uk (Michael Collier)\nSubj...,1
1,From: ani@ms.uky.edu (Aniruddha B. Deglurkar)\...,1
2,From: djohnson@cs.ucsd.edu (Darin Johnson)\nSu...,3
3,From: s0612596@let.rug.nl (M.M. Zwart)\nSubjec...,3
4,From: stanly@grok11.columbiasc.ncr.com (stanly...,3


In [85]:
df_twenty_test.head()

Unnamed: 0,feature_names,target
0,From: brian@ucsd.edu (Brian Kantor)\nSubject: ...,2
1,From: rind@enterprise.bih.harvard.edu (David R...,2
2,From: adwright@iastate.edu ()\nSubject: Re: ce...,2
3,From: livesey@solntze.wpd.sgi.com (Jon Livesey...,0
4,From: jhpb@sarto.budd-lake.nj.us (Joseph H. Bu...,3


In [86]:
df_twenty_test.shape

(1502, 2)

In [87]:
df_twenty_train.shape

(2257, 2)

In [60]:
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords #nltk - natural language toolkit

In [61]:
def text_to_words( raw_text ):
    # Function to convert a raw review to a string of words
    # The input is a single string (a raw movie review), and 
    # the output is a single string (a preprocessed movie review)
    #
    # 1. Remove HTML
    text = BeautifulSoup(raw_text).get_text() 
    #
    # 2. Remove non-letters        
    letters_only = re.sub("[^a-zA-Z]", " ", text) 
    #
    # 3. Convert to lower case, split into individual words
    words = letters_only.lower().split()                             
    #
    # 4. In Python, searching a set is much faster than searching
    #   a list, so convert the stop words to a set
    stops = set(stopwords.words("english"))                  
    # 
    # 5. Remove stop words
    meaningful_words = [w for w in words if not w in stops]   
    #
    # 6. Join the words back into one string separated by space, 
    # and return the result.
    return( " ".join( meaningful_words ))  

In [62]:
clean_data = text_to_words( df_twenty_train["feature_names"][0] )
print(clean_data)

sd city ac uk michael collier subject converting images hp laserjet iii nntp posting host hampton organization city university lines anyone know good way standard pc application pd utility convert tif img tga files laserjet iii format would also like converting hpgl hp plotter files please email response correct group thanks advance michael michael collier programmer computer unit email p collier uk ac city city university tel x london fax ec v hb


In [63]:
# Get the number of datapoints based on the dataframe column size
num_data = df_twenty_train["feature_names"].size

# Initialize an empty list to hold the clean reviews
clean_twenty_train = []

# Loop over each review; create an index i that goes from 0 to the length
# of the movie review list 
for i in range( 0, num_data ):
    # Call our function for each one, and add the result to the list of
    # clean reviews
    clean_twenty_train.append( text_to_words( df_twenty_train["feature_names"][i] ) )
    
#####
# Get the number of datapoints based on the dataframe column size for TEST data
num_data_test = df_twenty_test["feature_names"].size

# Initialize an empty list to hold the clean reviews
clean_twenty_test = []

# Loop over each review; create an index i that goes from 0 to the length
# of the movie review list 
for i in range( 0, num_data_test ):
    # Call our function for each one, and add the result to the list of
    # clean reviews
    clean_twenty_test.append( text_to_words( df_twenty_test["feature_names"][i] ) )

In [64]:
len(clean_twenty_train)

2257

### 9.  Now with dependent and independent data available for both train and test datasets, using TfidfVectorizer fit and transform the training data and test data and get the tfidf features for both

In [70]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
# tokenize and build vocab
train_data_features = vectorizer.fit_transform(clean_twenty_train)

print(train_data_features)

# Numpy arrays are easy to work with, so convert the result to an 
# array
train_data_features = train_data_features.toarray()

print(train_data_features.shape)

  (0, 24216)	0.13884950070261654
  (0, 4560)	0.3529404116293565
  (0, 137)	0.1410091912289279
  (0, 28315)	0.1357630691040486
  (0, 17088)	0.22202002029460763
  (0, 4891)	0.43288363163413845
  (0, 26217)	0.018927294040341276
  (0, 5639)	0.2430131931879898
  (0, 12947)	0.08388151429122585
  (0, 12524)	0.19149398638157444
  (0, 15264)	0.27769900140523307
  (0, 12888)	0.20987268475794266
  (0, 18487)	0.04255506890711052
  (0, 20902)	0.03999120516126367
  (0, 12491)	0.042179113349531665
  (0, 11717)	0.12825748813474552
  (0, 19239)	0.020636773767764082
  (0, 28651)	0.0743957111200112
  (0, 15727)	0.019610047742002715
  (0, 1268)	0.04863372083771384
  (0, 14965)	0.03921651469906459
  (0, 11241)	0.04815653689056837
  (0, 29748)	0.04887690140826429
  (0, 25773)	0.07773655864813343
  (0, 19917)	0.08607844640642758
  :	:
  (2256, 24410)	0.08521542187329259
  (2256, 26473)	0.08615990835593351
  (2256, 22293)	0.0871554724009703
  (2256, 9460)	0.08820795596408748
  (2256, 10359)	0.1107418829959938

In [71]:
# summarize
print(vectorizer.vocabulary_)
print(vectorizer.idf_)
# # encode document
# vector = vectorizer.transform(twenty_test)
# # summarize encoded vector
# print(train_data_features.shape)
# print(train_data_features.toarray())

[5.67771231 7.62362246 8.02908756 ... 7.62362246 8.02908756 8.02908756]


In [72]:
vocab = vectorizer.get_feature_names()
print(vocab)



In [73]:
# vectorizer_test = TfidfVectorizer()
# tokenize and build vocab
test_data_features = vectorizer.transform(clean_twenty_test)

print(test_data_features)

# Numpy arrays are easy to work with, so convert the result to an 
# array
test_data_features = test_data_features.toarray()

print(test_data_features.shape)

  (0, 30231)	0.11483139970936426
  (0, 29748)	0.0567451910029826
  (0, 28274)	0.24748237715966737
  (0, 26982)	0.11620523723488929
  (0, 26542)	0.15629832699984794
  (0, 26217)	0.02197424314028177
  (0, 26009)	0.4301464797320204
  (0, 25406)	0.10169438409650344
  (0, 22741)	0.15629832699984794
  (0, 22380)	0.10659779474826782
  (0, 22244)	0.10614470151789562
  (0, 20902)	0.04642905973846508
  (0, 20714)	0.148904604262379
  (0, 19800)	0.119240986362579
  (0, 19567)	0.09894401876103025
  (0, 19402)	0.09993550617647866
  (0, 19239)	0.023958917922303282
  (0, 18487)	0.04940565878160946
  (0, 18125)	0.11291939055542341
  (0, 16934)	0.10097385255726427
  (0, 16817)	0.12374118857983368
  (0, 16039)	0.12476343071428454
  (0, 15727)	0.022766907734241094
  (0, 15519)	0.0812685349340609
  (0, 14847)	0.4232008269666688
  :	:
  (1501, 22244)	0.1411493773398929
  (1501, 20515)	0.11860193057245341
  (1501, 19917)	0.1328924974043927
  (1501, 19239)	0.03186015220835636
  (1501, 18563)	0.194108835747105

In [88]:
# summarize
print(vectorizer.vocabulary_)
print(vectorizer.idf_)


[5.67771231 7.62362246 8.02908756 ... 7.62362246 8.02908756 8.02908756]


In [89]:
vocab_test = vectorizer.get_feature_names()
print(vocab)



### 10. Use logisticRegression with tfidf features as input and targets as output and train the model and report the train and test accuracy score

In [90]:
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

In [91]:
type(train_data_features)

numpy.ndarray

In [92]:
type(df_twenty_train["target"])

pandas.core.series.Series

In [93]:
train_target_arr = df_twenty_train["target"].to_numpy()
test_target_arr = df_twenty_test["target"].to_numpy()

In [94]:
test_data_features.shape

(1502, 30715)

In [95]:
train_data_features.shape

(2257, 30715)

In [96]:
#Logistic regression

# Fit the model
model = LogisticRegression()
model.fit(train_data_features, train_target_arr)
test_pred_LR = model.predict(test_data_features)
model_score_LR = model.score(test_data_features, test_target_arr)
print(model_score_LR)



0.8921438082556591


Accuracy of 89.21% is achieved