This notebook preprocesses the images and then applies the necessary feature-extraction techniques.
After that, the clustering models are applied.

In [7]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19

from tensorflow.keras.applications.resnet50 import ResNet50

from tensorflow.keras.applications.xception import Xception

from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.densenet import DenseNet169
from tensorflow.keras.applications.densenet import DenseNet201

from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.applications.nasnet import NASNetMobile


from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2


from tensorflow.keras.models import Model

from tensorflow.keras.applications.vgg16 import preprocess_input

from sklearn.cluster import KMeans,AgglomerativeClustering,DBSCAN,OPTICS,MeanShift
import os, shutil, glob, os.path
from PIL import Image as pil_image
from matplotlib import pyplot as plt
from pylab import *

import sklearn
from sklearn.manifold import TSNE
import time

from sklearn.decomposition import PCA 

from sklearn.metrics import pairwise_distances
from sklearn import metrics
from sklearn.metrics import pairwise_distances
from sklearn.metrics import davies_bouldin_score


import warnings
warnings.filterwarnings('ignore')

In [8]:

%%capture
# Notebook-flavoured tqdm progress bar, bound to the short name `tqdm` that later cells call.
from tqdm import tqdm_notebook as tqdm
# NOTE(review): presumably registers tqdm's pandas progress helpers — confirm against the tqdm docs.
tqdm().pandas()

Transfer learning is adaptation rather than creation: instead of building a model from scratch, a pre-trained model is adapted to a new problem. When the dataset is too small to train a deep-learning model from scratch, transfer learning lets us extract features automatically by reusing the feature maps the pre-trained model has already learned, without having to train a large model on a large dataset. In general, the features can be extracted in the following two ways:

# CASE-1


Extract the features from each image with the pretrained models by applying a pooling operation to the output of the
last convolution layer. See `pretrained_weights['vgg16'].summary()` below: the final (None, None, None, 512) feature map is pooled.

In [9]:
def loadPretrainedWeights():
    """Build every ImageNet-pretrained backbone used for feature extraction.

    Each network is constructed with ``weights='imagenet'``,
    ``include_top=False`` and ``pooling='avg'``.

    Returns:
        dict: short model key (e.g. ``'vgg16'``) -> constructed model,
        inserted in the order listed below.
    """
    # (key, constructor) pairs. The keys are reused when naming the saved
    # feature files, so they are kept exactly as originally spelled.
    backbones = (
        ('vgg16', VGG16),
        ('vgg19', VGG19),
        ('resnet50', ResNet50),
        ('inceptionv3', InceptionV3),
        ('inception-resentv2', InceptionResNetV2),
        ('xception', Xception),
        ('densenet121', DenseNet121),
        ('densenet169', DenseNet169),
        ('densenet201', DenseNet201),
        ('mobilenet', MobileNet),
    )
    # NASNetLarge, NASNetMobile and MobileNetV2 were commented out in the
    # original cell and remain excluded here.
    shared_kwargs = dict(weights='imagenet', include_top=False, pooling='avg')
    return {key: build(**shared_kwargs) for key, build in backbones}


In [10]:
# Build all backbones once; later cells look models up in this dict by key.
pretrained_weights=loadPretrainedWeights()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [11]:
 # Print the layer-by-layer summary of the model stored under the 'vgg16' key.
 pretrained_weights['vgg16'].summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

# CASE-2

The last few layers of the VGG16 model (for example) are fully connected layers that come before the output layer. These layers provide a complex set of features describing a given input image and may be useful input when training a new model for image classification or a related computer vision task.

In [33]:
# NOTE(review): `model_vgg_19_fcn` is not defined in any cell visible here, and this cell's
# execution count (In [33]) is out of sequence — confirm where the model is built, otherwise
# this line fails on a fresh kernel.
model_vgg_19_fcn.summary()

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

Loading the filepaths here

In [12]:
def loadFilePaths(image_directory):
    """Return the sorted paths of the regular files directly inside ``image_directory``.

    Args:
        image_directory: directory to scan (not recursive).

    Returns:
        list[str]: ``os.path.join(image_directory, name)`` for every regular
        file in the directory, in ascending path order.

    Raises:
        OSError: propagated from ``os.listdir`` when the directory cannot be read.
    """
    candidates = (
        os.path.join(image_directory, name) for name in os.listdir(image_directory)
    )
    # os.listdir gives no ordering guarantee and also yields sub-directories
    # (e.g. Jupyter's .ipynb_checkpoints), which would break image loading
    # downstream — keep regular files only and make the order deterministic.
    return sorted(path for path in candidates if os.path.isfile(path))


In [13]:
def visualizeDataset2(files_path):
    """Show every image in ``files_path`` on a compact three-column grid.

    Args:
        files_path: sequence of image file paths that PIL can open.

    Draws onto the current matplotlib figure; returns nothing.
    """
    columns = 3
    # Ceil-divide so the grid has just enough rows. The original asked for
    # len(files_path) rows, which left two thirds of the figure empty.
    rows = (len(files_path) + columns - 1) // columns
    plt.subplots_adjust(hspace=0.000)
    for position, path in enumerate(files_path, start=1):
        picture = pil_image.open(path)
        ax = plt.subplot(rows, columns, position)
        ax.axis('off')
        ax.imshow(picture, cmap="gray", aspect="auto")

In [14]:
def visualizeDataset1(file_paths):
    """Preview at most the first twelve images of ``file_paths`` on a 2 x 6 grid.

    Args:
        file_paths: sequence of image file paths that PIL can open.

    Draws onto the current matplotlib figure; returns nothing.
    """
    grid_rows, grid_cols = 2, 6
    preview = file_paths[:12]
    for slot, path in tqdm(enumerate(preview)):
        thumbnail = pil_image.open(path)
        # Subplot indices are 1-based, hence slot + 1.
        plt.subplot(grid_rows, grid_cols, slot + 1)
        plt.axis('off')
        plt.imshow(thumbnail)

Extracting the features: features are extracted from each pretrained model in turn.

In [15]:
 # the function is used to calculate the features from the image 
def getFeatures(filelist, model, preprocess_fn=None, target_size=(224, 224)):
    """Run every image in ``filelist`` through ``model`` and collect flat feature vectors.

    Args:
        filelist: list of image paths. It is sorted IN PLACE (unchanged from
            the original behaviour), so afterwards the caller's list lines up
            index-for-index with the returned features.
        model: object exposing ``predict(batch)``.
        preprocess_fn: callable applied to the single-image batch before
            prediction. Defaults to the module-level ``preprocess_input``
            (imported from the VGG16 application module), which was previously
            hard-coded for every backbone. Pass the ``preprocess_input`` that
            belongs to the backbone in use when it differs from VGG16's.
        target_size: ``(height, width)`` forwarded to ``image.load_img``;
            defaults to the previously hard-coded ``(224, 224)``.

    Returns:
        list of 1-D numpy arrays, one per image, in sorted-path order.
    """
    if preprocess_fn is None:
        preprocess_fn = preprocess_input
    filelist.sort()
    total = len(filelist)
    featurelist = []
    # 1-based counter so the carriage-return progress line ends on "N / N".
    for done, imagepath in enumerate(filelist, start=1):
        print(" Status: %s / %s" % (done, total), end="\r")
        img = image.load_img(imagepath, target_size=target_size)
        # Add a leading axis of size 1 so the model receives a one-image batch.
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        features = np.asarray(model.predict(preprocess_fn(batch)))
        featurelist.append(features.flatten())
    return featurelist



In [36]:
def saveFeatures(features, modelname, filename, output_dir='Image_features'):
    """Persist extracted features as ``<output_dir>/<filename>_<modelname>.npy``.

    Args:
        features: array-like of feature vectors, handed to ``np.save``.
        modelname: suffix identifying the model (e.g. ``'model_xception'``).
        filename: prefix identifying the dataset (e.g. ``'ideology'``).
        output_dir: destination directory; created when missing. Defaults to
            the previously hard-coded ``'Image_features'``.
    """
    # The original wrote straight into the directory and failed when it did
    # not exist yet (e.g. on a fresh checkout); create it up front.
    os.makedirs(output_dir, exist_ok=True)
    saved_filename = os.path.join(output_dir, filename + '_' + modelname) + '.npy'

    print("saving", saved_filename)
    np.save(saved_filename, features)

In [37]:
# Paths of every entry in the ideology image folder (relative to the notebook's working directory).
ideology_files_path=loadFilePaths('ideology_image_dataset/')
# Second dataset, currently disabled:
#muslim_files_path=loadFilePaths('muslim_image_dataset/')


In [41]:
# Backbones this run leaves out (exactly the keys the original condition excluded).
skipped_models = {'vgg16', 'vgg19', 'resnet50', 'inceptionv3'}
# NOTE(review): getFeatures applies the preprocess_input imported from the VGG16 module to
# every backbone — confirm that is intended for the non-VGG models handled here.
for model, network in pretrained_weights.items():
    if model in skipped_models:
        continue
    print("extracting features for -->", model)
    ideology_features = getFeatures(ideology_files_path, network)
    saveFeatures(ideology_features, f'model_{model}', 'ideology')
        


extracting features for --> inception-resentv2
saving Image_features/ideology_model_inception-resentv2.npy
extracting features for --> xception
saving Image_features/ideology_model_xception.npy
extracting features for --> densenet121
saving Image_features/ideology_model_densenet121.npy
extracting features for --> densenet169
saving Image_features/ideology_model_densenet169.npy
extracting features for --> densenet201
saving Image_features/ideology_model_densenet201.npy
extracting features for --> mobilenet
saving Image_features/ideology_model_mobilenet.npy


In [46]:
# Sanity check: print the path and array shape of every saved .npy feature file.
for file in os.listdir('Image_features/'):
    if not file.endswith('.npy'):
        continue
    feature_path = f'Image_features/{file}'
    print(feature_path)
    features = np.load(feature_path)
    print(features.shape, file)

Image_features/ideology_model_vgg_16.npy
(2942, 512) ideology_model_vgg_16.npy
Image_features/ideology_model_vgg_19.npy
(2942, 512) ideology_model_vgg_19.npy
Image_features/ideology_model_resnet50.npy
(2942, 2048) ideology_model_resnet50.npy
Image_features/ideology_model_inceptionv3.npy
(2942, 2048) ideology_model_inceptionv3.npy
Image_features/ideology_model_inception-resentv2.npy
(2942, 1536) ideology_model_inception-resentv2.npy
Image_features/ideology_model_xception.npy
(2942, 2048) ideology_model_xception.npy
Image_features/ideology_model_densenet121.npy
(2942, 1024) ideology_model_densenet121.npy
Image_features/ideology_model_densenet169.npy
(2942, 1664) ideology_model_densenet169.npy
Image_features/ideology_model_densenet201.npy
(2942, 1920) ideology_model_densenet201.npy
Image_features/ideology_model_mobilenet.npy
(2942, 1024) ideology_model_mobilenet.npy
