Создаем датасет из наших фото

In [None]:
import json
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

In [None]:
# Helper for reading a JSON Lines file (one JSON object per line)
def load_rows(filepath, nrows = None):
    """Load a JSON Lines file into a DataFrame.

    Args:
        filepath: Path to a UTF-8 text file holding one JSON object per line.
        nrows: Maximum number of records to read; ``None`` reads the whole file.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed JSON object.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    objs = []
    with open(filepath, encoding='utf-8') as json_file:
        for line in json_file:
            if nrows is not None and len(objs) >= nrows:
                break
            # A blank line (e.g. a stray newline) would make json.loads raise
            if not line.strip():
                continue
            objs.append(json.loads(line))
    return pd.DataFrame(objs)

In [None]:
# Load photos.json (read line by line by load_rows) into a DataFrame
photos = load_rows('../input/yelp-photos/photos.json')
# Summary statistics, shown as the cell output
photos.describe()

In [None]:
# Add a photo_path column: the image file name is the photo id plus '.jpg'
photos['photo_path'] = photos['photo_id'] + '.jpg'
photos.head()

In [None]:
# Keep at most 12000 rows for each of the four labels we classify (rows with
# any other label, such as menu, are dropped), then shuffle and re-index.
kept_labels = ['food', 'inside', 'outside', 'drink']
per_label = [photos[photos['label'] == name][:12000] for name in kept_labels]
photos = pd.concat(per_label)
photos = photos.sample(frac=1).reset_index(drop=True)


In [None]:
photos['label'].value_counts()

## Creating dataset with business

In [None]:
# Load the business records (read line by line by load_rows) into a DataFrame
business = load_rows('../input/yelp-photos/yelp_academic_dataset_business.json')
# Summary statistics, shown as the cell output
business.describe()

In [None]:
# Join every photo with its business record, then drop the business columns
# that are not needed downstream.
unused_columns = ['address', 'city', 'state', 'postal_code', 'latitude',
                  'longitude', 'stars', 'review_count', 'is_open', 'hours']
df = photos.merge(business, how='inner', on='business_id').drop(columns=unused_columns)
df

## Generate from dataframe

In [None]:
# Generators only rescale pixel values to [0, 1]; no augmentation is applied.
datagen = ImageDataGenerator(rescale=1./255.)
test_datagen = ImageDataGenerator(rescale=1./255.)
batch_size = 50
nTrain = 48000
photos_dir = '../input/photos/photos'

# 70% / 20% / 10% split boundaries over the first nTrain rows of df
train_end = int(nTrain * 0.7)
valid_end = int(nTrain * 0.9)

# No `subset` argument here: datagen has no validation_split, so asking for
# the 'training' subset selected every row anyway.
train_generator = datagen.flow_from_dataframe(df[:train_end],
                                              photos_dir, x_col='photo_path', y_col='label',
                                              target_size=(224, 224),
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)
valid_generator = test_datagen.flow_from_dataframe(df[train_end:valid_end],
                                                   photos_dir, x_col='photo_path', y_col='label',
                                                   target_size=(224, 224),
                                                   batch_size=batch_size,
                                                   class_mode='categorical',
                                                   shuffle=True)
# The test generator must NOT shuffle: evaluation compares predictions with
# test_generator.classes, which is listed in dataframe order.
test_generator = test_datagen.flow_from_dataframe(df[valid_end:],
                                                  photos_dir, x_col='photo_path', y_col='label',
                                                  target_size=(224, 224),
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

In [None]:
# Test generator built straight from `photos` (rows after the first 90% of nTrain)
test_datagen = ImageDataGenerator(rescale=1./255.)
batch_size = 50
nTrain = 46000
photos_dir = '../input/photos/photos'
# shuffle=False keeps the batches in dataframe order, so predictions can be
# compared with test_generator.classes / test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(photos[int(nTrain*0.9):],
                                                  photos_dir, x_col='photo_path', y_col='label',
                                                  target_size=(224, 224),
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

## Flow from directory generator


In [None]:
# Set up the training generator and the buffers for extracted features
nTrain=48000
photos_dir = '../input/photos/photos'
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Rescale pixels to [0, 1]; validation_split=0.2 reserves 20% of the rows
# for the 'validation' subset
datagen = ImageDataGenerator(rescale=1./255, validation_split = 0.2)
# define the batch size
batch_size = 50

# Buffers shaped like the network output (7 x 7 x 512 per image) and the
# one-hot labels (4 classes).
# NOTE(review): with NumPy's default float64 this feature buffer is about
# 9.6 GB for nTrain=48000; a later cell re-creates it with a smaller nTrain.
train_features = np.zeros(shape=(nTrain, 7, 7, 512))
train_labels = np.zeros(shape=(nTrain,4))
# Training subset of the first nTrain photos, shuffled, in batches of batch_size
train_generator = datagen.flow_from_dataframe(photos[:nTrain],
    photos_dir, x_col = 'photo_path', y_col = 'label',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical', subset = 'training',
    shuffle=True)

In [None]:
# Validation subset of the same photos (the share reserved by datagen's
# validation_split). shuffle=False keeps batches in dataframe order so that
# extracted features line up with validation_generator.classes / .filenames.
validation_generator = datagen.flow_from_dataframe(photos[:nTrain],
    photos_dir, x_col = 'photo_path', y_col = 'label',
    target_size=(224, 224),
    batch_size=batch_size,color_mode="rgb",
    class_mode='categorical', subset = 'validation',
    shuffle=False)

In [None]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from sklearn.metrics import classification_report
#from imutils import paths
import matplotlib.pyplot as plt
import numpy as np

## Feature extraction

In [None]:
# Get feature vector of an image by given model and img_path
def getFeatureVector(model, img_path):
  """Run one image through `model` and return the prediction.

  Args:
    model: Object with a Keras-style ``predict`` method accepting a
      (1, 224, 224, 3) array.
    img_path: Path of the image file to read.

  Returns:
    Whatever ``model.predict`` returns for the single-image batch.

  Raises:
    FileNotFoundError: If the image cannot be read from `img_path`.
  """
  img = cv2.imread(img_path)
  # cv2.imread signals failure by returning None instead of raising
  if img is None:
    raise FileNotFoundError("Could not read image: {}".format(img_path))
  img = cv2.resize(img, (224, 224))
  # OpenCV loads images as BGR; convert to RGB channel order
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  # NOTE(review): pixels are passed in the 0-255 range without rescaling —
  # confirm this matches the preprocessing the model expects.
  feature_vector = model.predict(img.reshape(1, 224, 224, 3))
  return feature_vector

In [None]:
# Build a dataframe holding the output features of the given model
def getFeatureDataFrame(model):
  """Compute a feature vector for every training and validation file.

  Reads the file lists from the module-level `train_generator` and
  `validation_generator` and calls `getFeatureVector` once per file.

  Args:
    model: Model passed through to `getFeatureVector`.

  Returns:
    DataFrame with columns 'file' (path) and 'features' (model output).
  """
  df = pd.DataFrame(columns=['file', 'features'])
  train_files = train_generator.filepaths
  valid_files = validation_generator.filepaths
  # Training files first, then validation files
  files = train_files + valid_files

  df['file'] = files
  # One predict call per row
  df['features'] = df.apply(lambda row: getFeatureVector(model, row['file']), axis=1) 

  print("All files added.")
  return df

In [None]:
# Get VGG-16 Model
def getVGG16Model(lastFourTrainable=False, num_classes=4):
  """Build and compile a VGG-16 based classifier.

  The ImageNet-pretrained VGG-16 (including its fully connected top) is
  frozen, and a new 1024-unit dense layer plus a softmax output are attached
  to its 'fc2' layer.

  Args:
    lastFourTrainable: When truthy, make 'block5_conv3', 'fc1', 'fc2' and
      'new_fc' trainable again.
    num_classes: Number of units in the softmax output layer.

  Returns:
    The compiled Keras model (Adam optimizer, categorical cross-entropy).
  """
  vgg_model = VGG16(weights='imagenet', input_shape=input_shape, include_top=True)

  # Make all layers untrainable
  for layer in vgg_model.layers:
    layer.trainable = False

  # Add fully connected layer which have 1024 neuron to VGG-16 model
  output = vgg_model.get_layer('fc2').output
  output = Flatten(name='new_flatten')(output)
  output = Dense(units=1024, activation='relu', name='new_fc')(output)
  output = Dense(units=num_classes, activation='softmax')(output)
  vgg_model = Model(vgg_model.input, output)

  # Optionally unfreeze the last convolution and the dense layers
  if lastFourTrainable:
    for layer_name in ('block5_conv3', 'fc1', 'fc2', 'new_fc'):
      vgg_model.get_layer(layer_name).trainable = True

  # Compile after the trainable flags are final
  vgg_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  vgg_model.summary()

  return vgg_model

In [None]:
# Import necessary libraries
import numpy as np
from keras.utils import np_utils
from keras.models import Model, Sequential, load_model
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Reshape, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from sklearn.metrics import confusion_matrix, classification_report
import itertools
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Shared constants: input image geometry and training hyper-parameters
img_rows, img_cols = 224, 224
input_shape = (img_rows, img_cols, 3)
num_of_classes = 4
batch_size = 50
epochs = 10

In [None]:
# Build the frozen VGG-16 classifier, then expose the output of its 'new_fc'
# layer as a feature extractor
vgg_model_a = getVGG16Model(lastFourTrainable=False)
#vgg_model_a.load_weights('/content/drive/MyDrive/Colab Notebooks/cinic-10/model_vgg_nontrainable.h5')
feature_model_vgg_a = Model(inputs=vgg_model_a.input, outputs=vgg_model_a.get_layer('new_fc').output)

# Extract features for every training and validation file (overwrites `df`)
df = getFeatureDataFrame(feature_model_vgg_a)

In [None]:
df.to_pickle("features_vgg_a.pickle")

In [None]:
# Derive photo_id from each file path by dropping the directory and the file
# extension. The feature dataframe only has 'file' and 'features' columns, so
# the id has to be built from 'file'; a single vectorised assignment also
# avoids chained-indexing writes.
import os
df['photo_id'] = df['file'].map(lambda path: os.path.splitext(os.path.basename(path))[0])
df

In [None]:
df = df.merge(photos, on= 'photo_id')
df

In [None]:
X = df['features']
y = df['label']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
# X_train is an object-dtype Series whose items are per-image arrays, which
# TensorFlow cannot convert directly; stack the items into one 2-D float array first.
tf.convert_to_tensor(np.vstack(list(X_train)))

## Second try on feature extraction

In [None]:
from tensorflow.keras.applications import vgg16

# VGG16 with ImageNet weights; include_top=False leaves out the fully
# connected classifier layers
vgg_conv = vgg16.VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(224, 224, 3))

In [None]:
# Number of training images to extract features for, and the batch size
nTrain = 24000
batch_size = 50
# Feature buffer shaped like the network output (7 x 7 x 512 per image)
train_features = np.zeros(shape=(nTrain, 7, 7, 512))
# One-hot label buffer (4 classes)
train_labels = np.zeros(shape=(nTrain,4))

In [None]:
# Walk the training generator batch by batch, filling the feature and label
# buffers until nTrain rows have been written.
batch_no = 0
for images, labels in train_generator:
    # progress indicator every 50 batches
    if batch_no % 50 == 0:
        print(batch_no)
    first_row = batch_no * batch_size
    if first_row >= nTrain:
        break
    rows = slice(first_row, first_row + batch_size)
    # run the images through the convolutional base
    train_features[rows] = vgg_conv.predict(images)
    train_labels[rows] = labels
    batch_no += 1
# flatten each 7x7x512 feature map into a single vector
train_features_vec = train_features.reshape((nTrain, 7 * 7 * 512))
print("Train features: {}".format(train_features_vec.shape))

In [None]:
# Collect only the labels from the training generator (no feature extraction).
# NOTE(review): the training generator in the visible cells is created with
# shuffle=True, so this pass presumably yields batches in a different order
# than the pass that produced the features — confirm labels and features
# still line up.
for i, (inputs_batch, labels_batch) in enumerate(train_generator):
    # progress indicator every 50 batches
    if i % 50 == 0:
        print(i)
    # stop once nTrain rows have been written
    if i * batch_size >= nTrain:
        break   
    train_labels[i * batch_size : (i + 1) * batch_size] = labels_batch

In [None]:
# Persist the extracted features together with the labels collected in the
# same pass, so the pairing survives a reload.
np.save('train_features_vec', train_features_vec)
np.save('train_labels', train_labels)

In [None]:
# Number of validation images to extract features for
nVal = 6000
# Feature buffer (7 x 7 x 512 per image) and one-hot label buffer (4 classes)
validation_features = np.zeros(shape=(nVal, 7, 7, 512))
validation_labels = np.zeros(shape=(nVal,4))

In [None]:
# Walk the validation generator batch by batch, filling the feature and label
# buffers until nVal rows have been written.
batch_no = 0
for images, labels in validation_generator:
    # progress indicator every 50 batches
    if batch_no % 50 == 0:
        print(batch_no)
    first_row = batch_no * batch_size
    if first_row >= nVal:
        break
    rows = slice(first_row, first_row + batch_size)
    validation_features[rows] = vgg_conv.predict(images)
    validation_labels[rows] = labels
    batch_no += 1

# flatten each 7x7x512 feature map into a single vector
validation_features_vec = validation_features.reshape((nVal, 7 * 7 * 512))
print("Validation features: {}".format(validation_features_vec.shape))

In [None]:
# Collect only the labels from the validation generator (no feature extraction).
# NOTE(review): if the generator shuffles, this pass presumably yields batches
# in a different order than the pass that produced the features — confirm
# labels and features still line up.
for i, (inputs_batch, labels_batch) in enumerate(validation_generator):
    # progress indicator every 50 batches
    if i % 50 == 0:
        print(i)
    # stop once nVal rows have been written
    if i * batch_size >= nVal:
        break
    validation_labels[i * batch_size : (i + 1) * batch_size] = labels_batch

In [None]:
# Persist the extracted features together with the labels collected in the
# same pass, so the pairing survives a reload.
np.save('validation_features_vec', validation_features_vec)
np.save('validation_labels', validation_labels)

In [None]:
# Reload previously saved feature vectors from the input dataset.
# NOTE(review): only features are loaded here; the matching labels must come
# from elsewhere — confirm they correspond to these files.
train_features_vec = np.load('../input/features/train_features_vec.npy')
validation_features_vec = np.load('../input/features/validation_features_vec.npy')

In [None]:
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Sequential, optimizers


# Small fully connected classifier on top of the flattened 7x7x512 features
model = Sequential()
model.add(Dense(512, activation='relu', input_dim=7 * 7 * 512))
# Only the first layer needs an input size; later layers infer theirs
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.4))
# One softmax output per class (4 classes)
model.add(Dense(4, activation='softmax'))

In [None]:
import tensorflow as tf
# Configure the model for training: Adam with default settings, categorical
# cross-entropy loss, precision and recall as reported metrics
model.compile(optimizer=optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

# Train on the extracted feature vectors; validate on the validation features
history = model.fit(train_features_vec,
                    train_labels,
                    epochs=20,
                    batch_size=batch_size,
                    validation_data=(validation_features_vec,
                                     validation_labels))

In [None]:
# Run another 20 epochs on the same feature vectors. The model is not rebuilt
# here, and `history` is overwritten with this run's history.
history = model.fit(train_features_vec,
                    train_labels,
                    epochs=20,
                    batch_size=batch_size,
                    validation_data=(validation_features_vec,
                                     validation_labels))

In [None]:
# get the list of all validation file names
# NOTE(review): these only line up with the extracted features when the
# validation generator yields batches in dataframe order — confirm.
fnames = validation_generator.filenames

# Ground truth for the extracted validation features: take it from the one-hot
# labels gathered alongside them, so it has nVal entries in the same order as
# validation_features_vec.
ground_truth = np.argmax(validation_labels, axis=1)

# get the dictionary of classes
label2index = validation_generator.class_indices

# obtain the list of classes
idx2label = list(label2index.keys())
print("The list of classes: ", idx2label)

In [None]:
# Class probabilities for every validation feature vector
prob = model.predict(validation_features_vec)
# predict_classes is no longer available in recent Keras; the predicted class
# is the index of the largest probability.
predictions = np.argmax(prob, axis=1)

In [None]:
# Positions where the predicted class differs from the ground truth
mismatch = predictions != ground_truth
errors = np.flatnonzero(mismatch)
print("Number of errors = {}/{}".format(errors.size, nVal))

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
import matplotlib.pyplot as plt
# Show every misclassified validation image with its true and predicted label
for idx in errors:
    pred_class = np.argmax(prob[idx])
    pred_label = idx2label[pred_class]
    # File names carry no class directory, so read the true class from the
    # generator's class list (same order as its filenames).
    true_label = idx2label[validation_generator.classes[idx]]

    print('Original label:{}, Prediction :{}, confidence : {:.3f}'.format(
        true_label,
        pred_label,
        prob[idx][pred_class]))

    original = load_img('{}/{}'.format(photos_dir, fnames[idx]))
    plt.axis('off')
    plt.imshow(original)
    plt.show()

## Inception fine-tuning

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# ImageNet-pretrained InceptionV3 without its classification top
base_model = InceptionV3(include_top=False, weights='imagenet')

# New head: global average pooling followed by a 1024-unit dense layer
x = Dense(1024, activation='relu')(GlobalAveragePooling2D()(base_model.output))
# softmax output, one unit per class (4 classes)
predictions = Dense(4, activation='softmax')(x)

# the trainable model: base network input to the new softmax output
model = Model(outputs=predictions, inputs=base_model.input)

In [None]:
import tensorflow as tf
# First stage: train only the newly added top layers, so mark every layer of
# the InceptionV3 base as non-trainable
for layer in base_model.layers:
    layer.trainable = False

# Compile after the trainable flags are set; precision and recall are reported
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

In [None]:
# Hard-coded sample counts used to derive the steps per epoch.
# NOTE(review): presumably 70% and 20% of 48000 — confirm they match the
# actual generator sizes.
totalTrain=33600
totalVal=9600
NUM_EPOCHS=10
# train the model on the new data for a few epochs
model.fit(train_generator,
    steps_per_epoch=totalTrain // batch_size,
    validation_data=valid_generator,
    validation_steps=totalVal // batch_size,
    epochs=NUM_EPOCHS)

# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

In [None]:
# Print layer indices and names of the base model to decide how many layers
# to freeze
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest.
# NOTE(review): the indices are printed for base_model but applied to
# model.layers — presumably the same ordering; confirm.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

In [None]:
totalTrain = 33600
totalVal = 9600
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from tensorflow.keras.optimizers import SGD
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy',
    metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers)
model.fit(train_generator,
    steps_per_epoch=totalTrain // batch_size,
    validation_data=valid_generator,
    validation_steps=totalVal // batch_size,
    epochs=10)

## ResNet fine-tuning

In [None]:
# Training configuration for the ResNet fine-tuning run
INIT_LR = 1e-4          # initial learning rate
NUM_EPOCHS = 20         # number of epochs to train for
totalTrain, totalVal = 4000, 1000
# where the serialized model is written after training
MODEL_PATH = "label_detector.model"

Let's try k-fold cross validation

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import KFold, StratifiedKFold
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Labels passed as the second argument to the splitter
Y = photos['label']

# Plain 5-fold splitter (StratifiedKFold is imported above but not used here)
kf = KFold(n_splits = 5)

In [None]:
# Augmenting generator: random shifts, zoom and horizontal flips, with pixel
# values rescaled to [0, 1]
idg = ImageDataGenerator(width_shift_range=0.1,
                         height_shift_range=0.1,
                         zoom_range=0.3,
                         fill_mode='nearest',
                         horizontal_flip = True,
                         rescale=1./255)

In [None]:
def get_model_name(k):
    """Return the checkpoint file name for fold `k`, e.g. 'model_1.h5'."""
    parts = ('model_', str(k), '.h5')
    return ''.join(parts)

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
# ImageNet-pretrained InceptionV3 without its classification top
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# add a fully-connected layer with 1024 units
x = Dense(1024, activation='relu')(x)
# softmax output layer, one unit per class (4 classes)
predictions = Dense(4, activation='softmax')(x)

In [None]:
import os

# Per-fold results, appended in fold order
VALIDATION_ACCURACY = []
VALIDAITON_LOSS = []
batch_size = 50
num_epochs=10
photos_dir = '../input/photos/photos'
nTrain = 48000
save_dir = '/saved_models/'
fold_var = 1

# The checkpoint callback writes into save_dir, so it has to exist
os.makedirs(save_dir, exist_ok=True)

# Validation images are only rescaled: augmenting them would make the
# per-fold scores noisy and not comparable.
val_idg = ImageDataGenerator(rescale=1./255)


def _build_fold_model():
    """Return a freshly initialised InceptionV3 classifier with a frozen base."""
    fold_base = InceptionV3(weights='imagenet', include_top=False)
    head = GlobalAveragePooling2D()(fold_base.output)
    head = Dense(1024, activation='relu')(head)
    outputs = Dense(4, activation='softmax')(head)
    # freeze the convolutional base before compiling
    for layer in fold_base.layers:
        layer.trainable = False
    fold_model = Model(inputs=fold_base.input, outputs=outputs)
    fold_model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                       metrics=["accuracy"])
    return fold_model


for train_index, val_index in kf.split(np.zeros(len(photos)),Y):
    # Use the rows selected for THIS fold; fixed slices of `photos` would
    # train and validate on the same data in every fold.
    training_data = photos.iloc[train_index]
    validation_data = photos.iloc[val_index]

    train_data_generator = idg.flow_from_dataframe(training_data, photos_dir,
                                                   x_col='photo_path', y_col='label',
                                                   target_size=(224, 224),
                                                   batch_size=batch_size,
                                                   class_mode='categorical', shuffle=True)

    valid_data_generator = val_idg.flow_from_dataframe(validation_data, photos_dir,
                                                       x_col='photo_path', y_col='label',
                                                       target_size=(224, 224),
                                                       batch_size=batch_size, color_mode="rgb",
                                                       class_mode='categorical', shuffle=False)

    # CREATE NEW MODEL
    # A new model per fold, so weights learned in one fold do not leak into
    # the next one.
    model = _build_fold_model()

    # CREATE CALLBACKS
    # Keep only the weights with the best validation accuracy of this fold
    best_model_path = os.path.join(save_dir, get_model_name(fold_var))
    checkpoint = tf.keras.callbacks.ModelCheckpoint(best_model_path,
                                                    monitor='val_accuracy', verbose=1,
                                                    save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    # FIT THE MODEL
    history = model.fit(train_data_generator,
                        epochs=num_epochs,
                        callbacks=callbacks_list,
                        validation_data=valid_data_generator)

    # LOAD BEST MODEL to evaluate the performance of the model
    # (same path the checkpoint callback wrote to)
    model.load_weights(best_model_path)

    results = model.evaluate(valid_data_generator)
    results = dict(zip(model.metrics_names,results))

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDAITON_LOSS.append(results['loss'])

    # release the graph/session state held by this fold's model
    tf.keras.backend.clear_session()

    fold_var += 1
    

In [None]:
# Augmenting generator for training data: random rotation, zoom, shifts,
# shear and horizontal flips
trainAug = ImageDataGenerator(
	rotation_range=25,
	zoom_range=0.1,
	width_shift_range=0.1,
	height_shift_range=0.1,
	shear_range=0.2,
	horizontal_flip=True,
	fill_mode="nearest")
# Validation/testing generator without augmentation (mean subtraction is
# attached below)
valAug = ImageDataGenerator()
# ImageNet per-channel mean (RGB order), attached to both generators.
# NOTE(review): Keras appears to apply `mean` only when featurewise_center=True,
# which is not set here, and the training cells visible in this notebook use
# other generators — confirm these objects have any effect.
mean = np.array([123.68, 116.779, 103.939], dtype="float32")
trainAug.mean = mean
valAug.mean = mean

In [None]:
# Load ResNet-50 with ImageNet weights, leaving off the fully connected head
print("[INFO] preparing model...")
baseModel = ResNet50(weights="imagenet", include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))
# New head placed on top of the base model: 7x7 average pooling, flatten,
# a 256-unit dense layer with dropout, and a 4-way softmax output
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(256, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(4, activation="softmax")(headModel)
# The model that is actually trained: base input -> new head output
model = Model(inputs=baseModel.input, outputs=headModel)
# Freeze every base-model layer so only the new head is updated during training
for layer in baseModel.layers:
    layer.trainable = False

In [None]:
# compile the model
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# NOTE(review): `decay` is only accepted by the older optimizer
# implementations — confirm it is supported by the installed TensorFlow.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / NUM_EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])
# train the model
print("[INFO] training model...")
totalTrain=38400
totalVal = 9600
# Model.fit accepts generators directly; fit_generator is deprecated
H = model.fit(
    train_generator,
    steps_per_epoch=totalTrain // batch_size,
    validation_data=validation_generator,
    validation_steps=totalVal // batch_size,
    epochs=NUM_EPOCHS)

In [None]:
# Train for another NUM_EPOCHS epochs (overwrites H with this run's history).
# Model.fit accepts generators directly; fit_generator is deprecated.
H = model.fit(
    train_generator,
    steps_per_epoch=totalTrain // batch_size,
    validation_data=validation_generator,
    validation_steps=totalVal // batch_size,
    epochs=NUM_EPOCHS)

In [None]:
# Generator over the rows of small_photos from position 6000 onwards.
# NOTE(review): small_photos is not defined in the visible part of the
# notebook — confirm where it comes from.
testGen = datagen.flow_from_dataframe(small_photos[6000:],
    photos_dir, x_col = 'photo_path', y_col = 'label',
    target_size=(224, 224),
    batch_size=batch_size,color_mode="rgb",
    class_mode='categorical',
    shuffle=True)

In [None]:
model.save('resnet_labels.h5')

In [None]:
from keras.models import load_model
model = load_model('../input/trained-model-resnet/resnet_labels.h5')

In [None]:
INIT_LR = 1e-4
NUM_EPOCHS = 1
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# NOTE(review): `decay` is only accepted by the older optimizer
# implementations — confirm it is supported by the installed TensorFlow.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / NUM_EPOCHS)
# Recompile with precision and recall as the reported metrics
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=[tf.keras.metrics.Precision(),tf.keras.metrics.Recall()])

In [None]:
# Hard-coded sample counts used to derive the steps per epoch.
# NOTE(review): confirm they match the actual generator sizes.
totalTrain = 33600
totalVal = 9600
# Train for 20 epochs on the training generator, validating on valid_generator
model.fit(train_generator,
    steps_per_epoch=totalTrain // batch_size,
    validation_data=valid_generator,
    validation_steps=totalVal // batch_size,
    epochs=20)

In [None]:
from sklearn.metrics import classification_report
# Number of test samples, taken from the generator instead of a fixed guess
totalTest = test_generator.n
# reset the testing generator and then use our trained model to
# make predictions on the data
print("[INFO] evaluating network...")
test_generator.reset()
# Exactly one pass over the test set: asking for more steps than the
# generator has batches yields more predictions than there are labels.
# Model.predict accepts generators directly; predict_generator is deprecated.
predIdxs = model.predict(test_generator, steps=len(test_generator))
# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predIdxs, axis=1)
# show a nicely formatted classification report
# NOTE: test_generator.classes is listed in dataframe order, so this report
# is only meaningful when test_generator does not shuffle.
print(classification_report(test_generator.classes, predIdxs,
    target_names=list(test_generator.class_indices.keys())))
# serialize the model to disk
#print("[INFO] saving model...")
#MODEL_PATH = "inceptionv3.model"
#model.save(MODEL_PATH, save_format="h5")

In [None]:
# plot the training loss and accuracy
# Take the number of epochs from the recorded history itself: NUM_EPOCHS may
# have been changed since H was produced, which would break the x/y lengths.
N = len(H.history["loss"])
epochs_axis = np.arange(0, N)
plt.style.use("ggplot")
plt.figure()
plt.plot(epochs_axis, H.history["loss"], label="train_loss")
plt.plot(epochs_axis, H.history["val_loss"], label="val_loss")
plt.plot(epochs_axis, H.history["accuracy"], label="train_acc")
plt.plot(epochs_axis, H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.show()

## Check predictions on my gallery

In [None]:
# Unlabelled generator (class_mode=None) over the personal test gallery;
# images are rescaled to [0, 1], resized to 224x224 and kept in directory
# order (shuffle=False)
feature_test_path = '/content/drive/MyDrive/для диплома/testgallery'
feature_test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(directory=feature_test_path,
                                                                  class_mode=None,
                                                                  batch_size=batch_size,
                                                                  target_size=(224, 224),
                                                                  color_mode="rgb",
                                                                  shuffle=False)

In [None]:
import glob
import cv2
from keras.preprocessing import image
import matplotlib.pyplot as plt
from keras.applications.resnet50 import preprocess_input
# Predict a label for every JPEG in the test gallery and show the image
for filename in glob.iglob('../input/testgallery//*.jpg'):
    img = image.load_img(filename, target_size=(224, 224))
    x = image.img_to_array(img)
    # add the batch dimension expected by model.predict
    x = np.expand_dims(x, axis=0)
    # Every training generator in this notebook rescales pixels by 1/255, so
    # inference has to use the same scaling rather than ResNet's own
    # preprocess_input.
    x = x / 255.0
    preds = model.predict(x)
    pred_class = np.argmax(preds)
    pred_label = idx2label[pred_class]
    print('predicted label is ', pred_label)
    plt.imshow(img)
    plt.show()

In [None]:
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
# Load at 224x224, the size the models in this notebook are trained on
img = image.load_img('D:/Downloads/Downloads/diploma/testgallery//IMG_20191027_163506.jpg', target_size=(224, 224))

x = image.img_to_array(img)

# add the batch dimension expected by model.predict
x = np.expand_dims(x, axis=0)

# Every training generator in this notebook rescales pixels by 1/255, so
# inference has to use the same scaling rather than ResNet's own
# preprocess_input.
x = x / 255.0

# preds holds one row of class probabilities per image in the batch
preds = model.predict(x)

In [None]:
# Mapping from class name to class index, as built by the generator
label2index = validation_generator.class_indices

# Class names in the mapping's own key order
idx2label = list(label2index)
print("The list of classes: ", idx2label)

In [None]:
pred_class = np.argmax(preds)
pred_label = idx2label[pred_class]
print(pred_class, pred_label)
plt.imshow(img)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
import matplotlib.pyplot as plt
# Show at most the first 36 misclassified validation images
for idx in errors[:36]:
    pred_class = np.argmax(prob[idx])
    pred_label = idx2label[pred_class]
    # true class of the displayed file, from the generator's class list
    true_label = idx2label[validation_generator.classes[idx]]

    print('Original label:{}, Prediction :{}, confidence : {:.3f}'.format(
        true_label,
        pred_label,
        prob[idx][pred_class]))

    # file names are relative to the directory the generator was built on
    original = load_img('{}/{}'.format(photos_dir, fnames[idx]))
    plt.axis('off')
    plt.imshow(original)
    plt.show()

## Ambience classification

### Dataset only inside

In [None]:
ambiences = ['touristy', 'hipster', 'romantic', 'intimate', 'trendy', 'upscale', 'classy', 'casual']

In [None]:
import ast


def _ambience_flags(attributes):
    """Return the parsed 'Ambience' mapping of one business, or {} if absent.

    `attributes` is the business' attribute mapping (or None). Its 'Ambience'
    entry is a Python-literal string such as "{'romantic': False}" or "None";
    anything missing or unparsable is treated as "no ambience information".
    """
    if not isinstance(attributes, dict):
        return {}
    raw = attributes.get('Ambience')
    if isinstance(raw, dict):
        return raw
    if raw is None:
        return {}
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Parse every row's ambience once, then derive all indicator columns from it.
# Whole-column assignment replaces the per-cell chained writes, which pandas
# does not guarantee to reach the original frame.
ambience_by_row = df['attributes'].map(_ambience_flags)
for colname in ambiences:
    print('working on '+colname)
    # missing key or None -> 0, otherwise the flag as an integer
    df[colname] = ambience_by_row.map(
        lambda flags, name=colname: int(flags.get(name) or 0))

In [None]:
# Keep only the photos labelled 'inside' (the original index values are kept)
df = df[df['label'] == 'inside']
df

In [None]:
# Generators only rescale pixel values to [0, 1]
datagen=ImageDataGenerator(rescale=1./255.)
test_datagen=ImageDataGenerator(rescale=1./255.)

# one target column per ambience attribute
columns=ambiences
# Split by the actual number of rows: a fixed 48000 is larger than the
# 'inside'-only frame, so the 70%-90% slice would be empty.
nTrain = len(df)
train_end = int(nTrain * 0.7)
valid_end = int(nTrain * 0.9)
photos_dir = '../input/photos/photos'


# class_mode="raw" passes the ambience columns through unchanged as targets
train_generator=datagen.flow_from_dataframe(dataframe=df[:train_end],
                                            directory=photos_dir,
                                            x_col='photo_path',
                                            y_col=columns,
                                            batch_size=50,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="raw",
                                            target_size=(224,224))
valid_generator=test_datagen.flow_from_dataframe(dataframe=df[train_end:valid_end],
                                                 directory=photos_dir,
                                                 x_col='photo_path',y_col=columns,
                                                 batch_size=50,seed=42,shuffle=True,
                                                 class_mode="raw",
                                                 target_size=(224,224))
'''test_generator=test_datagen.flow_from_dataframe(dataframe=df[int(nTrain*0.9):],
                                                directory=photos_dir,
                                                x_col='photo_path',
                                                batch_size=1,
                                                seed=42,
                                                shuffle=False,
                                                class_mode=None,
                                                target_size=(256,256))'''

## Inception for ambience

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# ImageNet-pretrained InceptionV3 without its classification top
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# add a fully-connected layer with 1024 units
x = Dense(1024, activation='relu')(x)
# Output layer: 8 independent sigmoid units (multi-label output)
predictions = Dense(8, activation='sigmoid')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
batch_size = 50
# Take the sample counts from the generators themselves, so the step counts
# follow the data actually available instead of fixed guesses.
totalTrain = train_generator.n
totalVal = valid_generator.n
STEP_SIZE_TRAIN = totalTrain // batch_size
STEP_SIZE_VALID = totalVal // batch_size

In [None]:
# First stage: train only the newly added top layers, so mark every layer of
# the InceptionV3 base as non-trainable
for layer in base_model.layers:
    layer.trainable = False

# Compile after the trainable flags are set; binary cross-entropy treats each
# of the sigmoid outputs as its own yes/no target
model.compile(optimizer='rmsprop', loss='binary_crossentropy',
    metrics=[tf.keras.metrics.Precision(),'accuracy'])

In [None]:
# Train the new head for 10 epochs using the precomputed step counts
model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10
)

In [None]:
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest.
# The change only takes effect once the model is compiled again.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

In [None]:
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from tensorflow.keras.optimizers import SGD
import tensorflow as tf
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='binary_crossentropy',
    metrics=[tf.keras.metrics.Precision(),tf.keras.metrics.Recall()])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers)
model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=7
)

## ResNet for ambience

In [None]:

# Cell-local imports so this cell does not depend on names that happen to be
# imported by other cells.
import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import AveragePooling2D, Dense, Dropout, Flatten, Input
from tensorflow.keras.models import Model

# ResNet50 backbone with ImageNet weights, without its classification head,
# fed by a fixed 224x224x3 input tensor.
baseModel = ResNet50(weights="imagenet", include_top=False,
                     input_tensor=Input(shape=(224, 224, 3)))
# Head placed on top of the backbone: 7x7 average pooling, flatten,
# a 512-unit ReLU layer, dropout, and 8 sigmoid outputs.
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(8, activation="sigmoid")(headModel)
# The model that is actually trained: backbone input -> head output.
model = Model(inputs=baseModel.input, outputs=headModel)
# Freeze every backbone layer so that only the new head is updated.
for layer in baseModel.layers:
    layer.trainable = False
# The deprecated `lr=` / `decay=` optimizer arguments are replaced by
# `learning_rate=` with an explicit schedule. With decay_steps=1,
# InverseTimeDecay computes lr0 / (1 + decay_rate * step), which is the same
# per-step decay the old `decay` argument applied.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0001, decay_steps=1, decay_rate=1e-6)
model.compile(optimizer=optimizers.RMSprop(learning_rate=lr_schedule),
              loss="binary_crossentropy",
              metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

In [None]:
# Number of whole batches each generator can supply per pass over its data.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
# `Model.fit` accepts generators directly; the deprecated `fit_generator` is
# replaced so this cell matches the other training cells in this notebook.
model.fit(train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          epochs=7
)

## Feature extraction for ambience

In [None]:
from tensorflow.keras.applications import vgg16

# VGG16 with ImageNet weights and no classification head, for 224x224 RGB
# inputs; used below purely as a fixed feature extractor.
vgg_conv = vgg16.VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

In [None]:
# Number of training samples to extract features for, and the batch size
# used when filling the buffers.
nTrain, batch_size = 14000, 50
# Zero-filled buffers: one 7x7x512 feature map (the shape the network output
# is expected to have) and one 8-value label row per training sample.
train_features = np.zeros((nTrain, 7, 7, 512))
train_labels = np.zeros((nTrain, 8))

In [None]:
# iterate through the batches of train images and labels
for i, (inputs_batch, labels_batch) in enumerate(train_generator):
    if i % 50 == 0:
        print(i)
    if i * batch_size >= nTrain:
        break   
    # pass the images through the network
    features_batch = vgg_conv.predict(inputs_batch)
    train_features[i * batch_size : (i + 1) * batch_size] = features_batch
    train_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
# reshape train_features into vector       
train_features_vec = np.reshape(train_features, (nTrain, 7 * 7 * 512))
print("Train features: {}".format(train_features_vec.shape))

In [None]:
# Persist the extracted (unflattened) training features and their labels as
# .npy files so the extraction does not have to be repeated.
np.save(file='train_features', arr=train_features)
np.save(file='train_labels', arr=train_labels)

In [None]:
# Number of validation samples to extract features for, plus zero-filled
# buffers for their 7x7x512 feature maps and 8-value label rows.
nVal = 4000
validation_features = np.zeros(shape=(nVal, 7, 7, 512))
validation_labels = np.zeros(shape=(nVal,8))
# Run the validation images through the VGG16 base batch by batch.
# A running write offset is used instead of `i * batch_size` so that a short
# batch neither raises a shape mismatch nor leaves gaps in the buffers.
val_offset = 0
for i, (inputs_batch, labels_batch) in enumerate(valid_generator):
    if i % 50 == 0:
        print(i)  # progress indicator
    if val_offset >= nVal:
        break
    features_batch = vgg_conv.predict(inputs_batch)
    # never write past the end of the buffers
    n_take = min(len(features_batch), nVal - val_offset)
    validation_features[val_offset:val_offset + n_take] = features_batch[:n_take]
    validation_labels[val_offset:val_offset + n_take] = labels_batch[:n_take]
    val_offset += n_take

# flatten each 7x7x512 feature map into a single vector
validation_features_vec = np.reshape(validation_features, (nVal, 7 * 7 * 512))
print("Validation features: {}".format(validation_features_vec.shape))

In [None]:
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Sequential, optimizers


# Small fully-connected classifier on top of the flattened 7*7*512 features:
# 512 ReLU units, 50% dropout, then 8 sigmoid outputs.
model = Sequential([
    Dense(512, activation='relu', input_dim=7 * 7 * 512),
    Dropout(0.5),
    Dense(8, activation='sigmoid'),
])

In [None]:
import tensorflow as tf
# Configure the model for training.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(optimizer=optimizers.RMSprop(learning_rate=2e-4),
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.Precision(),tf.keras.metrics.Recall()])

# Train for 10 epochs on the flattened train features, validating on the
# flattened validation features after each epoch.
history = model.fit(train_features_vec,
                    train_labels,
                    epochs=10,
                    batch_size=batch_size,
                    validation_data=(validation_features_vec,
                                     validation_labels))

In [None]:
# Fit the same model for a further 20 epochs on the precomputed feature
# vectors; `history` is overwritten with the result of this run.
history = model.fit(
    x=train_features_vec,
    y=train_labels,
    batch_size=batch_size,
    epochs=20,
    validation_data=(validation_features_vec, validation_labels),
)