In [1]:
library(tidyverse) 
library(keras)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2.[31m9000[39m     [32m✔[39m [34mpurrr  [39m 0.3.4     
[32m✔[39m [34mtibble [39m 3.0.3          [32m✔[39m [34mdplyr  [39m 1.0.1     
[32m✔[39m [34mtidyr  [39m 1.1.1          [32m✔[39m [34mstringr[39m 1.4.0     
[32m✔[39m [34mreadr  [39m 1.3.1          [32m✔[39m [34mforcats[39m 0.5.0     

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# VGG16 convolutional base with ImageNet weights; include_top = FALSE leaves
# out the fully connected classifier so the network ends at the last block.
conv_base <- application_vgg16(
    weights = "imagenet",
    include_top = FALSE,
    input_shape = c(150, 150, 3)
)

In [3]:
# Auto-print the model to show its layer-by-layer summary.
conv_base

Model
Model: "vgg16"
________________________________________________________________________________
Layer (type)                        Output Shape                    Param #     
input_1 (InputLayer)                [(None, 150, 150, 3)]           0           
________________________________________________________________________________
block1_conv1 (Conv2D)               (None, 150, 150, 64)            1792        
________________________________________________________________________________
block1_conv2 (Conv2D)               (None, 150, 150, 64)            36928       
________________________________________________________________________________
block1_pool (MaxPooling2D)          (None, 75, 75, 64)              0           
________________________________________________________________________________
block2_conv1 (Conv2D)               (None, 75, 75, 128)             73856       
________________________________________________________________________________
block2_

In [4]:
# Number of images per generator batch; read as a global by extract_features()
# and by the class-indexing cell further down.
batch_size <- 64

## Feature extraction function with VGG16

In [5]:
# Image generator that only rescales pixel values by 1/255 (no augmentation
# options are set).
datagen <- image_data_generator(rescale = 1 / 255)

#' Run images through the convolutional base and collect features and labels.
#'
#' Relies on three objects defined at the top level of the notebook:
#' `conv_base` (the model used for prediction), `datagen` (the image data
#' generator) and `batch_size` (images requested per batch).
#'
#' @param dataframe Data frame with a `file_path` column (image locations) and
#'   a `landmark_id` column (class label).
#' @param sample_count Number of images to process.
#' @return A list with `features`, an array of dimension
#'   `c(sample_count, 4, 4, 512)`, and `labels`, a one-hot matrix with
#'   `sample_count` rows and one column per distinct `landmark_id`.
extract_features <- function(dataframe, sample_count) {
    # Count classes from the same column the generator uses for labels, by
    # name, so the result does not depend on column position or on the input
    # being a tibble.
    class_count <- length(unique(dataframe[["landmark_id"]]))

    features <- array(0, dim = c(sample_count, 4, 4, 512))
    labels <- matrix(0, nrow = sample_count, ncol = class_count)

    generator <- flow_images_from_dataframe(
        dataframe = dataframe,
        x_col = "file_path",
        y_col = "landmark_id",
        generator = datagen,
        target_size = c(150, 150),
        batch_size = batch_size,
        class_mode = "categorical"
    )

    filled <- 0
    i <- 0
    while (filled < sample_count) {
        batch <- generator_next(generator)
        inputs_batch <- batch[[1]]
        labels_batch <- batch[[2]]

        features_batch <- predict(conv_base, inputs_batch)

        # The last batch can be shorter than batch_size, and the request can
        # end mid-batch, so size the write by what was actually produced and
        # by how many rows are still empty.
        keep <- min(dim(features_batch)[1], sample_count - filled)
        if (keep < 1) {
            stop("generator returned an empty batch", call. = FALSE)
        }
        rows <- filled + seq_len(keep)
        picked <- seq_len(keep)

        features[rows, , , ] <- features_batch[picked, , , , drop = FALSE]
        labels[rows, ] <- labels_batch[picked, , drop = FALSE]

        filled <- filled + keep
        i <- i + 1
        print(i)  # progress: number of batches processed so far
    }

    list(
        features = features,
        labels = labels
    )
}

In [6]:
# Load the training index: one row per image with its id, file path, landmark
# label and a per-class count column.
train_dataframe <- read_csv(
    file = "/kaggle/input/1-read-landmark-recognition-train-test-validation/train_dataframe.csv",
    col_types = cols(
        id = col_character(),
        file_path = col_character(),
        landmark_id = col_double(),
        count_class = col_double()
    )
)
# The generator needs string class labels. as.character() on a double writes
# round values in scientific notation (100000 becomes "1e+05"), so convert
# with fixed notation instead and keep missing values as NA.
landmark_missing <- is.na(train_dataframe$landmark_id)
train_dataframe$landmark_id <- format(
    train_dataframe$landmark_id,
    scientific = FALSE,
    trim = TRUE
)
train_dataframe$landmark_id[landmark_missing] <- NA_character_

count_missing <- is.na(train_dataframe$count_class)
train_dataframe$count_class <- format(
    train_dataframe$count_class,
    scientific = FALSE,
    trim = TRUE
)
train_dataframe$count_class[count_missing] <- NA_character_

head(train_dataframe)
dim(train_dataframe)

Parsed with column specification:
cols(
  id = [31mcol_character()[39m,
  file_path = [31mcol_character()[39m,
  landmark_id = [32mcol_double()[39m,
  count_class = [32mcol_double()[39m
)



id,file_path,landmark_id,count_class
<chr>,<chr>,<chr>,<chr>
66a05adde31f5a66,../input//landmark-recognition-2020/train//6/6/a/66a05adde31f5a66.jpg,138982,6272
a5ed190d9b9e9387,../input//landmark-recognition-2020/train//a/5/e/a5ed190d9b9e9387.jpg,126637,2231
4312fc17806e1e6a,../input//landmark-recognition-2020/train//4/3/1/4312fc17806e1e6a.jpg,20409,1758
fa2615c7cedc5ff2,../input//landmark-recognition-2020/train//f/a/2/fa2615c7cedc5ff2.jpg,20409,1758
1d823dcb048afe5b,../input//landmark-recognition-2020/train//1/d/8/1d823dcb048afe5b.jpg,20409,1758
a5080e4d5086c21a,../input//landmark-recognition-2020/train//a/5/0/a5080e4d5086c21a.jpg,83144,1741


In [7]:
# Extract features and one-hot labels for every row of the training index.
train <- extract_features(
    dataframe = train_dataframe,
    sample_count = nrow(train_dataframe)
)

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
[1] 89
[1] 90
[1] 91
[1] 92
[1] 93
[1] 94
[1] 95
[1] 96
[1] 97
[1] 98
[1] 99
[1] 100
[1] 101
[1] 102
[1] 103
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] 131
[1] 132
[1] 133
[1] 134
[1] 135
[1] 136
[1] 137
[1] 138
[1] 

In [8]:
# Inspect the structure of the extracted features and labels.
str(train)

List of 2
 $ features: num [1:11712, 1:4, 1:4, 1:512] 0 0.0815 0 0 0.1299 ...
 $ labels  : num [1:11712, 1:7] 0 0 0 0 0 0 0 0 0 0 ...


## Flatten the features extracted from the last convolutional layer

In [9]:
# reshape
#' Flatten each sample's feature block into a single row.
#'
#' @param features Array whose first dimension indexes samples, e.g. the
#'   `c(n, 4, 4, 512)` array returned by `extract_features()`.
#' @return The result of `array_reshape()` with dimensions
#'   `c(n, prod(remaining dimensions))`; `c(n, 8192)` for 4 x 4 x 512 input.
reshape_features <- function(features) {
    shape <- dim(features)
    if (length(shape) < 2) {
        stop("`features` must have at least two dimensions", call. = FALSE)
    }
    # Derive the flattened width from the input rather than hard-coding
    # 4 * 4 * 512, so other feature-map sizes work too.
    array_reshape(features, dim = c(shape[1], prod(shape[-1])))
}

In [10]:
# Replace the 4-D feature array with its flattened, one-row-per-image form.
train[["features"]] <- reshape_features(train[["features"]])

In [11]:
# Confirm the features now have two dimensions after reshaping.
str(train)

List of 2
 $ features: num [1:11712, 1:8192] 0 0.0815 0 0 0.1299 ...
 $ labels  : num [1:11712, 1:7] 0 0 0 0 0 0 0 0 0 0 ...


In [12]:
# Persist the flattened features and labels to the working directory.
saveRDS(train, file = "train.Rds")

## Class indexing

In [13]:
# label index
# Build a generator over the training index only to read its mapping from
# class label to class index; no images are drawn from it here.
dataframe <- train_dataframe
datagen <- image_data_generator(rescale = 1 / 255)
generator <- flow_images_from_dataframe(
    dataframe = dataframe,
    x_col = "file_path",
    y_col = "landmark_id",
    generator = datagen,
    target_size = c(150, 150),
    batch_size = batch_size,
    class_mode = "categorical"
)
label_map <- generator$class_indices
# Order the labels by their class index rather than by the order in which the
# converted mapping happens to arrive, so that col_names[k] names the k-th
# class index (presumably the k-th one-hot column of the labels; confirm).
col_names <- names(label_map)[order(unlist(label_map))]

In [14]:
# Preview the first five class labels.
col_names[1:5]