### Access to Google Drive folders

In [1]:
# Mount the user's Google Drive inside the Colab VM at /content/gdrive so the
# project folders referenced below are reachable. On a fresh session this
# prompts for an OAuth authorization code.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Constants

In [0]:
# Project directories on the mounted Google Drive. Each path ends with '/'
# because file paths are built by plain string concatenation further down.
prepared_data_folder = '/content/gdrive/My Drive/Project/Prepared_Data/'  # pickled batch files are read from here
images_folder = '/content/gdrive/My Drive/Project/Images_300x450/'  # poster JPEGs named "<imdb id>-<title>.jpg"
results_folder = '/content/gdrive/My Drive/Project/Results/'  # show_results saves its PNG figures here
models_folder = '/content/gdrive/My Drive/Project/Models/'  # not referenced by the cells in this notebook

# Name stems of the pickled batch files. A full file name is assembled as
# "<model>_<stem>_part_<batch index>.p" inside prepared_data_folder.
titles_file = "titles"
imdb_id_file = "imdb_id"
images_file = "images"  # not referenced by the cells in this notebook
features_file = "features"

# Model names; the selected one prefixes both the batch files that are loaded
# and the result images that are saved.
model_InceptionV3 = "InceptionV3"
model_VGG16 = "VGG16"

### Libraries


In [3]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
import keras
import json
import tensorflow as tf

from keras.preprocessing.image import load_img
from sklearn.neighbors import NearestNeighbors

import tqdm
from IPython.display import HTML, display

Using TensorFlow backend.


### Functions

In [0]:
def get_titles_by_phrase(phrase):
  """Display every movie whose title contains ``phrase`` (case-insensitive).

  Walks the module-level ``titles`` and ``imdb_ids`` sequences in parallel and
  renders the matches as a DataFrame with the columns ``Id`` (position of the
  movie in those sequences, i.e. the value to pass to ``get_recommendation``),
  ``IMDB id`` and ``Title``.

  Args:
    phrase: Substring to look for in the titles.

  Returns:
    None. The table is rendered with ``display``; an empty table is shown
    when nothing matches.
  """
  needle = phrase.lower()
  # Collect the rows first and build the frame once. DataFrame.append copied
  # the whole frame on every match and no longer exists in pandas >= 2.0.
  matches = [
      {"Id": idx, "IMDB id": imdb_id, "Title": title}
      for idx, (title, imdb_id) in enumerate(zip(titles, imdb_ids))
      if needle in title.lower()
  ]
  result_df = pd.DataFrame(matches, columns=["Id", "IMDB id", "Title"])
  display(result_df)

def found_recommendation(id):
  """Look up the nearest neighbours of one row of ``features``.

  Args:
    id: Row index into the module-level ``features`` array.

  Returns:
    Whatever ``clustering_model.kneighbors`` returns with
    ``return_distance=False`` for that single query row (neighbour indices
    only, no distances).
  """
  # Index with a list so the query keeps its 2-D (one-row) shape.
  query_row = features[[id]]
  neighbour_indices = clustering_model.kneighbors(query_row, return_distance=False)
  return neighbour_indices

def show_results(results, id):
  """Plot the posters listed in ``results`` side by side and save the figure.

  Args:
    results: Neighbour indices as returned by ``found_recommendation``. Only
      ``results[0]`` is read. Its first entry is titled "Found for", every
      following entry "Recommendation <n>".
    id: Value appended to the name of the saved PNG.

  Returns:
    None.

  Side effects:
    Reads one JPEG per entry from ``data_folder``, writes a PNG into
    ``results_folder`` and shows the figure. Nothing is drawn or saved when
    ``results[0]`` is empty.
  """
  neighbours = results[0]
  if len(neighbours) == 0:
    return  # plt.subplots cannot build a grid with zero columns

  # One row with exactly one axis per poster. Sizing the grid from the data
  # (instead of a fixed 2x4 grid that was then overdrawn with plt.subplot)
  # avoids leftover empty axes in the figure.
  fig, axes = plt.subplots(
      nrows=1, ncols=len(neighbours), figsize=(30, 12), squeeze=False
  )
  for idx, (ax, elem) in enumerate(zip(axes[0], neighbours)):
    img = load_img(data_folder + imdb_ids[elem] + '-' + titles[elem] + '.jpg')
    ax.imshow(img)
    ax.axis('off')
    if idx == 0:
      ax.set_title("Found for\n" + imdb_ids[elem] + "\n" + titles[elem])
    else:
      ax.set_title("Recommendation " + str(idx) + "\n" + imdb_ids[elem] + "\n" + titles[elem])

  # Lay out after drawing so the titles are taken into account, then widen
  # the horizontal gaps between posters.
  fig.tight_layout()
  fig.subplots_adjust(wspace=0.6)

  # Save before showing: plt.show() may release the figure on some backends,
  # which would leave an empty image on disk.
  fig.savefig(results_folder + model_type + "_imagenet_fune_tuning_cosinus_" + str(id) + ".png")
  plt.show()
  
def get_recommendation(found_for_id):
  """Find the neighbours of movie ``found_for_id`` and plot them.

  Args:
    found_for_id: Row index of the query movie; forwarded to
      ``found_recommendation`` and used by ``show_results`` in the name of
      the saved figure.
  """
  show_results(found_recommendation(found_for_id), found_for_id)

### Parameters

In [0]:
# Model name used as the prefix of the batch files to load and of the saved
# result images.
model_type = model_InceptionV3
# Folder show_results reads the poster JPEGs from.
data_folder = images_folder
# Neighbours requested per query in addition to the first slot (nn_num adds 1).
number_of_recommendations = 5

### Load Data

#### Find number of batches

In [0]:
# Count the directory entries whose name contains "<model>_titles"; the
# loaders below read that many "_part_<n>.p" files for each data kind.
batch_name_marker = model_type + "_" + titles_file
number_of_batches = sum(
    1
    for entry in os.listdir(prepared_data_folder)
    if batch_name_marker in entry
)

#### Load Titles

In [7]:
# Read every titles batch and flatten the batches into one array.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE: pickle executes code on load; only read files this project produced.
titles_items = []
for idx in range(number_of_batches):
    batch_path = prepared_data_folder + model_type + "_" + titles_file + "_part_" + str(idx) + '.p'
    with open(batch_path, mode='rb') as batch_file:
        titles_items.extend(pickle.load(batch_file))
titles = np.array(titles_items)
titles.shape

(3978,)

#### Load IMDB ids


In [8]:
# Read every IMDB-id batch and flatten the batches into one array.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE: pickle executes code on load; only read files this project produced.
imdb_id_items = []
for idx in range(number_of_batches):
    batch_path = prepared_data_folder + model_type + "_" + imdb_id_file + "_part_" + str(idx) + '.p'
    with open(batch_path, mode='rb') as batch_file:
        imdb_id_items.extend(pickle.load(batch_file))
imdb_ids = np.array(imdb_id_items)
imdb_ids.shape

(3978,)

#### Load Features

In [9]:
# Read every features batch and stack the per-movie entries into one array.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE: pickle executes code on load; only read files this project produced.
feature_rows = []
for idx in range(number_of_batches):
    batch_path = prepared_data_folder + model_type + "_" + features_file + "_part_" + str(idx) + '.p'
    with open(batch_path, mode='rb') as batch_file:
        feature_rows.extend(pickle.load(batch_file))
features = np.array(feature_rows)
features.shape


(3978, 131072)

### Prepare Data

In [0]:
# One slot more than the number of recommendations: show_results labels the
# first returned neighbour "Found for" and only the rest "Recommendation <n>".
nn_num = number_of_recommendations + 1

# Despite its name this is a nearest-neighbour index, not a clustering model.
# fit() returns the estimator itself, so binding its result is equivalent to
# chaining the call onto the constructor.
neighbour_search = NearestNeighbors(
    n_neighbors=nn_num,
    algorithm='auto',
    metric='cosine',
    n_jobs=-1,
)
clustering_model = neighbour_search.fit(features)

### Find Movie

In [11]:
# Substring matched case-insensitively against the movie titles. The "Id"
# column of the displayed table is the value to pass to get_recommendation.
phrase = "Lord"

get_titles_by_phrase(phrase)

Unnamed: 0,Id,IMDB id,Title
0,169,tt1731697,The Lords of Salem
1,2021,tt0120737,The Lord of the Rings The Fellowship of the Ring
2,2174,tt0167261,The Lord of the Rings The Two Towers
3,2179,tt0167260,The Lord of the Rings The Return of the King
4,2636,tt0355702,Lords of Dogtown
5,2829,tt0399295,Lord of War


### Recommendation system


In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 2393

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 2764

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 3172

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 3347

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 2077

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 1346

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 1361

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 480

get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 593


get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 1785


get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 2966


get_recommendation(found_for_id)

In [0]:
# Id 2179 is "The Lord of the Rings The Return of the King" (tt0167260)
# according to the "Lord" title search output earlier in this notebook.
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 2179


get_recommendation(found_for_id)

In [0]:
# Row index of the query movie (the "Id" column of the title search).
# The call plots the query poster with its neighbours and saves the figure.
found_for_id = 467


get_recommendation(found_for_id)