# Database Search

In [3]:
import numpy as np

def levenshtein_ratio_and_distance(s, t, ratio_calc = True):
    """Compute the Levenshtein similarity ratio or edit distance of two strings.

    A dynamic-programming matrix is filled so that distance[i][j] holds the
    edit cost between the first i characters of s and the first j characters
    of t.

    Args:
        s: First string.
        t: Second string.
        ratio_calc: When truthy (default), a substitution costs 2 and the
            similarity ratio (len(s) + len(t) - cost) / (len(s) + len(t)) is
            returned, which aligns the result with the Python Levenshtein
            package. When falsy, a substitution costs 1 and the plain
            Levenshtein distance is returned.

    Returns:
        A float in [0, 1] when ratio_calc is truthy (1.0 for two empty
        strings, which are identical), otherwise the edit distance as an int.
    """
    rows = len(s)+1
    cols = len(t)+1
    distance = np.zeros((rows,cols),dtype = int)

    # Border cells: turning a prefix of length i into the empty string costs i.
    # They are filled independently of each other so that an empty s or t
    # still gets a correctly initialised first row/column.
    distance[:, 0] = np.arange(rows)
    distance[0, :] = np.arange(cols)

    # Substitution is priced as delete + insert when computing the ratio.
    substitution_cost = 2 if ratio_calc else 1

    for col in range(1, cols):
        for row in range(1, rows):
            cost = 0 if s[row-1] == t[col-1] else substitution_cost
            distance[row][col] = min(distance[row-1][col] + 1,      # Cost of deletions
                                 distance[row][col-1] + 1,          # Cost of insertions
                                 distance[row-1][col-1] + cost)     # Cost of substitutions

    # Read the bottom-right cell explicitly instead of relying on the loop
    # variables, which are undefined when either string is empty.
    edits = int(distance[rows-1][cols-1])

    if not ratio_calc:
        return edits

    total_length = len(s) + len(t)
    if total_length == 0:
        # Two empty strings are identical; avoid dividing by zero.
        return 1.0
    return (total_length - edits) / total_length

In [4]:
import json

from prettytable import PrettyTable
from time import time

# Table with one column per rating field. NOTE(review): presumably meant for
# displaying per-user ratings; it is created here but not populated in the cells shown.
table = PrettyTable(field_names=["UserID", "Rating", "Tags", "Timestamp"])

### Loading the database into memory

In [28]:
# Parse the movie database; the context manager closes the file handle
# as soon as parsing finishes instead of leaking it for the kernel's lifetime.
with open("dataFinal.json", "rb") as dataFile:
    data = json.load(dataFile)

### Loading the global secondary index (movie title → movie ID) into memory

In [29]:
# Parse the title -> movie ID index; the context manager closes the file
# handle as soon as parsing finishes instead of leaking it.
with open("dataFinal_GIS.json", "rb") as gisFile:
    GIS = json.load(gisFile)

In [30]:
# Fuzzy search of movies by title
def getClosestMatch(queryString):
    """Find the movie whose indexed title is most similar to queryString.

    Every title in GIS is scored against queryString with
    levenshtein_ratio_and_distance; the highest-scoring title wins (the first
    one in index order on ties).

    Args:
        queryString: Title text to look for.

    Returns:
        A response dict with the keys "response code", "search time",
        "message" and "response body". On success (code 200) the body is a
        copy of the movie record from data with a "title" entry added. When
        the index is empty or the best ratio is below 0.5 the code is 404 and
        the body is "no match found".
    """
    tc = time() #time counter

    # Single pass that keeps title and score together; the strict ">" keeps
    # the first title among equal scores.
    bestMatch_mTitle = None
    bestMatchRatio = -1.0
    for candidateTitle in GIS:
        candidateRatio = levenshtein_ratio_and_distance(queryString, candidateTitle)
        if candidateRatio > bestMatchRatio:
            bestMatch_mTitle = candidateTitle
            bestMatchRatio = candidateRatio

    # An empty index, or a best ratio below 50%, is treated as "record does not exist"
    if bestMatch_mTitle is None or bestMatchRatio < 0.5:
        return {"response code" : 404, "search time" : f"{time() - tc} seconds", "message" : "error", "response body" : "no match found"}

    bestMatch_mId = GIS[bestMatch_mTitle]

    # Shallow-copy the record so that adding the title does not modify the
    # shared in-memory database on every search.
    movieInfo = dict(data[str(bestMatch_mId)]) #details of movie retrieved with movieId

    comp_time = time() #Time of completion of search

    movieInfo["title"] = bestMatch_mTitle

    responseModel = {"response code" : 200, "search time" : f"{comp_time - tc} seconds", "message" : "success", "response body" : movieInfo}

    return responseModel

In [31]:
# Front-end helper: validates the query, runs the title search and prints a short summary
def request(movieToSearch=""):
    """Search for a movie by title and print its title and genre.

    Args:
        movieToSearch: Title to look up; it is converted with str() first.

    Returns:
        "Invalid Movie Name" when the query is empty or only whitespace,
        "ERROR: No records found" when the search responds with code 404,
        otherwise None after printing the summary.
    """
    movieToSearch = str(movieToSearch)
    if not movieToSearch.strip():
        return "Invalid Movie Name"

    result = getClosestMatch(queryString = movieToSearch)
    if result["response code"] == 404:
        return "ERROR: No records found"

    body = result["response body"]
    ratings = body["user_rating"]  # NOTE(review): looked up but not rendered below

    # Each line carries the four-space indent of the printed layout; the
    # final entry leaves the output ending in an indented blank line.
    summary_lines = [
        f"    Title : {body['title']}",
        f"    Genre : {body['genre']}",
        "    User Ratings : ---------------------------",
        "    ",
    ]
    print("\n".join(summary_lines))

In [32]:
# Example query: look up a movie by its title without the release year
request("Jumanji")

    Title : Jumanji (1995)
    Genre : ['Adventure', 'Children', 'Fantasy']
    User Ratings : ---------------------------
    
