In [65]:
import pandas as pd
import json
from connector import set_connection

In [7]:
# Load the precomputed model from model.json (read as UTF-8, parsed as JSON).
# Later cells read model['head'] and, for each head entry,
# model[<head>]['books'] and model[<head>]['users'].
with open('model.json', 'r', encoding='utf-8') as file:
    model = json.load(file)


In [8]:
# Exclude group '0425160394' from the list of group heads.
# NOTE(review): the reason for excluding this particular group is not recorded
# in the notebook - confirm and document it.
# The membership check keeps this cell idempotent: list.remove() raises
# ValueError when the value is absent, so re-running the bare remove() on an
# already-pruned model failed.
if '0425160394' in model['head']:
    model['head'].remove('0425160394')

In [66]:
def run_query(query:str):
    """Run ``query`` through a connection obtained from ``set_connection``.

    The connection is used as a context manager for the duration of the call.

    Args:
        query: query text, passed unchanged to the connection's ``query`` method.

    Returns:
        Whatever ``.to_df()`` yields for the query result (presumably a pandas
        DataFrame, since callers go on to use ``set_index`` on it).
    """
    with set_connection() as connection:
        result = connection.query(query)
        return result.to_df()

In [3]:
def get_cross_books(books_1, books_2):
    """Return the items that occur in both ``books_1`` and ``books_2``.

    Duplicates are collapsed. The result is a list built from a set, so its
    order is arbitrary.
    """
    shared = set(books_1)
    shared &= set(books_2)
    return list(shared)

In [18]:
def sort_keys_by_values(input_dict):
    """Return the keys of ``input_dict`` ordered from largest value to smallest.

    The sort is stable, so keys with equal values keep their insertion order.
    """
    return sorted(input_dict, key=input_dict.get, reverse=True)

In [24]:
def get_user_group(user_books:dict, model:dict, n_groups:int=10):
    """Pick the groups whose books the user rated highest on average.

    For every entry of ``model['head']`` the user's ratings of the books shared
    with ``model[<head>]['books']`` are averaged. A group that shares no book
    with the user scores 0.

    Args:
        user_books: mapping of book id -> the user's rating.
        model: mapping holding the ``'head'`` list and one entry per head with
            a ``'books'`` collection.
        n_groups: maximum number of group ids to return.

    Returns:
        Up to ``n_groups`` group ids, best score first.
    """
    heads = model['head']
    rated_books = list(user_books.keys())

    scores = {}
    for head in heads:
        shared = get_cross_books(model[head]['books'], rated_books)
        if len(shared) == 0:
            # Nothing in common with this group.
            scores[head] = 0
            continue
        scores[head] = sum([user_books[book] for book in shared]) / len(shared)

    ranked = sort_keys_by_values(scores)
    return ranked[:n_groups]
        

In [81]:
# Example user profile used to exercise the recommender.
user_123 = {
    # Book id (ISBN) -> rating given by this user.
    'read_books': {
        '0375400117': 9,
        '0553375059': 10,
        '0553285785': 9,
        '0385504209': 9,
        '0312303467': 2,
    },
    # These lists start empty and are filled in by later cells.
    'groups': [],
    'same_users': [],
    'books_for_rec': [],
}

In [83]:
# Rank the model's groups for this user and keep the best matches
# (get_user_group returns at most n_groups ids; n_groups defaults to 10).
user_123['groups'] = get_user_group(user_123['read_books'], model)

In [84]:
# Show the profile now that 'groups' has been filled in.
user_123

{'read_books': {'0375400117': 9,
  '0553375059': 10,
  '0553285785': 9,
  '0385504209': 9,
  '0312303467': 2},
 'groups': ['059035342X',
  '0671027387',
  '0312995423',
  '0743403452',
  '0345350499',
  '0316776963',
  '0312422156',
  '0671672797',
  '0061094226',
  '0425167313'],
 'same_users': [],
 'books_for_rec': []}

In [109]:
def get_similar_num(user_0:list, user_1:list)-> float:
    """Return the mean of twice the absolute difference between paired ratings.

    ``user_0[i]`` is compared with ``user_1[i]``; 0 means the two rating lists
    are identical and larger values mean they differ more.

    Args:
        user_0: ratings of the first user.
        user_1: ratings of the second user, in the same book order as ``user_0``.

    Returns:
        ``sum(2 * |user_0[i] - user_1[i]|) / len(user_0)`` as a float.

    Raises:
        ValueError: if the lists are empty or have different lengths. Without
            this check a longer ``user_0`` gave a silently wrong average (the
            sum covered only ``len(user_1)`` items but was divided by
            ``len(user_0)``), and empty input surfaced as ZeroDivisionError.
    """
    if len(user_0) != len(user_1):
        raise ValueError(
            f"rating lists must have the same length, got {len(user_0)} and {len(user_1)}"
        )
    if len(user_0) == 0:
        raise ValueError("rating lists must not be empty")

    total = 0
    for own_rating, other_rating in zip(user_0, user_1):
        total += 2 * abs(own_rating - other_rating)
    return total / len(user_0)

In [123]:
def get_similar_users(books:dict, groups:list, model:dict, get_sililar=False):
    """Find members of the given groups whose ratings are close to ``books``.

    Each user listed in ``model[<group>]['users']`` has their ratings fetched
    with ``run_query``. For the books they share with ``books`` a distance is
    computed as ``get_similar_num(...) / len(books)``; users at or above the
    distance cut-off, or sharing no book at all, are dropped.

    Args:
        books: mapping of book id -> rating for the target user (the original
            ``list`` annotation was wrong: the body uses ``.keys()`` and key
            lookup).
        groups: group ids to scan; each must be a key of ``model``.
        model: mapping with, per group id, a ``'users'`` collection.
        get_sililar: when true, return the ``{user: distance}`` mapping instead
            of the ordered list. (The spelling is kept because it is part of
            the call signature.)

    Returns:
        By default a list of user ids ordered from smallest distance to
        largest; with ``get_sililar`` the underlying dict.
    """
    # Users whose distance reaches this value are not considered similar.
    max_distance = 10
    own_books = list(books.keys())

    similar_users = {}
    # A user can belong to several of the selected groups. Their ratings and
    # therefore their distance are the same each time, so query and score them
    # once only.
    evaluated = set()

    for group in groups:
        for user in model[group]['users']:
            if user in evaluated:
                continue
            evaluated.add(user)

            # NOTE(review): the user id is interpolated straight into the SQL
            # text. That is fine while ids come from the local model file; if
            # they can ever come from outside, switch to a parameterised query.
            user_data = run_query(f"""select isbn, book_rating
                                   from ratings
                                   where user_id = {user}""").set_index('isbn')['book_rating'].to_dict()

            cross_books = get_cross_books(list(user_data.keys()), own_books)
            if len(cross_books) == 0:
                continue

            # Both rating lists follow the order of cross_books, so they pair up.
            distance = get_similar_num([books[x] for x in cross_books],
                                       [user_data[x] for x in cross_books]) / len(books)

            if distance < max_distance:
                similar_users[user] = distance

    if get_sililar:
        return similar_users

    # sort_keys_by_values orders largest-first; reversing puts the closest
    # (smallest distance) users at the front.
    return sort_keys_by_values(similar_users)[::-1]


In [126]:
# Collect users from the selected groups whose ratings are close to this user's.
# get_similar_users issues database queries for the group members, so this cell
# may take a while to run.
user_123['same_users'] = get_similar_users(user_123['read_books'], user_123['groups'], model)

In [127]:
# Show the profile now that 'same_users' has been filled in.
user_123

{'read_books': {'0375400117': 9,
  '0553375059': 10,
  '0553285785': 9,
  '0385504209': 9,
  '0312303467': 2},
 'groups': ['059035342X',
  '0671027387',
  '0312995423',
  '0743403452',
  '0345350499',
  '0316776963',
  '0312422156',
  '0671672797',
  '0061094226',
  '0425167313'],
 'same_users': [219301,
  15602,
  70594,
  124720,
  158226,
  92810,
  81484,
  11676,
  218608,
  62659,
  638,
  49980,
  28865,
  75819,
  60263,
  10560,
  220688,
  242083,
  97694,
  229741,
  112001,
  235282,
  128434,
  239584,
  69078,
  100906,
  54335,
  24995,
  180591,
  100227,
  104636],
 'books_for_rec': []}

In [132]:
def predict(user:dict, model:dict, n_book_for_rec:int):
    """Recommend up to ``n_book_for_rec`` distinct books for ``user``.

    Missing parts of the profile are computed and stored back into ``user``:
    ``'groups'`` when empty, ``'same_users'`` when it has fewer than 10
    entries, and ``'books_for_rec'`` when it has fewer than ``n_book_for_rec``
    entries. Candidate books are those a similar user rated above 7 and that
    the target user has not read.

    Args:
        user: profile dict with keys ``'read_books'`` (book id -> rating),
            ``'groups'``, ``'same_users'`` and ``'books_for_rec'``; mutated in
            place as described above.
        model: the group model passed on to ``get_user_group`` and
            ``get_similar_users``.
        n_book_for_rec: maximum number of recommendations to return.

    Returns:
        A list of distinct book ids drawn at random from
        ``user['books_for_rec']``. It is shorter than ``n_book_for_rec`` (and
        possibly empty) when there are not enough distinct candidates.
        Previously an empty candidate list raised IndexError and the same book
        could be returned more than once.
    """
    from random import sample

    if len(user['groups']) == 0:
        user['groups'] = get_user_group(user['read_books'], model)

    if len(user['same_users']) < 10:
        user['same_users'] = get_similar_users(user['read_books'], user['groups'], model)

    if len(user['books_for_rec']) < n_book_for_rec:
        for same_user in user['same_users']:
            same_user_data = run_query(f"""select isbn, book_rating
                                   from ratings
                                   where user_id = {same_user}""").set_index('isbn')['book_rating'].to_dict()

            for book, rating in same_user_data.items():
                # Keep only well-rated books the target user has not read yet.
                if book not in user['read_books'] and rating > 7:
                    user['books_for_rec'].append(book)

    # Shuffle the whole candidate pool and take the first distinct ids. The
    # pool may list a book several times (once per similar user who liked it),
    # so such books are still more likely to be picked, but each id is
    # returned at most once.
    pool = user['books_for_rec']
    book_for_rec = []
    for book in sample(pool, len(pool)):
        if len(book_for_rec) >= n_book_for_rec:
            break
        if book not in book_for_rec:
            book_for_rec.append(book)

    return book_for_rec
        
        
            
   

In [164]:
# Ask for 10 recommendations. predict() draws its result at random (no seed is
# set), so the ISBNs shown below change from run to run; it also stores the
# candidate books it finds in user_123['books_for_rec'].
predict(user_123, model, 10)

['0553250426',
 '0671034774',
 '0439368634',
 '0451403002',
 '8420601209',
 '344243372X',
 '3423130857',
 '0385326335',
 '1570717575',
 '0767912233']