# Requêtes mongo pour édition des pages

In [7]:
# Importation des packages nécessaires
import pandas as pd
import numpy as np
import pymongo
from datetime import time,MAXYEAR,timedelta
import datetime

# Open the Mongo database: create a client with the default connection
# settings, then select the "myanimelist" collection of the "anime" database.
# The last line lists the databases visible to the client (displayed as the
# cell output).
client = pymongo.MongoClient()
database = client.anime
collection = database.myanimelist
client.list_database_names()

['admin', 'anime', 'config', 'local']

## Sélection d'animés selon des critères

In [8]:
def _distinct_values(collection, field):
    """Return the distinct values of ``field`` obtained with a ``$group`` aggregation."""
    pipeline = [{"$group": {"_id": "$" + field}}]
    return [doc["_id"] for doc in collection.aggregate(pipeline)]


def init_components(collection):
    """
    Build the values and labels of the dropdowns, sliders and checklists
    of the "Advance Search" tab.

    Args:
        collection: collection holding the anime to query (only its
            ``aggregate`` method is used)

    Returns:
        dict containing:
            - Type_val: values of the "type" dropdown
            - rating_val: values of the "rating" dropdown
            - status_val: values of the "status" dropdown
            - duration_val, duration_label: values (seconds) and labels of
              the "duration" slider
            - episode_val: values of the "episodes" slider
            - popularity_val: values of the "popularity" slider
            - ranked_val: values of the "ranked" slider
            - year_val: values of the "start_year" slider
            - genres_val: set of values of the "genres" checklist
            - radio_genres: values of the "genres" radio items
            - producers_val: set of values of the "producers" checklist
            - radio_producers: values of the "producers" radio items

    Raises:
        ValueError: if the collection holds no usable duration, episodes,
            popularity, ranked or start year value (``max`` of an empty list).
    """
    def not_none(values):
        return [v for v in values if v is not None]

    def plain_ints(values):
        # bool is deliberately rejected, like the former ``type(v) == int`` test.
        return [v for v in values if type(v) is int]

    def next_thousand(value):
        # Upper slider bound: first multiple of 1000 strictly above ``value``.
        return (value // 1000 + 1) * 1000

    def flatten(groups):
        # Each group is expected to be a list of names; None / empty / non-list
        # groups are ignored instead of breaking the concatenation.
        merged = set()
        for group in groups:
            if isinstance(group, (list, tuple)):
                merged.update(group)
        return merged

    # Dropdown values
    Type_val = not_none(_distinct_values(collection, "Type"))
    rating_val = not_none(_distinct_values(collection, "rating"))
    status_val = not_none(_distinct_values(collection, "status"))

    # Slider values and labels
    max_duration = max(
        [d.hour for d in _distinct_values(collection, "duration")
         if isinstance(d, datetime.datetime)]
    )
    hours = [0, 0, 0, 0, 1, 1, 2, max_duration + 1]
    minutes = [0, 15, 30, 45, 0, 30, 0, 0]
    duration_val = [
        int(timedelta(hours=h, minutes=m).total_seconds())
        for h, m in zip(hours, minutes)
    ]
    duration_label = []
    for h, m in zip(hours, minutes):
        if h != 0 and m != 0:
            duration_label.append("{}h{}min".format(h, m))
        elif h != 0:
            duration_label.append("{}h".format(h))
        else:
            duration_label.append("{}min".format(m))

    max_episodes = max(plain_ints(_distinct_values(collection, "episodes")))
    episode_val = [1, 50, 200, 300, 500, 700, next_thousand(max_episodes)]

    max_popularity = max(plain_ints(_distinct_values(collection, "popularity")))
    popularity_val = list(range(0, 10001, 2000)) + [next_thousand(max_popularity)]

    max_ranked = max(plain_ints(_distinct_values(collection, "ranked")))
    ranked_val = [1, 1000] + list(range(2000, 8001, 2000)) + [next_thousand(max_ranked)]

    # Start dates are stored either as datetimes or as bare years; anything
    # else is ignored.
    years = []
    for start in _distinct_values(collection, "aired.start"):
        if isinstance(start, datetime.datetime):
            years.append(int(start.year))
        elif str(start).isdigit():
            years.append(int(start))
    max_year, min_year = max(years), min(years)
    year_val = (
        [(min_year // 10) * 10]
        + list(range(1930, 1990, 30))
        + list(range(2000, 2016, 7))
        + [max_year]
    )

    # Checklist and radio item values
    genres_val = flatten(_distinct_values(collection, "genres"))
    radio_genres = ["All genres", "Limit to", "Exactly with", "Exclude"]

    producers_val = flatten(_distinct_values(collection, "producers"))
    radio_producers = ["All producers", "Limit to", "Exactly with", "Exclude"]

    # Output dictionary (key names are part of the interface)
    return {
        "Type_val": Type_val,
        "rating_val": rating_val,
        "status_val": status_val,
        "duration_val": duration_val,
        "duration_label": duration_label,
        "episode_val": episode_val,
        "popularity_val": popularity_val,
        "ranked_val": ranked_val,
        "year_val": year_val,
        "genres_val": genres_val,
        "radio_genres": radio_genres,
        "producers_val": producers_val,
        "radio_producers": radio_producers,
    }

In [9]:
def to_time(timedelta,to_datetime=False):
    """
    Convert a duration expressed in seconds into a time of day.

    Args:
        timedelta: a duration in seconds (int, float, or numpy number; the
            fractional part is truncated) or a list/tuple of such durations.
            Note that this parameter name shadows ``datetime.timedelta``
            inside the function.
        to_datetime: when True, return ``datetime.datetime`` objects dated
            31 December of ``MAXYEAR`` instead of ``datetime.time`` objects

    Returns:
        the matching ``datetime.time`` / ``datetime.datetime``, a list of
        them when a list or tuple was given, or ``None`` for any other
        input type.

    Raises:
        ValueError: if a duration is negative or reaches 24 hours, since it
            cannot be represented as a time of day.
    """
    if isinstance(timedelta, (list, tuple)):
        return [to_time(d, to_datetime) for d in timedelta]

    # Numbers coming from numpy (e.g. derived from np.mean) are accepted too;
    # bool is rejected so that only genuine durations are converted.
    is_number = isinstance(timedelta, (int, float, np.integer, np.floating))
    if not is_number or isinstance(timedelta, bool):
        return None

    hours, remainder = divmod(int(timedelta), 3600)
    minutes, seconds = divmod(remainder, 60)
    res = time(hour=hours, minute=minutes, second=seconds)
    if to_datetime:
        res = datetime.datetime(year=MAXYEAR, month=12, day=31,
                                hour=res.hour, minute=res.minute, second=res.second)
    return res

In [10]:
def _between(field, bounds):
    """Build a clause keeping documents whose ``field`` lies within ``bounds`` (inclusive)."""
    return {"$and": [{field: {"$gte": min(bounds)}}, {field: {"$lte": max(bounds)}}]}


def _membership(field, mode, values):
    """Build the clause of a "Limit to" / "Exclude" / "Exactly with" choice, or None."""
    operators = {"Limit to": "$in", "Exclude": "$nin", "Exactly with": "$all"}
    if mode not in operators or len(values) == 0:
        return None
    return {field: {operators[mode]: values}}


def make_request(Type=["All types"],rating=["All ratings"],status=["All status"],score=[],duration=[],episodes=[],popularity=[],ranked=[],year=[],genres_r='',genres_d=[],producers_r='',producers_d=[],start_year=[],titles=[]):
    """
    Build a Mongo query from the filters given as arguments.

    Args:
        Type, rating, status: accepted values; the filter is skipped when the
            list contains "All types" / "All ratings" / "All status"
        score, episodes, popularity, ranked: bounds of an inclusive range
            (the min and max of the list are used); empty means no filter
        duration: bounds of an inclusive range, in seconds (converted with
            ``to_time``); empty means no filter
        year: unused, kept for backward compatibility
        genres_r, producers_r: "Limit to" (any of), "Exclude" (none of) or
            "Exactly with" (all of); any other value disables the filter
        genres_d, producers_d: values the matching radio choice applies to
        start_year: bounds (integer years) of an inclusive range on
            ``aired.start``, which may be stored as a year or as a datetime
        titles: titles looked up in ``main_title`` and ``other_titles``

    Returns:
        ``{"$and": [...]}`` with one clause per active filter, or ``{}`` when
        no filter is active. The default arguments are never mutated.
    """
    # Clauses are appended in a fixed order so the generated query is stable:
    # titles, score, ranked, popularity, genres, producers, Type, episodes,
    # status, duration, rating, aired.
    clauses = []

    if len(titles) > 0:
        wanted = list(titles)
        clauses.append({"$or": [{"main_title": {"$in": wanted}},
                                {"other_titles": {"$in": wanted}}]})

    for field, bounds in (("score", score), ("ranked", ranked), ("popularity", popularity)):
        if len(bounds) > 0:
            clauses.append(_between(field, bounds))

    for field, mode, values in (("genres", genres_r, genres_d),
                                ("producers", producers_r, producers_d)):
        clause = _membership(field, mode, values)
        if clause is not None:
            clauses.append(clause)

    if "All types" not in Type:
        clauses.append({"Type": {"$in": Type}})
    if len(episodes) > 0:
        clauses.append(_between("episodes", episodes))
    if "All status" not in status:
        clauses.append({"status": {"$in": status}})
    if len(duration) > 0:
        clauses.append(_between("duration", to_time(duration, True)))
    if "All ratings" not in rating:
        clauses.append({"rating": {"$in": rating}})

    if len(start_year) > 0:
        first, last = min(start_year), max(start_year)
        # Each bound is expressed twice because aired.start is either a bare
        # year or a datetime. The upper datetime bound is the very last
        # instant of the year so that 31 December is included.
        start = {"$or": [{"aired.start": {"$gte": first}},
                         {"aired.start": {"$gte": datetime.datetime(first, 1, 1)}}]}
        end = {"$or": [{"aired.start": {"$lte": last}},
                       {"aired.start": {"$lte": datetime.datetime(last, 12, 31, 23, 59, 59, 999999)}}]}
        clauses.append({"$and": [start, end]})

    return {"$and": clauses} if clauses else {}

In [11]:
def select_anime(request,collection,champs=[],max_result=10**30,sort={"champs":None,"order":None}):
    """
    Select the anime of ``collection`` matching ``request``.

    Args:
        request: Mongo query
        collection: collection to search (its ``find`` method is used)
        champs: fields to retrieve; empty means whole documents
        max_result: maximum size of the returned list
        sort: dict with the keys "champs" (fields to sort on) and "order"
            (matching directions); no sorting when either one is None

    Returns:
        list of matching documents, or list of bare values when exactly one
        field is requested. The query is executed once; an empty result is
        returned as an empty list.

    Raises:
        KeyError: when a single field is requested and a matching document
            does not have it as a top-level key.
    """
    # Projection restricted to the requested fields
    projection = {champ: 1 for champ in champs}

    if projection:
        cursor = collection.find(request, projection)
    else:
        cursor = collection.find(request)

    if sort["champs"] is not None and sort["order"] is not None:
        cursor = cursor.sort(list(zip(sort["champs"], sort["order"])))

    if len(projection) == 1:
        (field,) = projection
        res = [doc[field] for doc in cursor]
    else:
        res = list(cursor)

    # The limit is applied on the materialised list, so the huge default
    # value of max_result is harmless.
    return res[:max_result]

## Algorithme de recommandation

In [52]:
def find_references(references,collection,champs,NaN=None):
    """
    Gather and normalise the information of the reference anime used by
    the recommendation.

    Args:
        references: titles of the reference anime
        collection: collection holding every anime
        champs: fields used for the recommendation; at least two are
            expected, because ``select_anime`` returns bare values instead
            of documents when a single field is requested
        NaN: additional value to be treated as missing

    Returns:
        dict mapping each retrieved field to the list of its non-missing
        values over the reference anime:
            - "duration": durations in seconds
            - "aired": start years
            - "related_anime": one flat list of related titles per anime,
              "Adaptation" entries excluded
        Values that cannot be normalised are skipped, and a "duration" or
        "aired" list left empty is removed from the result. Lists of
        different fields are not guaranteed to be aligned, since missing
        values are dropped field by field.
    """
    NA = [None, [], 'None', NaN]
    request = make_request(titles=references)

    infos = dict()
    for anime in select_anime(request, collection, champs):
        for champ, value in anime.items():
            if champ != "_id" and value not in NA:
                infos.setdefault(champ, []).append(value)

    if "duration" in infos:
        # Durations are stored as datetimes whose time part is the duration.
        seconds = [d.hour * 3600 + d.minute * 60 + d.second
                   for d in infos["duration"] if isinstance(d, datetime.datetime)]
        if seconds:
            infos["duration"] = seconds
        else:
            del infos["duration"]

    if "aired" in infos:
        # The start date is either a datetime or a bare year; any other
        # value (unknown date, free text...) is ignored.
        years = []
        for aired in infos["aired"]:
            start = aired.get("start") if isinstance(aired, dict) else None
            if isinstance(start, datetime.datetime):
                years.append(start.year)
            elif type(start) is int:
                years.append(start)
            elif str(start).isdigit():
                years.append(int(start))
        if years:
            infos["aired"] = years
        else:
            del infos["aired"]

    if "related_anime" in infos:
        # One entry per anime is kept (possibly empty) so positions are
        # preserved within this list.
        infos["related_anime"] = [
            [title
             for kind, linked in relations.items() if kind != "Adaptation"
             for title in linked]
            if isinstance(relations, dict) else []
            for relations in infos["related_anime"]
        ]
    return infos

In [56]:
# Example: collect the reference information of "Naruto" for the listed
# fields and display the resulting dictionary.
infos=find_references(["Naruto"],collection,["genres","duration","aired.start","episodes","related_anime"])
infos

{'genres': [['Action',
   'Adventure',
   'Comedy',
   'Super Power',
   'Martial Arts',
   'Shounen']],
 'related_anime': [['Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo!',
   'Naruto: Takigakure no Shitou - Ore ga Eiyuu Dattebayo!',
   'Naruto: Akaki Yotsuba no Clover wo Sagase',
   'Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo!',
   'Naruto Narutimate Hero 3: Tsuini Gekitotsu! Jounin vs. Genin!! Musabetsu Dairansen Taikai Kaisai!!',
   'Naruto Movie 3: Dai Koufun! Mikazuki Jima no Animaru Panikku Dattebayo!',
   'Naruto: The Cross Roads',
   'Naruto: Shippuuden']],
 'episodes': [220],
 'duration': [1380],
 'aired': [2002]}

In [162]:
def anime_recommandation(titles,collection,champs=["genres","duration","episodes","aired.start","main_title"],max_result=10):
    """
    Recommend anime from a list of reference anime.

    Candidates are first selected on genres (any reference genre, then the
    genres common to every reference), then narrowed step by step on
    duration, number of episodes and start year (each within +/-25% of the
    reference mean) until at most ``max_result`` candidates remain. The
    smallest non-empty candidate list is returned, best score first.

    Args:
        titles: titles of the reference anime
        collection: collection holding every anime
        champs: fields used for the recommendation
        max_result: maximum size of the recommendation list

    Returns:
        list of recommended titles (``main_title``), empty when no
        candidate could be found.
    """
    ranking = {"champs": ["score", "popularity"], "order": [-1, -1]}

    # Information about the reference anime
    ref_infos = find_references(titles, collection, champs)
    print(ref_infos)
    # References must never be recommended; fall back on the given titles
    # when "main_title" was not retrieved.
    excluded = set(ref_infos.get("main_title") or titles)

    selection = []

    def search(**filters):
        # Ranked candidate titles, without duplicates nor reference anime.
        # Order is preserved so the best-scored titles stay first.
        found = select_anime(make_request(**filters), collection,
                             champs=["main_title"], sort=ranking)
        return [title for title in dict.fromkeys(found) if title not in excluded]

    def register(found):
        # Related anime are pruned only for the first candidate list; later
        # lists are refinements of an already pruned one.
        selection.append(clean_selection(found, bool(selection)))
        return 0 < len(selection[-1]) <= max_result

    # Anime sharing genres with the references
    under_max = False
    ref_genres = ref_infos.get("genres") or []
    if ref_genres:
        any_genre = list(set().union(*ref_genres))
        found = search(genres_r='Limit to', genres_d=any_genre)
        if found:
            under_max = register(found)
        if not under_max:
            common_genres = set(ref_genres[0]).intersection(*ref_genres[1:])
            if common_genres:
                found = search(genres_r='Exactly with', genres_d=list(common_genres))
                if found:
                    under_max = register(found)

    # Successive refinements: similar duration, number of episodes, year.
    # NOTE(review): a +/-25% window is very wide for years (about 1500-2500);
    # kept as in the original design - confirm the intended tolerance.
    refinements = (("duration", "duration"),
                   ("episodes", "episodes"),
                   ("aired", "start_year"))
    for info_key, filter_name in refinements:
        if under_max:
            break
        values = ref_infos.get(info_key)
        if not values:
            continue
        mean = float(np.mean(values))
        # Plain Python ints are required by the query builder.
        limits = [int(round(0.75 * mean)), int(round(1.25 * mean))]
        previous = selection[-1] if selection else []
        found = search(titles=previous, **{filter_name: limits})
        if found:
            under_max = register(found)

    # Return the best (smallest non-empty) selection
    if not selection:
        return []
    best_selection = min(selection, key=len)
    request = make_request(titles=best_selection)
    return select_anime(request, collection, max_result=max_result,
                        champs=["main_title"], sort=ranking)

In [163]:
def clean_selection(selection,clean_yet=False):
    """
    Remove from a selection the anime related to an earlier entry.

    The list is read in order: a title is kept unless it appears among the
    related anime ("Adaptation" links excluded) of a title already kept.
    Relations are read from the module-level ``collection``.

    Args:
        selection: list of recommended titles to process
        clean_yet: when True the selection was already processed and is
            returned unchanged

    Return:
        processed list of recommendations
    """
    if clean_yet:
        return selection
    if len(selection) == 0:
        # Nothing to look up; an empty title filter would otherwise match
        # the whole collection.
        return []

    # Map every title to the set of its related titles. Documents are
    # matched by title rather than by position, so an anime without
    # relations cannot shift the relations of the others.
    request = make_request(titles=list(selection))
    documents = select_anime(request, collection, champs=["main_title", "related_anime"])
    related = {}
    for document in documents:
        relations = document.get("related_anime")
        if isinstance(relations, dict):
            related[document.get("main_title")] = {
                title
                for kind, linked in relations.items() if kind != "Adaptation"
                for title in linked
            }

    kept, shadowed = [], set()
    for title in selection:
        if title is None or title != title:
            # Missing values (None / NaN) are dropped.
            continue
        if title in shadowed:
            continue
        kept.append(title)
        shadowed |= related.get(title, set())
    return kept

In [164]:
# Example: run clean_selection on a hand-written list of titles and display
# the cleaned list.
selection=['Fullmetal Alchemist: Brotherhood',
 'Gintama°',
 'Hunter x Hunter (2011)',
 'Shingeki no Kyojin Season 3 Part 2',
 'Ginga Eiyuu Densetsu',
 "Gintama'",
 'Shingeki no Kyojin: The Final Season',
 "Gintama': Enchousen",
 '3-gatsu no Lion 2nd Season',
 'Koe no Katachi']
clean_selection(selection)

['Fullmetal Alchemist: Brotherhood',
 'Gintama°',
 'Hunter x Hunter (2011)',
 'Shingeki no Kyojin Season 3 Part 2',
 'Ginga Eiyuu Densetsu',
 "Gintama'",
 '3-gatsu no Lion 2nd Season',
 'Koe no Katachi']

In [166]:
# Example: recommendations computed from two reference anime.
anime_recommandation(["Naruto","One Piece"],collection)

{'main_title': ['One Piece', 'Naruto'], 'genres': [['Action', 'Adventure', 'Comedy', 'Super Power', 'Drama', 'Fantasy', 'Shounen'], ['Action', 'Adventure', 'Comedy', 'Super Power', 'Martial Arts', 'Shounen']], 'duration': [1440, 1380], 'aired': [1999, 2002], 'episodes': [220]}


['Dragon Ball Z',
 'Naruto: Shippuuden',
 'D.Gray-man',
 'One Piece Film: Strong World Episode 0',
 'Bleach',
 'Shaman King',
 'Ueki no Housoku',
 'Dragon Ball Kai',
 'Dragon Ball Kai (2014)',
 'Dragon Ball Super']

## Lancement de l'app dash

In [16]:
!python anime_app.py

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "anime_app" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
^C


In [1]:
!python ../dash-sample-apps-master/apps/dash-oil-and-gas/app.py

'/home/lcplacide/Documents/DataEngineerTools/6Evaluation/Projet/dash_app'