In [1]:
import pandas as pd

tareas = pd.read_csv("../input/tareas.csv")

In [2]:
tareas

Unnamed: 0,Tareas,Duracion,Area,Id
0,Hacer la cama,5 minutos,Organización,1
1,Preparar el desayuno,10 minutos,Alimentación,2
2,Lavar los platos,15 minutos,Tareas del hogar,3
3,Limpiar el baño,20 minutos,Tareas del hogar,5
4,Planificar la cena,10 minutos,Alimentación,6
...,...,...,...,...
123,Aplicar una mascarilla facial o capilar,Variable,Belleza y Cuidado Personal,125
124,Escuchar un podcast antes de dormir,Variable,Entretenimiento,126
125,Realizar una actividad creativa,Variable,Arte y Creatividad,127
126,Repasar y ajustar metas y objetivos,10 minutos,Autodesarrollo,128


In [3]:
import re

def clean_title(title):
  return re.sub("[^a-zA-Z0-9 ]","",title)

In [4]:
tareas["clean_title"] = tareas["Tareas"].apply(clean_title)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(ngram_range=(1,2))

tfidf = vectorizer.fit_transform(tareas["clean_title"])

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title):
  title = clean_title(title)
  query_vec = vectorizer.transform([title])
  similarity = cosine_similarity(query_vec, tfidf).flatten()
  indices = np.argpartition(similarity,-5)[-5:]
  results = tareas.iloc[indices]
  return results

In [7]:
import ipywidgets as widgets
from IPython.display import display

tarea_input = widgets.Text(
    value = "Ejercicio",
    description = "Tarea:",
    disabled = False
)
tarea_list = widgets.Output()

def on_type(data):
  with tarea_list:
    tarea_list.clear_output()
    title = data["new"]
    if len(title) > 5:
      display(search(title))


tarea_input.observe(on_type, names = 'value')

display(tarea_input, tarea_list)

Text(value='Ejercicio', description='Tarea:')

Output()

In [9]:
ratings = pd.read_csv("../input/puntuacion.csv")

In [10]:
ratings

Unnamed: 0,user_id,tarea_id,rating
0,1,1,3
1,1,2,5
2,1,3,5
3,1,4,5
4,1,5,5
...,...,...,...
4381,34,125,3
4382,34,126,2
4383,34,127,2
4384,34,128,3


In [11]:
ratings.dtypes

user_id     int64
tarea_id    int64
rating      int64
dtype: object

In [12]:
tarea_id = 1

In [13]:
similar_users = ratings[(ratings["tarea_id"] == tarea_id) & (ratings["rating"]> 4)]["user_id"].unique()

In [14]:
similar_users

array([ 4,  7, 19, 23, 31, 33, 34], dtype=int64)

In [15]:
similar_user_recs = ratings[(ratings["user_id"].isin(similar_users)) & (ratings["rating"]> 4)]["tarea_id"]

In [16]:
similar_user_recs

387       1
396      10
398      12
400      14
414      28
       ... 
4362    106
4364    108
4370    114
4372    116
4385    129
Name: tarea_id, Length: 196, dtype: int64

In [18]:
similar_user_recs = similar_user_recs.value_counts() / len(similar_users)

similar_user_recs = similar_user_recs[similar_user_recs > .1]

In [19]:
similar_user_recs

tarea_id
1      1.000000
14     0.714286
100    0.571429
18     0.571429
77     0.571429
         ...   
24     0.142857
61     0.142857
91     0.142857
93     0.142857
116    0.142857
Name: count, Length: 97, dtype: float64

In [20]:
all_users = ratings[(ratings["tarea_id"].isin(similar_user_recs.index)) & (ratings["rating"] > 4)]

In [21]:
all_users

Unnamed: 0,user_id,tarea_id,rating
2,1,3,5
4,1,5,5
5,1,6,5
17,1,18,5
31,1,32,5
...,...,...,...
4362,34,106,5
4364,34,108,5
4370,34,114,5
4372,34,116,5


In [22]:
all_users_recs = all_users["tarea_id"].value_counts() / len(all_users["user_id"].unique())

In [23]:
all_users_recs

tarea_id
74     0.411765
29     0.352941
123    0.352941
31     0.323529
115    0.323529
         ...   
16     0.088235
87     0.088235
88     0.088235
97     0.088235
39     0.058824
Name: count, Length: 97, dtype: float64

In [24]:
rec_percentages = pd.concat([similar_user_recs, all_users_recs],axis = 1)
rec_percentages.columns = ["similar","all"]

In [25]:
rec_percentages

Unnamed: 0_level_0,similar,all
tarea_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.000000,0.205882
14,0.714286,0.235294
100,0.571429,0.235294
18,0.571429,0.264706
77,0.571429,0.264706
...,...,...
24,0.142857,0.176471
61,0.142857,0.117647
91,0.142857,0.205882
93,0.142857,0.235294


In [26]:
rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"] 

In [27]:
rec_percentages = rec_percentages.sort_values("score",ascending = False)

In [28]:
rec_percentages

Unnamed: 0_level_0,similar,all,score
tarea_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.000000,0.205882,4.857143
108,0.571429,0.147059,3.885714
88,0.285714,0.088235,3.238095
16,0.285714,0.088235,3.238095
14,0.714286,0.235294,3.035714
...,...,...,...
104,0.142857,0.235294,0.607143
71,0.142857,0.235294,0.607143
116,0.142857,0.294118,0.485714
26,0.142857,0.323529,0.441558


In [29]:
rec_percentages.head(10).merge(tareas, left_index=True, right_on = "Id")

Unnamed: 0,similar,all,score,Tareas,Duracion,Area,Id,clean_title
0,1.0,0.205882,4.857143,Hacer la cama,5 minutos,Organización,1,Hacer la cama
106,0.571429,0.147059,3.885714,Realizar ejercicios de relajación,10 minutos,Bienestar Mental,108,Realizar ejercicios de relajacin
86,0.285714,0.088235,3.238095,Organizar y planificar el día siguiente,10 minutos,Organización,88,Organizar y planificar el da siguiente
14,0.285714,0.088235,3.238095,Pasear al perro,30 minutos,Mascotas,16,Pasear al perro
12,0.714286,0.235294,3.035714,Hacer la colada,1 hora,Tareas del hogar,14,Hacer la colada
4,0.428571,0.176471,2.428571,Planificar la cena,10 minutos,Alimentación,6,Planificar la cena
37,0.142857,0.058824,2.428571,Hacer estiramientos suaves,10 minutos,Salud y Bienestar,39,Hacer estiramientos suaves
98,0.571429,0.235294,2.428571,Actualizar el calendario/agenda,5 minutos,Organización,100,Actualizar el calendarioagenda
49,0.428571,0.176471,2.428571,Hacer una pausa para estirarse,5 minutos,Salud y Bienestar,51,Hacer una pausa para estirarse
75,0.571429,0.264706,2.15873,Realizar ejercicio físico,30 minutos,Salud y Fitness,77,Realizar ejercicio fsico


In [30]:
def find_similar_tarea(tarea_id):
  similar_users = ratings[(ratings["tarea_id"] == tarea_id) & (ratings["rating"]>4)] ["user_id"].unique()
  similar_user_recs = ratings[(ratings["user_id"].isin(similar_users)) & (ratings["rating"]>4)]["tarea_id"]

  similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
  similar_user_recs = similar_user_recs[similar_user_recs > .10]

  all_users = ratings[(ratings["tarea_id"].isin(similar_user_recs.index)) & (ratings["rating"]>4)]
  all_user_recs = all_users["tarea_id"].value_counts() / len(all_users["user_id"].unique())

  rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis= 1)
  rec_percentages.columns = ["similar","all"]

  rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

  rec_percentages = rec_percentages.sort_values("score",ascending = False)
  return rec_percentages.head(10).merge(tareas,left_index=True,right_on = "Id")[["score","Tareas","Area"]]

In [31]:
tarea_input_name = widgets.Text(
    value = "Hacer ejercicio fisico",
    description = "Nombre Tarea:",
    disables = False
)

recommendation_list = widgets.Output()

def on_type(data):
  with recommendation_list:
    recommendation_list.clear_output()
    title = data["new"]
    if len(title) > 5:
      results = search(title)
      tarea_id = results.iloc[0] ["Id"]
      display(find_similar_tarea(tarea_id))

tarea_input_name.observe(on_type, names= "value")

display(tarea_input_name, recommendation_list)

Text(value='Hacer ejercicio fisico', description='Nombre Tarea:')

Output()