In [78]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler

# Datos

In [3]:
ratings = pd.read_csv('users_ratings_table.csv')
places = pd.read_csv('base_final_lugares.csv ')

In [19]:
# Formato
ratings = ratings[ratings['rating']!='None']
ratings['rating'] = ratings['rating'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


# Modelo

### Primero se filtra para un usuario

In [6]:
users_list = ratings['reviewer_id'].unique().tolist()

In [26]:
def get_users_ratings(data: pd.DataFrame, user_id):
    data_subset = data[data['reviewer_id']==user_id]
    #Agrupar por place_id
    data_grouped = data_subset.groupby('place_id').agg({'rating':['count', 'mean']}).reset_index()
    data_grouped.columns = data_grouped.columns.map('_'.join).str.strip('_')
    #Poner usuario
    data_grouped['user_id'] = user_id
    
    return data_grouped

In [27]:
users_rating = get_users_ratings(ratings, users_list[0])

### Filtrar para una experiencia

In [51]:
def get_experience_to_analyze(user_data: pd.DataFrame, places_data: pd.DataFrame, experience_type: str):
    places_details =  places_data[['place_id', 'tipo_lugar']]
    #Pegar tipo de lugar a los reviews
    user_data = pd.merge(user_data, places_details, how='left', on='place_id')
    #Filtrar para la experiencia requerida
    user_data_subset = user_data[user_data['tipo_lugar']==experience_type]
    # Tomar la experiencia a analizar
    ## El método por definir 
    user_data_sorted = user_data_subset.sort_values(by=['rating_mean', 'rating_count'], ascending=False)
    top_place = user_data_sorted.head(1)['place_id'].values[0]
    
    return top_place

In [53]:
experience = get_experience_to_analyze(users_rating, places, 'Comida mexicana')

### Obtener lugares similares

In [153]:
def similar_experiences(places_data: pd.DataFrame, experience: str, characteristics: list):
    type_of_experience = places_data[places_data['place_id']==experience]['tipo_lugar'].values[0]
    #Filtrar base de lugares 
    places_data_subset = places_data[places_data['tipo_lugar']==type_of_experience]
    
    #Encontrar los top 3 lugares similares
    ## Filtrar columnas que queremos
    variables = ['place_id']+characteristics
    data = places_data_subset[variables]
    ## Separar label de valores
    label = data[['place_id']].reset_index(drop=True)
    variables = data[characteristics]
    ## Imputar nulos
    ### define imputer
    imputer = KNNImputer(n_neighbors=5, weights='uniform', metric='nan_euclidean')
    ### fit/transform on the dataset
    variables_imputed = imputer.fit_transform(variables)
    ## Escalar valores
    scaler = MinMaxScaler()
    variables_imputed_scaled = scaler.fit_transform(variables_imputed)
    ##Regresar a df
    final_data = pd.DataFrame(variables_imputed_scaled, columns=characteristics)
    final_data = pd.concat([label, final_data], axis=1)
    
    ## Separar experiencia a analizar del dataset
    experience_user = final_data[final_data['place_id']==experience]
    experience_other = final_data[final_data['place_id']!=experience]
    experience_user_variables = experience_user[characteristics]
    experience_other_variables = experience_other[characteristics]
    
    #Calcular la distancia entre el valor del usuario y el resto de experiencias
    dist_list = []

    for _, i in experience_other_variables.iterrows():
        dist = np.linalg.norm(i-np.squeeze(experience_user_variables,0))
        dist_list.append(dist)
    experience_other = experience_other.copy()
    experience_other['similarity'] = dist_list
    #Obtener top 3
    rec = experience_other.sort_values(by='similarity').tail(3)['place_id'].tolist()
    
    return rec

In [154]:
columns_to_consider = ['price_level', 'comida_norm', 'servicio_norm', 'ambiente_norm']
similar_experiences(places, experience, columns_to_consider)

['ChIJs29OZZH40YUR73753MLhEnc',
 'ChIJ0RedUVYCzoURQcJ0Bo5LJew',
 'ChIJ0-IjfKoDzoURrhSoInEmyrs']