In [81]:
from flask import Flask, render_template, request, jsonify
from flaskext.mysql import MySQL
from surprise import dump, KNNBasic
from tqdm import tqdm

from rec_utils import *
from db_helper import DbHelper

import pickle
import pymysql  # mysql error handling
import functools
import itertools
import json
import random

import numpy as np
import pandas as pd

In [2]:
# Create the Flask app and load its settings from config.py
app = Flask(__name__)
app.config.from_pyfile('config.py')

# Bind the MySQL extension to the app
mysql = MySQL()
mysql.init_app(app)

# Open one connection and one cursor that the rest of the notebook shares
conn = mysql.connect()
cursor = conn.cursor()

In [82]:
# Wrap the shared connection and cursor in the project's DbHelper
db = DbHelper(conn, cursor)

## Registro

In [123]:
def register(socio):
    '''
    Register a new member by calling the `insert_socio` stored procedure.

    Uses the module-level `cursor` and `conn`.

    Args:
        socio: dict with the keys
            ('apPaterno', 'apMaterno', 'nombre', 'edad', 'genero', 'email', 'passwd').

    Returns:
        dict with two flags: 'email_used' is True when the insert was rejected
        with an integrity error mentioning 'c_uniq_email_passwd', and 'success'
        is True when the procedure ran and the transaction was committed.

    Raises:
        KeyError: if `socio` lacks one of the required keys.
        Any database error other than an integrity error is propagated.
    '''
    res = {'email_used': False, 'success': False}
    args_order = ('apPaterno', 'apMaterno', 'nombre', 'edad', 'genero', 'email', 'passwd')
    # Build the arguments before touching the database so malformed input
    # fails loudly instead of being reported as a failed registration.
    args = tuple(socio[arg] for arg in args_order)
    try:
        cursor.callproc('insert_socio', args)
        # Commit only when the insert went through.
        conn.commit()
        res['success'] = True
    except pymysql.err.IntegrityError as err:
        # Discard the failed statement instead of committing it.
        conn.rollback()
        if 'c_uniq_email_passwd' in str(err):
            res['email_used'] = True
        print(err)
    # Returning here (not inside a `finally`) lets unexpected errors propagate
    # instead of being silently discarded.
    return res

In [84]:
# Sample member used to exercise the registration flow
socio = dict(
    apPaterno="Mendoza",
    apMaterno="Hernandez",
    nombre="Alma",
    edad=22,
    genero="F",
    email="almis@gmail.com",
    passwd="frio",
)
db.register(socio)

{'email_used': False, 'success': True}

In [85]:
# Register the same member a second time; the duplicate email is rejected (see output)
db.register(socio)

(1062, "Duplicate entry 'almis@gmail.com' for key 'socio.c_uniq_email_passwd'")


{'email_used': True, 'success': False}

In [86]:
# A different person reusing an email that is already registered
socio = dict(
    apPaterno="Fragoso",
    apMaterno="Hernandez",
    nombre="Abigail",
    edad=19,
    genero="F",
    email="almis@gmail.com",
    passwd="frio",
)
db.register(socio)

(1062, "Duplicate entry 'almis@gmail.com' for key 'socio.c_uniq_email_passwd'")


{'email_used': True, 'success': False}

## Login

In [87]:
# Log in with the email and password of the sample member
email = 'almis@gmail.com'
passwd = 'frio'
db.login(email, passwd)

{'email_found': True,
 'correct_passwd': True,
 'idSocio': 'cbf632d90c51d0783942'}

In [88]:
# Same email, wrong password
passwd = 'incorrecto'
db.login(email, passwd)

{'email_found': True, 'correct_passwd': False, 'idSocio': False}

In [89]:
# Email that is not registered
email = 'noexiste@gmail.com'
db.login(email, 'falsooo')

{'email_found': False, 'correct_passwd': False, 'idSocio': False}

## Product info

In [16]:
# Look up a single product by its raw item id
raw_iid = 'B005FIWTHO'
get_product_info(raw_iid)

{'idProducto': 'B005FIWTHO',
 'nombre': 'Quicken Premier 2012',
 'marca': 'Intuit',
 'precioUnitario': 44.96298689479438,
 'idSubCat': 17}

In [17]:
# Look up several products in one call.
# NOTE(review): in the captured output the rows are not in the same order as
# raw_iids, so any ranking encoded in the input list is not preserved - confirm.
raw_iids = ['B005FIWTHO','B0053WX3AY','B00EZPXYP4','B00BFNCFZ4',
            'B0013QQWQG','B0123C60EW','B015724RQI','B000HCZ8EO',
            'B00LC9UU6C','B000X86ZAS']
get_products_info(raw_iids)

SELECT idProducto, nombre, marca, precioUnitario, idSubCat FROM producto 
                    WHERE idProducto in (
                            "B005FIWTHO", "B0053WX3AY", "B00EZPXYP4", "B00BFNCFZ4", "B0013QQWQG", "B0123C60EW", "B015724RQI", "B000HCZ8EO", "B00LC9UU6C", "B000X86ZAS")


[{'idProducto': 'B000HCZ8EO',
  'nombre': 'Microsoft Office Home and Student 2007 [Old Version]',
  'marca': 'Microsoft',
  'precioUnitario': 149.99,
  'idSubCat': 17},
 {'idProducto': 'B000X86ZAS',
  'nombre': 'Microsoft Office 2008 for Mac Home &amp; Student Edition [Old Version]',
  'marca': 'Microsoft',
  'precioUnitario': 3.0,
  'idSubCat': 17},
 {'idProducto': 'B0013QQWQG',
  'nombre': 'Office 97 Professional With Bookshelf Basics',
  'marca': 'Microsoft',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B0053WX3AY',
  'nombre': 'Dragon NaturallySpeaking Home, European Version 11.5',
  'marca': 'Nuance Communications, Inc.',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B005FIWTHO',
  'nombre': 'Quicken Premier 2012',
  'marca': 'Intuit',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B00BFNCFZ4',
  'nombre': 'VideoStudio Pro X6 [OLD VERSION]',
  'marca': 'Corel',
  'precioUnitario': 19.95,
  'idSu

In [18]:
# Second batch lookup with a different set of raw item ids
raw_iids = ['B000SAUFBG', 'B0013WO6QS', 'B004E9SKF0', 'B005EJ2J1K',
       'B00C79J0YE', 'B00005ICD3', 'B00008CQPH']
get_products_info(raw_iids)

SELECT idProducto, nombre, marca, precioUnitario, idSubCat FROM producto 
                    WHERE idProducto in (
                            "B000SAUFBG", "B0013WO6QS", "B004E9SKF0", "B005EJ2J1K", "B00C79J0YE", "B00005ICD3", "B00008CQPH")


[{'idProducto': 'B00005ICD3',
  'nombre': 'Rosetta Stone V2: Hebrew Level 1 [OLD VERSION]',
  'marca': 'Rosetta Stone',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B00008CQPH',
  'nombre': 'VCOM Partition Commander 8.0',
  'marca': 'V Communications',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B000SAUFBG',
  'nombre': 'Partition Commander 10',
  'marca': 'Avanquest',
  'precioUnitario': 44.8,
  'idSubCat': 17},
 {'idProducto': 'B0013WO6QS',
  'nombre': 'Norton 360 Version 2.0 Premier Edition [Old Version]',
  'marca': 'Symantec',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B004E9SKF0',
  'nombre': 'Microsoft Office Home &amp; Business 2010 - 1 User-2 PC [Download]',
  'marca': 'Microsoft',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B005EJ2J1K',
  'nombre': 'Bitdefender AntiVirus Plus 2012 Standard M2 - 3Pc/1 Year [Old Version]',
  'marca': 'Bitdefender',
  'prec

## Recomendaciones 

In [62]:
# Disabled alternative: load a previously dumped model from ./model/
# instead of fitting a new one.
# file_name = 'algo.model'
# wdir = './model/'
# _, algo = dump.load(wdir + file_name)

# Fix the seeds of both RNGs so repeated runs give the same experiments
my_seed = 0
random.seed(my_seed)
np.random.seed(my_seed)

In [49]:
# Build a KNNBasic estimator with Pearson similarity; user_based=False
# makes the similarities item-to-item.
sim_options = {'name': 'pearson', 'user_based': False}
algo = KNNBasic(sim_options=sim_options)
# Disabled: time the fit and evaluate on the test split
# %time algo.fit(trainset=trainset)
# preds = algo.test(testset)

In [63]:


# Load the reviews CSV and split it; the trainset is required to recommend
cols = ['reviewerID', 'asin', 'overall']
df_reviews = pd.read_csv('./model/software_reviews.csv')
trainset, testset = train_test_from_df(df_reviews, cols, test_size=0.2)

# Train the model. fit() already computes the similarity matrix and keeps it
# on the estimator as `sim`, so reuse it instead of computing it a second time.
algo.fit(trainset)
sims = algo.sim

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


In [92]:
# Recommend items for the user of the first test-set row and show both
raw_uid = testset[0][0]
raw_iids = get_top_item_based(algo, raw_uid, trainset, sims)  # raises an error if the raw uid is not in the trainset
raw_uid, raw_iids

('A2JSNRJV7DL5JZ',
 ['B005FIWTHO',
  'B0053WX3AY',
  'B00EZPXYP4',
  'B00BFNCFZ4',
  'B0013QQWQG',
  'B0123C60EW',
  'B015724RQI',
  'B000HCZ8EO',
  'B00LC9UU6C',
  'B000X86ZAS'])

If the uid is not in the trainset, `get_top_item_based` raises a `ValueError`.

In [76]:
# Collect the uid of every test row for which the recommender raises
# ValueError (the uid is missing from the trainset).
not_in_trainset = []
for uid in tqdm((row[0] for row in testset), total=len(testset)):
    try:
        get_top_item_based(algo, uid, trainset, sims)
    except ValueError:
        not_in_trainset.append(uid)


100%|██████████| 4378/4378 [00:47<00:00, 91.41it/s] 


In [66]:
# Number of test rows whose uid raised ValueError in the scan
len(not_in_trainset)

11

In [77]:
# Products reviewed by the user of the second test-set row
uid = testset[1][0]
uid_ratings = df_reviews[df_reviews.reviewerID == uid].asin.values
get_products_info(uid_ratings)

[{'idProducto': 'B000PIJT6I',
  'nombre': '3D Home Architect Home &amp; Landscape Design [Old Version]',
  'marca': 'Encore',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B00597EEIS',
  'nombre': 'PDF Fusion [Download]',
  'marca': 'Corel',
  'precioUnitario': 49.99,
  'idSubCat': 17},
 {'idProducto': 'B00EDWBAUK',
  'nombre': 'Paragon Migrate OS to SSD 3.0 [Download]',
  'marca': 'Paragon Software Group',
  'precioUnitario': 19.95,
  'idSubCat': 17},
 {'idProducto': 'B00MUY6KY4',
  'nombre': 'Trend Micro Maximum Security 2015 - 3 Devices  [OLD VERSION]',
  'marca': 'Trend Micro',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B012KPMWZI',
  'nombre': 'WinX DVD Ripper Platinum V7 [Download]',
  'marca': 'Digiarty Software',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17}]

In [78]:
# Item-based recommendations for `uid`, shown with their product details
iid_recs = get_top_item_based(algo, uid, trainset, sims)  # raises an error if the raw uid is not in the trainset
get_products_info(iid_recs)

[{'idProducto': 'B000Q6ZK3K',
  'nombre': 'Microsoft Works 9.0',
  'marca': 'Microsoft',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B001LQO4P4',
  'nombre': ' Roxio Easy VHS to DVD [Old Version]" />',
  'marca': 'Roxio',
  'precioUnitario': 49.68,
  'idSubCat': 17},
 {'idProducto': 'B001TYYZCA',
  'nombre': 'Corel Painter 11 [OLD VERSION]',
  'marca': 'Corel',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B002DHLUWK',
  'nombre': 'Microsoft Windows 7 Home Premium Upgrade [Old Version]',
  'marca': 'Microsoft',
  'precioUnitario': 218.83,
  'idSubCat': 17},
 {'idProducto': 'B004Y46PAC',
  'nombre': 'CyberLink PowerDVD 11 Ultra V.11',
  'marca': 'Cyberlink',
  'precioUnitario': 44.96298689479438,
  'idSubCat': 17},
 {'idProducto': 'B00EDSI7QO',
  'nombre': 'Microsoft Windows 8.1 - Full Version',
  'marca': 'Microsoft',
  'precioUnitario': 165.99,
  'idSubCat': 17},
 {'idProducto': 'B00LC9UU6C',
  'nombre': 'Kaspersky Internet S

In [90]:
def get_recommends(uid):
    '''
    Build item-based recommendations for a user and attach product details.

    Uses the module-level `algo`, `trainset`, `sims` and `db`.

    Args:
        uid: raw user id to recommend for.

    Returns:
        dict with the key 'was_possible'. When it is True the dict also holds
        'products', the value returned by db.get_products_info for the
        recommended item ids. When the recommender raises ValueError (the uid
        is not in the trainset) the result is {'was_possible': False}.
    '''
    try:
        # Guard only the recommender call: a ValueError coming from the
        # database layer must not be reported as "user not in trainset".
        iid_recs = get_top_item_based(algo, uid, trainset, sims)
    except ValueError:
        return {'was_possible': False}
    return {'was_possible': True, 'products': db.get_products_info(iid_recs)}
    

In [91]:
# Try the wrapper with the current `uid`
get_recommends(uid)

{'was_possible': True,
 'products': [{'idProducto': 'B000Q6ZK3K',
   'nombre': 'Microsoft Works 9.0',
   'marca': 'Microsoft',
   'precioUnitario': 44.96298689479438,
   'idSubCat': 17},
  {'idProducto': 'B001LQO4P4',
   'nombre': ' Roxio Easy VHS to DVD [Old Version]" />',
   'marca': 'Roxio',
   'precioUnitario': 49.68,
   'idSubCat': 17},
  {'idProducto': 'B001TYYZCA',
   'nombre': 'Corel Painter 11 [OLD VERSION]',
   'marca': 'Corel',
   'precioUnitario': 44.96298689479438,
   'idSubCat': 17},
  {'idProducto': 'B002DHLUWK',
   'nombre': 'Microsoft Windows 7 Home Premium Upgrade [Old Version]',
   'marca': 'Microsoft',
   'precioUnitario': 218.83,
   'idSubCat': 17},
  {'idProducto': 'B004Y46PAC',
   'nombre': 'CyberLink PowerDVD 11 Ultra V.11',
   'marca': 'Cyberlink',
   'precioUnitario': 44.96298689479438,
   'idSubCat': 17},
  {'idProducto': 'B00EDSI7QO',
   'nombre': 'Microsoft Windows 8.1 - Full Version',
   'marca': 'Microsoft',
   'precioUnitario': 165.99,
   'idSubCat': 17}