### Preparación de archivos y procesamiento de datos

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix

In [2]:
data_items = pd.read_csv('BX_Books.csv', sep=';', error_bad_lines=False, encoding='latin-1')
data = pd.read_csv('BX-Book-Ratings.csv', sep=';', error_bad_lines=False, encoding='latin-1')

In [3]:
data.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [4]:
data_items.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton & Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [5]:
books_data = data_items.drop(['Year-Of-Publication','Image-URL-S','Image-URL-M','Image-URL-L'],axis=1)
ratings = books_data.merge(data,on='ISBN')

In [6]:
number_of_ratings = ratings.groupby('ISBN')['Book-Rating'].count().reset_index()
number_of_ratings.rename(columns={'Book-Rating':'Number of Book-Rating'}, inplace=True)
ratings = ratings.merge(number_of_ratings,on='ISBN')

In [7]:
ratings = ratings[ratings['Number of Book-Rating'] >= 30]
ratings.shape

(315065, 7)

In [8]:
ratings.drop_duplicates(['User-ID', 'Book-Title'], inplace=True)

In [9]:
ratings.shape

(313546, 7)

In [10]:
book_pivot=ratings.pivot_table(columns='User-ID',index='Book-Title',values='Book-Rating')
book_pivot.fillna(0, inplace=True)
book_pivot

User-ID,8,9,10,14,16,17,26,32,39,42,...,278831,278832,278836,278838,278843,278844,278846,278849,278851,278854
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Salem's Lot,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Lb. Penalty,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16 Lighthouse Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\O\"" Is for Outlaw""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"\Surely You're Joking, Mr. Feynman!\"": Adventures of a Curious Character""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
book_sparse=csr_matrix(book_pivot)

### Modelo

In [12]:
from sklearn.neighbors import NearestNeighbors
k = 10
model=NearestNeighbors(n_neighbors=k,algorithm='brute')
model.fit(book_sparse)

NearestNeighbors(algorithm='brute', n_neighbors=10)

In [13]:
distances,suggestions=model.kneighbors(book_pivot.iloc[253,:].values.reshape(1,-1))

In [14]:
distances

array([[ 0.        , 34.2636834 , 34.7706773 , 35.42597917, 35.58089375,
        35.60898763, 36.26292873, 36.31803959, 36.33180425, 36.35931793]])

In [15]:
suggestions

array([[ 253, 3541, 1180, 2622, 1181, 2845, 3196, 1369, 2333,  637]],
      dtype=int64)

In [16]:
for i in range(len(suggestions)):
    print(book_pivot.index[suggestions[i]])

Index(['Angle of Repose (Contemporary American Fiction)', 'Thornyhold',
       'Golden Cup', 'Tall, Dark, and Deadly', 'Golden Orange',
       'The Curse of the Mummy's Tomb (Goosebumps, No 5)', 'The Midnight Hour',
       'Icon', 'Second Wind', 'Commitments'],
      dtype='object', name='Book-Title')


In [17]:
def reco(book_name):
    book_id=np.where(book_pivot.index==book_name)[0][0]
    distances,suggestions=model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1))
    
    for i in range(len(suggestions)):
        if i==0:
            print("the suggestions are ",book_name,"are : ")
        if not i:
            print(book_pivot.index[suggestions[i]])

In [18]:
reco('Jurassic Park')

the suggestions are  Jurassic Park are : 
Index(['Jurassic Park', 'McNally's Caper (Archy McNally Novels (Paperback))',
       'GEMINI CONTENDERS', 'Night Mare #06', 'After Dark', 'Golden Cup',
       'Fatal Terrain', 'Night of the Living Dummy (Goosebumps, No 7)',
       'Women in His Life', 'The Satanic Verses'],
      dtype='object', name='Book-Title')


In [19]:
reco("Harry Potter and the Sorcerer's Stone (Book 1)")

the suggestions are  Harry Potter and the Sorcerer's Stone (Book 1) are : 
Index(['Harry Potter and the Sorcerer's Stone (Book 1)', 'Falling Backwards',
       'Golden Cup', 'Fatal Terrain',
       'Attack of the Mutant (Goosebumps, No 25)', 'The Invitation',
       'McNally's Caper (Archy McNally Novels (Paperback))',
       'GEMINI CONTENDERS', 'Hidden Leaves (Debeers)',
       'The Mystery of the Cupboard (Indian in the Cupboard)'],
      dtype='object', name='Book-Title')


In [20]:
reco('The Two Towers (The Lord of the Rings, Part 2)')

the suggestions are  The Two Towers (The Lord of the Rings, Part 2) are : 
Index(['The Two Towers (The Lord of the Rings, Part 2)',
       'The Return of the King (The Lord of the Rings, Part 3)', 'Golden Cup',
       'Tall, Dark, and Deadly', 'Golden Orange',
       'The Curse of the Mummy's Tomb (Goosebumps, No 5)', 'Thornyhold',
       'The Satanic Verses', 'Snagged', 'The Midnight Hour'],
      dtype='object', name='Book-Title')


In [21]:
def recoForInteface(book_name):
  book_id=np.where(book_pivot.index==book_name)[0][0]
  distances,suggestions=model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1))
  #result = ""
  #result += "the suggestions for "+book_name+" are:"
  result = []
  for i in range(len(suggestions)):
    #if i==0:
    #  result += "the suggestions for "+book_name+" are:"
    #if not i:
    result.append(book_pivot.index[suggestions[i]])
  return result

In [22]:
recoForInteface('Jurassic Park')

[Index(['Jurassic Park', 'McNally's Caper (Archy McNally Novels (Paperback))',
        'GEMINI CONTENDERS', 'Night Mare #06', 'After Dark', 'Golden Cup',
        'Fatal Terrain', 'Night of the Living Dummy (Goosebumps, No 7)',
        'Women in His Life', 'The Satanic Verses'],
       dtype='object', name='Book-Title')]

In [42]:
listaDeTitulos = list(set(ratings["Book-Title"]))
listaDeTitulos

['The Four Agreements: A Practical Guide to Personal Freedom',
 'Walking Across Egypt',
 'Die SÃ?Â¤ulen der Erde. Roman.',
 'When We Were Orphans (Vintage International (Paperback))',
 'Survivor : A Novel',
 'Generation X: Tales for an Accelerated Culture',
 'Dracula (Bantam Classics)',
 "The Liar's Club: A Memoir",
 'Miss Julia Takes over',
 'Monkey Wrench Gang',
 'Mayday',
 'A Morbid Taste for Bones: The First Chronicle of Brother Cadfael',
 'Holy Blood, Holy Grail',
 'ACCORDION CRIMES',
 'Perfume: The Story of a Murderer (Vintage International)',
 'Lost',
 'Deck the Halls (Holiday Classics)',
 'Night',
 'Montana 1948 : Montana 1948',
 'The Least Likely Bride',
 "I'm Not Really Here",
 'Accordion Crimes',
 'The Chamber',
 'Grass',
 'Imajica',
 "Mirror of Her Dreams (Mordant's Need)",
 'Reading Lolita in Tehran: A Memoir in Books',
 'Tishomingo Blues',
 'Cows Of Our Planet (Far Side Series)',
 'The Two Mrs. Grenvilles',
 'From the Heart: Tonight and Always/A Matter of Choice/Endings a

### Interfaz




In [24]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output# Load Data

In [43]:

df = px.data.tips()# Build App
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("Proyecto Final IA: Grupo 3"),
    html.H2("Sistema de recomendación de libros"),
    dcc.Input(
            id="book",
            type="text",
            placeholder="Input the book tittle",
        ),
    html.Br(),
    html.Div(id="output"),

])

@app.callback(
    Output("output", "children"),
    Input("book", "value"),
)
def update_output(book):
    if book in listaDeTitulos:
      rec = recoForInteface(book)
    else:
      rec = "book not found"
    return u'book: {}'.format(rec)

In [44]:
app.run_server(mode='external')

Dash app running on http://127.0.0.1:8050/


### Deploy

In [34]:
from pyngrok import ngrok

# Open a HTTP tunnel on the default port 80
public_url = ngrok.connect(port = '8050')

In [35]:
public_url

<NgrokTunnel: "http://b05d66b16edb.ngrok.io" -> "http://localhost:80">

In [37]:
ngrok.kill()