# 1. Importar librerías

In [105]:
import numpy as np
import pandas as pd
import sqlite3 as sql
from sklearn.preprocessing import MinMaxScaler
from ipywidgets import interact ## para análisis interactivo
from sklearn import neighbors ### basado en contenido un solo producto consumido
import joblib
from sklearn.preprocessing import MinMaxScaler

# 2. Importar datos

In [7]:
# Open the SQLite database file that holds the movie tables; conn is reused by every query below.
conn = sql.connect('data/db_movies')
# Cursor for ad-hoc statements (used in the next cell to list the tables).
cur = conn.cursor()

In [8]:
# List the tables available in the database.
# The string literal uses single quotes: in SQL double quotes denote identifiers, and SQLite
# only accepts them as strings through a legacy fallback that can be disabled at build time.
cur.execute("select name from sqlite_master where type = 'table'")
cur.fetchall()

[('ratings',),
 ('movies',),
 ('movies2',),
 ('ratings2',),
 ('movies_rating',),
 ('genres',)]

# 3. Sistemas basados en popularidad

### 3.1. Top 10 películas con más calificaciones

In [9]:
# Popularity ranking: one row per movieId with its mean rating (rating_prom) and the number
# of rating rows (view_num), sorted by view_num so the 10 most-rated movies come first.
query = '''
SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
FROM movies_rating
GROUP BY movieId
ORDER BY view_num DESC
LIMIT 10;
'''
# Bare last expression so the notebook renders the resulting DataFrame.
pd.read_sql(query, conn)

Unnamed: 0,title,rating_prom,view_num
0,Forrest Gump (1994),4.164134,329
1,"Shawshank Redemption, The (1994)",4.429022,317
2,Pulp Fiction (1994),4.197068,307
3,"Silence of the Lambs, The (1991)",4.16129,279
4,"Matrix, The (1999)",4.192446,278
5,Star Wars: Episode IV - A New Hope (1977),4.231076,251
6,Jurassic Park (1993),3.75,238
7,Braveheart (1995),4.031646,237
8,Terminator 2: Judgment Day (1991),3.970982,224
9,Schindler's List (1993),4.225,220


### 3.2. 10 películas mejor calificadas (calificadas al menos 30 veces).

In [90]:
# Quality ranking: same aggregation per movieId, but sorted by mean rating.
# HAVING keeps only movies with at least 30 rating rows so that a handful of
# high scores cannot put a rarely rated movie at the top.
query = '''
SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
FROM movies_rating
GROUP BY movieId
HAVING view_num >= 30
ORDER BY rating_prom DESC
LIMIT 10;
'''
# Bare last expression so the notebook renders the resulting DataFrame.
pd.read_sql(query, conn)

Unnamed: 0,title,rating_prom,view_num
0,"Shawshank Redemption, The (1994)",4.429022,317
1,Lawrence of Arabia (1962),4.3,45
2,"Godfather, The (1972)",4.289062,192
3,Fight Club (1999),4.272936,218
4,Cool Hand Luke (1967),4.27193,57
5,Dr. Strangelove or: How I Learned to Stop Worr...,4.268041,97
6,Rear Window (1954),4.261905,84
7,"Godfather: Part II, The (1974)",4.25969,129
8,"Departed, The (2006)",4.252336,107
9,"Manchurian Candidate, The (1962)",4.25,30


### 3.3. Top 10 películas más calificadas por década

In [52]:
# Collect every distinct release year present in movies_rating, newest first.
query = '''
SELECT DISTINCT year as year
FROM movies_rating
ORDER BY year DESC
'''
# One-column DataFrame ('year'); the next cell adds a decade label to it.
years = pd.read_sql(query, conn)

In [75]:
# Decade boundaries 1900, 1910, ..., 2020: 13 edges delimiting 12 decades.
decade_edges = list(range(1900, 2021, 10))
# Labels such as '1900-1910'; the functions below split them on '-' and treat the
# second year as an exclusive upper bound.
decadas = ['{}-{}'.format(start, start + 10) for start in decade_edges[:-1]]
# Bin on the explicit decade edges with left-closed intervals ([1900, 1910), ...), so each
# label matches the years it names. Passing only a bin count would instead split the observed
# min..max range into equal-width bins that do not line up with decades.
# Years outside 1900-2019 get NaN rather than a wrong label.
years['decada'] = pd.cut(years.year.astype(int), bins = decade_edges, right = False, labels = decadas)

In [78]:
def top10_dec(Decada):
    """Return the 10 most-rated movies released within a decade.

    Parameters
    ----------
    Decada : str
        Decade label of the form 'START-END' (e.g. '1990-2000'). START is
        inclusive and END is exclusive.

    Returns
    -------
    pandas.DataFrame
        Columns title, rating_prom (mean rating) and view_num (number of
        rating rows), sorted by view_num in descending order, at most 10 rows.
    """
    bounds = Decada.split('-')
    start_year, end_year = bounds[0], bounds[1]
    # The bounds travel as bound parameters instead of being spliced into the SQL text,
    # so the statement no longer depends on quoting of the label contents.
    # NOTE(review): the values are deliberately kept as text, not cast to int; the storage
    # type of `year` is not visible here and text keeps SQLite's comparison rules unchanged.
    query = '''
    SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
    FROM movies_rating
    WHERE year >= ? AND year < ?
    GROUP BY movieId
    ORDER BY view_num DESC
    LIMIT 10;
    '''
    return pd.read_sql(query, conn, params = (start_year, end_year))

# Dropdown with the decade labels; each selection calls top10_dec and displays its result.
interact(top10_dec, Decada = decadas)

interactive(children=(Dropdown(description='Decada', options=('1900-1910', '1910-1920', '1920-1930', '1930-194…

<function __main__.top10_dec(Decada)>

### 3.4. Top 10 películas mejor calificadas por década (calificadas al menos 30 veces).

In [89]:
def top10_rating_dec(Decada):
    """Return the 10 best-rated movies of a decade with at least 30 ratings.

    Parameters
    ----------
    Decada : str
        Decade label of the form 'START-END' (e.g. '1990-2000'). START is
        inclusive and END is exclusive.

    Returns
    -------
    pandas.DataFrame
        Columns title, rating_prom (mean rating) and view_num (number of
        rating rows), sorted by rating_prom in descending order, at most 10
        rows. Movies with fewer than 30 rating rows are excluded.
    """
    bounds = Decada.split('-')
    start_year, end_year = bounds[0], bounds[1]
    # The bounds travel as bound parameters instead of being spliced into the SQL text,
    # so the statement no longer depends on quoting of the label contents.
    # NOTE(review): the values are deliberately kept as text, not cast to int; the storage
    # type of `year` is not visible here and text keeps SQLite's comparison rules unchanged.
    query = '''
    SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
    FROM movies_rating
    WHERE year >= ? AND year < ?
    GROUP BY movieId
    HAVING view_num >= 30
    ORDER BY rating_prom DESC
    LIMIT 10;
    '''
    return pd.read_sql(query, conn, params = (start_year, end_year))

# Dropdown with the decade labels; each selection calls top10_rating_dec and displays its result.
interact(top10_rating_dec, Decada = decadas)

interactive(children=(Dropdown(description='Decada', options=('1900-1910', '1910-1920', '1920-1930', '1930-194…

<function __main__.top10_rating_dec(Decada)>

### 3.5. Top 10 películas más calificadas por género

In [81]:
# Fetch the genre names from the genres table; they populate the genre dropdowns below
# and are interpolated into the per-genre queries.
query = '''
SELECT `Género`
FROM genres
'''
genres = pd.read_sql(query, conn)

In [82]:
def top10_views_genre(Genre):
    """Return the 10 movies with the most ratings within a genre.

    Parameters
    ----------
    Genre : str
        Name of a column of movies_rating; the query sums that column per
        movie to obtain views_num.
        NOTE(review): presumably a 0/1 genre indicator - confirm against the
        table schema.

    Returns
    -------
    pandas.DataFrame
        Columns title, rating_prom (mean rating) and views_num, sorted by
        views_num in descending order, at most 10 rows.
    """
    # A column name cannot be passed as a bound parameter, so it is quoted as an identifier
    # instead. Unquoted, a name such as Sci-Fi would be parsed as the expression `Sci - Fi`.
    # Embedded back-quotes are doubled so the name cannot terminate the quoting.
    genre_column = '`{}`'.format(str(Genre).replace('`', '``'))
    query = '''
    SELECT title,
            avg(rating) as rating_prom,
            sum({}) as views_num
    FROM movies_rating
    GROUP BY movieId
    ORDER BY views_num DESC
    LIMIT 10;
    '''.format(genre_column)
    return pd.read_sql(query, conn)

# Dropdown with the genre names; each selection calls top10_views_genre and displays its result.
interact(top10_views_genre, Genre = list(genres['Género']))

interactive(children=(Dropdown(description='Genre', options=('Drama', 'Comedy', 'Action', 'Thriller', 'Adventu…

<function __main__.top10_views_genre(Genre)>

### 3.6. Top 10 películas mejor calificadas por género (calificadas al menos 30 veces).

In [87]:
def top10_rating_genre(Genre):
    """Return the 10 best-rated movies of a genre with at least 30 ratings.

    Parameters
    ----------
    Genre : str
        Name of a column of movies_rating; the query sums that column per
        movie to obtain views_num.
        NOTE(review): presumably a 0/1 genre indicator - confirm against the
        table schema.

    Returns
    -------
    pandas.DataFrame
        Columns title, rating_prom (mean rating) and views_num, sorted by
        rating_prom in descending order, at most 10 rows. Movies whose
        views_num is below 30 are excluded.
    """
    # A column name cannot be passed as a bound parameter, so it is quoted as an identifier
    # instead. Unquoted, a name such as Sci-Fi would be parsed as the expression `Sci - Fi`.
    # Embedded back-quotes are doubled so the name cannot terminate the quoting.
    genre_column = '`{}`'.format(str(Genre).replace('`', '``'))
    query = '''
    SELECT title,
            avg(rating) as rating_prom,
            sum({}) as views_num
    FROM movies_rating
    GROUP BY movieId
    HAVING views_num >= 30
    ORDER BY rating_prom DESC
    LIMIT 10;
    '''.format(genre_column)
    return pd.read_sql(query, conn)

# Dropdown with the genre names; each selection calls top10_rating_genre and displays its result.
interact(top10_rating_genre, Genre = list(genres['Género']))

interactive(children=(Dropdown(description='Genre', options=('Drama', 'Comedy', 'Action', 'Thriller', 'Adventu…

<function __main__.top10_rating_genre(Genre)>

# 4. Sistema de recomendación basado en contenido

### 4.1. KNN una sola película vista.

Importar base de datos solo de películas

In [97]:
# Load the whole movies2 table (one DataFrame, default integer index); it is the input
# of the content-based model built in the following cells.
movies = pd.read_sql('SELECT * FROM movies2;', conn)

Escalar la variable año

In [107]:
# Feature matrix for the similarity model: everything except the identifying columns.
movies_std = movies.drop(columns = ['movieId', 'title'])
# Min-max scale `year` in place (scaler defaults map it onto [0, 1]).
# NOTE(review): presumably this puts year on the same scale as the remaining columns - confirm.
sc = MinMaxScaler()
movies_std[['year']] = sc.fit_transform(movies_std[['year']])

Modelo con 11 vecinos más cercanos

In [109]:
# Nearest-neighbour search over the feature matrix using cosine distance.
# 11 neighbours are requested - presumably the movie itself plus 10 recommendations,
# since BookRecommender below removes the selected title from the list (TODO confirm).
model = neighbors.NearestNeighbors(n_neighbors = 11, metric='cosine')
model.fit(movies_std)
# Query with the same rows used for fitting: row i of dist / idlist holds the distances to,
# and the row positions of, the neighbours of row i of movies_std.
dist, idlist = model.kneighbors(movies_std)

# DataFrame copies of the distance and neighbour-position arrays.
distancias = pd.DataFrame(dist)
id_list = pd.DataFrame(idlist)

Sistema de recomendación

In [139]:
def BookRecommender(movies_name = np.sort(list(movies['title'].value_counts().index))):
    """Return the titles of the movies most similar to the selected one.

    The function name is kept for compatibility although it recommends
    movies. The default value is the sorted array of unique titles; `interact`
    turns it into a dropdown and passes the single selected title on each call.

    Parameters
    ----------
    movies_name : str
        Title to look up in ``movies['title']``. When several rows share the
        title, the first one is used.

    Returns
    -------
    pandas.DataFrame
        One column, 'Movie', with the titles of the neighbours stored in
        ``idlist`` for the selected movie, excluding the selected movie itself.

    Raises
    ------
    IndexError
        If no row of ``movies`` has the given title.
    """
    all_titles = movies['title'].to_numpy()
    # Work with row positions throughout: idlist holds positions into the fitted matrix,
    # so looking movies up by position stays correct whatever index `movies` carries.
    matches = np.flatnonzero(all_titles == movies_name)
    if matches.size == 0:
        raise IndexError('No movie titled {!r} was found'.format(movies_name))
    movie_pos = matches[0]

    neighbor_pos = np.asarray(idlist[movie_pos])
    recommendations = pd.DataFrame({'Movie': all_titles[neighbor_pos]})

    # Remove the selected movie by position, not by title: a different movie with the same
    # title must stay in the list, and with tied distances the selected movie may be missing
    # from its own neighbour list altogether.
    is_selected = neighbor_pos == movie_pos
    if is_selected.any():
        return recommendations[~is_selected]
    # Selected movie absent from its neighbours: drop the farthest one so the result keeps
    # the same length as in the usual case.
    return recommendations.iloc[:-1]


# interact() renders the dropdown and the recommendations by itself; wrapping the call in
# print() only wrote the function's repr to stdout.
interact(BookRecommender)

interactive(children=(Dropdown(description='movies_name', options=("'71 (2014)", "'Hellboy': The Seeds of Crea…

<function BookRecommender at 0x000002A646851800>
