# 1. Importar librerías

In [2]:
import numpy as np
import pandas as pd
import sqlite3 as sql
from sklearn.preprocessing import MinMaxScaler
from ipywidgets import interact ## para análisis interactivo
from sklearn import neighbors ### basado en contenido un solo producto consumido
import joblib

# 2. Importar datos

In [3]:
# Open the SQLite database queried by the cells below (path is relative to
# the working directory) and keep a cursor for ad-hoc statements.
DB_PATH = 'data/db_movies'
conn = sql.connect(DB_PATH)
cur = conn.cursor()

In [4]:
# List the tables stored in the database.
# The literal is single-quoted on purpose: in SQL, double quotes denote
# identifiers, and SQLite only treats "table" as a string through a legacy
# fallback that can be disabled at build/run time.
cur.execute("select name from sqlite_master where type = 'table'")
cur.fetchall()

[('ratings',), ('movies',), ('movies2',), ('ratings2',), ('movies_rating',)]

# 3. Sistemas basados en popularidad

### 3.1. Top 10 películas más vistas

In [14]:
# Popularity baseline: group the rows of movies_rating by movieId, use the
# row count of each group as its number of views and keep the 10 largest.
# (Assumes one row per rating in movies_rating -- TODO confirm.)
query = '''
SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
FROM movies_rating
GROUP BY movieId
ORDER BY view_num DESC
LIMIT 10;
'''
top10_most_viewed = pd.read_sql(sql=query, con=conn)
top10_most_viewed

Unnamed: 0,title,rating_prom,view_num
0,Forrest Gump,4.164134,329
1,"Shawshank Redemption, The",4.429022,317
2,Pulp Fiction,4.197068,307
3,"Silence of the Lambs, The",4.16129,279
4,"Matrix, The",4.192446,278
5,Star Wars: Episode IV - A New Hope,4.231076,251
6,Jurassic Park,3.75,238
7,Braveheart,4.031646,237
8,Terminator 2: Judgment Day,3.970982,224
9,Schindler's List,4.225,220


### 3.2. Top 10 películas mejor calificadas (calificadas al menos 20 veces)

In [12]:
# Best-rated movies: same per-movie aggregation, but groups with fewer than
# 20 rows are discarded (HAVING) before ranking by average rating, so a
# handful of ratings cannot put a movie at the top.
query = '''
SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
FROM movies_rating
GROUP BY movieId
HAVING view_num >= 20
ORDER BY rating_prom DESC
LIMIT 10;
'''
top10_best_rated = pd.read_sql(sql=query, con=conn)
top10_best_rated

Unnamed: 0,title,rating_prom,view_num
0,"Streetcar Named Desire, A",4.475,20
1,"Shawshank Redemption, The",4.429022,317
2,Sunset Blvd.,4.333333,27
3,"Philadelphia Story, The",4.310345,29
4,Lawrence of Arabia,4.3,45
5,In the Name of the Father,4.3,25
6,Hoop Dreams,4.293103,29
7,"Godfather, The",4.289062,192
8,Harold and Maude,4.288462,26
9,Logan,4.28,25


### 3.3. Top 10 películas más vistas por año

In [20]:
# Distinct values of the `year` column, newest first; they populate the
# year dropdowns of the interactive rankings below.
query = '''
SELECT DISTINCT year as year
FROM movies_rating
ORDER BY year DESC
'''
years = pd.read_sql(sql=query, con=conn)

In [88]:
def top10_anio(Year):
    """Return the 10 most viewed movies of a given year.

    Args:
        Year: Value compared against the ``year`` column of
            ``movies_rating``. It is bound as text, matching the original
            behaviour of comparing against a quoted literal.

    Returns:
        pandas.DataFrame with columns ``title``, ``rating_prom`` (average
        rating) and ``view_num`` (rows per movie), sorted by ``view_num``
        in descending order and limited to 10 rows.
    """
    # The year is passed as a bound parameter instead of being formatted into
    # the SQL text: this avoids SQL injection and the reliance on SQLite
    # accepting a double-quoted token as a string literal.
    query = '''
    SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
    FROM movies_rating
    WHERE year = ?
    GROUP BY movieId
    ORDER BY view_num DESC
    LIMIT 10;
    '''
    return pd.read_sql(query, conn, params=(str(Year),))

interact(top10_anio, Year=list(years.year))

interactive(children=(Dropdown(description='Year', options=('2018', '2017', '2016', '2015', '2014', '2013', '2…

<function __main__.top10_anio(Year)>

### 3.4. Top 10 películas mejor calificadas por año (calificadas al menos 5 veces).

In [91]:
def top10_rating_anio(Year):
    """Return the 10 best-rated movies of a given year (at least 5 ratings).

    Args:
        Year: Value compared against the ``year`` column of
            ``movies_rating``. It is bound as text, matching the original
            behaviour of comparing against a quoted literal.

    Returns:
        pandas.DataFrame with columns ``title``, ``rating_prom`` (average
        rating) and ``view_num`` (rows per movie), restricted to movies with
        ``view_num >= 5``, sorted by ``rating_prom`` in descending order and
        limited to 10 rows.
    """
    # The year is passed as a bound parameter instead of being formatted into
    # the SQL text: this avoids SQL injection and the reliance on SQLite
    # accepting a double-quoted token as a string literal.
    query = '''
    SELECT title,
        avg(rating) AS rating_prom,
        count(*) AS view_num
    FROM movies_rating
    WHERE year = ?
    GROUP BY movieId
    HAVING view_num >= 5
    ORDER BY rating_prom DESC
    LIMIT 10;
    '''
    return pd.read_sql(query, conn, params=(str(Year),))

interact(top10_rating_anio, Year=list(years.year))

interactive(children=(Dropdown(description='Year', options=('2018', '2017', '2016', '2015', '2014', '2013', '2…

<function __main__.top10_rating_anio(Year)>

### 3.5. Top 10 películas más vistas por género

In [95]:
# Genre names stored in the `Género` column of the `genres` table; they
# populate the genre dropdowns of the interactive rankings below.
query = '''
SELECT `Género`
FROM genres
'''
genres = pd.read_sql(sql=query, con=conn)

In [106]:
def top10_views_genre(Genre):
    """Return the 10 most viewed movies for a genre.

    The per-movie sum of the genre column is used as the view count
    (assumes genre columns of ``movies_rating`` are 0/1 indicators --
    TODO confirm).

    Args:
        Genre: Name of a column of ``movies_rating`` (matched
            case-insensitively).

    Returns:
        pandas.DataFrame with columns ``title``, ``rating_prom`` (average
        rating) and ``views_num`` (sum of the genre column), sorted by
        ``views_num`` in descending order and limited to 10 rows.

    Raises:
        ValueError: If ``Genre`` is not a column of ``movies_rating``.
    """
    # Column names cannot be bound as SQL parameters, so the requested name
    # is checked against the table's real columns before it reaches the SQL.
    table_columns = pd.read_sql('PRAGMA table_info(movies_rating)', conn)['name']
    wanted = str(Genre).lower()
    matches = [name for name in table_columns if name.lower() == wanted]
    if not matches:
        raise ValueError('Unknown genre column: {!r}'.format(Genre))

    # Quote the identifier (doubling embedded quotes) so names containing
    # characters such as '-' or '(' are not parsed as SQL expressions.
    genre_col = '"{}"'.format(matches[0].replace('"', '""'))

    query = '''
    SELECT title,
            avg(rating) as rating_prom,
            sum({}) as views_num
    FROM movies_rating
    GROUP BY movieId
    ORDER BY views_num DESC
    LIMIT 10;
    '''.format(genre_col)
    return pd.read_sql(query, conn)

interact(top10_views_genre, Genre=list(genres['Género']))

interactive(children=(Dropdown(description='Genre', options=('Drama', 'Comedy', 'Action', 'Thriller', 'Adventu…

<function __main__.top10_views_genre(Genre)>

### 3.6. Top 10 películas mejor calificadas por género (calificadas al menos 5 veces).

In [100]:
def top10_rating_genre(Genre):
    """Return the 10 best-rated movies for a genre (at least 5 views).

    The per-movie sum of the genre column is used as the view count
    (assumes genre columns of ``movies_rating`` are 0/1 indicators --
    TODO confirm).

    Args:
        Genre: Name of a column of ``movies_rating`` (matched
            case-insensitively).

    Returns:
        pandas.DataFrame with columns ``title``, ``rating_prom`` (average
        rating) and ``views_num`` (sum of the genre column), restricted to
        movies with ``views_num >= 5``, sorted by ``rating_prom`` in
        descending order and limited to 10 rows.

    Raises:
        ValueError: If ``Genre`` is not a column of ``movies_rating``.
    """
    # Column names cannot be bound as SQL parameters, so the requested name
    # is checked against the table's real columns before it reaches the SQL.
    table_columns = pd.read_sql('PRAGMA table_info(movies_rating)', conn)['name']
    wanted = str(Genre).lower()
    matches = [name for name in table_columns if name.lower() == wanted]
    if not matches:
        raise ValueError('Unknown genre column: {!r}'.format(Genre))

    # Quote the identifier (doubling embedded quotes) so names containing
    # characters such as '-' or '(' are not parsed as SQL expressions.
    genre_col = '"{}"'.format(matches[0].replace('"', '""'))

    query = '''
    SELECT title,
            avg(rating) as rating_prom,
            sum({}) as views_num
    FROM movies_rating
    GROUP BY movieId
    HAVING views_num >= 5
    ORDER BY rating_prom DESC
    LIMIT 10;
    '''.format(genre_col)
    return pd.read_sql(query, conn)

interact(top10_rating_genre, Genre=list(genres['Género']))

interactive(children=(Dropdown(description='Genre', options=('Drama', 'Comedy', 'Action', 'Thriller', 'Adventu…

<function __main__.top10_rating_genre(Genre)>