# Explore here

# **SISTEMA DE RECOMENDACIÓN DE PELÍCULAS CON MODELO KNN**

In [2]:
# Bibliotecas necesarias

# Biblioteca para manipulación e implementación de datos:
import pandas as pd

# Función para dividir un conjunto de datos en dos subconjuntos (entrenamiento y prueba)
from sklearn.model_selection import train_test_split

# Biblioteca para trabajar con bases de datos SQLite:
import sqlite3

# **Carga de datos**

In [3]:
data_movies = pd.read_csv("https://raw.githubusercontent.com/4GeeksAcademy/k-nearest-neighbors-project-tutorial/main/tmdb_5000_movies.csv")
data_credits = pd.read_csv("https://raw.githubusercontent.com/4GeeksAcademy/k-nearest-neighbors-project-tutorial/main/tmdb_5000_credits.csv")


In [4]:
data_credits.head()

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [5]:
data_movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


# **Creación de una base de datos**

- Crea una base de datos para almacenar los dos DataFrames en tablas distintas. 
- A continuación, une las dos tablas con SQL (e intégralo con Python) para generar una tercera tabla que contenga información de ambas unificada. 
- (La clave a través de la cual se puede hacer la unión es el título de la película (`titulo`).)

**Todos estos pasos se realizan porque SQLite es una base de datos ligera y fácil de usar para almacenar datos de forma local**

In [6]:
# Creamos la base de datos llamada: "movies_database"
conn = sqlite3.connect("../data/movies_database.db")

# Creamos las dos tablas en la base de datos
data_movies.to_sql("movies_table", conn, if_exists="replace", index =False)
data_credits.to_sql("credits_table",conn, if_exists = "replace", index = False)

# Unimos las dos tablas generando una tercera
query = """
    SELECT *
    FROM movies_table
    INNER JOIN credits_table
    ON movies_table.title = credits_table.title
"""

# Cargamos los resultados en un dataframe
total_data = pd.read_sql_query(query, conn)

# Cerramos la conexión con la base de datos
conn.close()

total_data.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,title.1,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [15]:
# Eliminamos las columnas que no necesitamos:
columnas_a_conservar = ['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']
data_reducido = total_data[columnas_a_conservar]
data_reducido.head(2)


Unnamed: 0,movie_id,title,title.1,overview,genres,keywords,cast,crew
0,19995,Avatar,Avatar,"[In the 22nd century, a paraplegic Marine is d...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",James Cameron
1,285,Pirates of the Caribbean: At World's End,Pirates of the Caribbean: At World's End,"[Captain Barbossa, long believed to be dead, h...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",Gore Verbinski


In [None]:
# Transforma los datos.
# Como puedes ver, hay algunas columnas con formato JSON(Se encuentran entre corchetes o llaves).
# De cada uno de los JSONs, selecciona el atributo `name` y reemplaza las columnas `genres` y `keywords`. Para la columna `cast`, selecciona los tres primeros nombres.
# Data transform as expected
import json

def load_json_safe(json_str, default_value = None):
    try:
        return json.loads(json_str)
    except (TypeError, json.JSONDecodeError):
        return default_value

data_reducido["genres"] = data_reducido["genres"].apply(lambda x: [item["name"] for item in load_json_safe(x)] if pd.notna(x) else None)
data_reducido["keywords"] = data_reducido["keywords"].apply(lambda x: [item["name"] for item in load_json_safe(x)] if pd.notna(x) else None)

data_reducido["cast"] = data_reducido["cast"].apply(lambda x: [item["name"] for item in load_json_safe(x)][:3] if pd.notna(x) else None)

data_reducido["crew"] = data_reducido["crew"].apply(lambda x: " ".join([crew_member['name'] for crew_member in load_json_safe(x) if crew_member['job'] == 'Director']))

data_reducido["overview"] = data_reducido["overview"].apply(lambda x: [x])

data_reducido.head()





ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()