## Movie Document
- Title
- Characters
- Cast
- Genres
- Poster
- Runtime
- Revenue
- IMDb Rating
- IMDb Votes

In [1]:
import json 
import sqlite3
# Connect to the SQLite database (final.db); `cursor` is shared by the
# cells that follow.
conn = sqlite3.connect('final.db')
cursor = conn.cursor()

# Output keys for the leading columns of a `movie` row, in column order.
movie_fields = (
    'id', 'title', 'year', 'imdb_rating',
    'imdb_votes', 'runtime', 'revenue', 'poster_path',
)

# Per-movie name lookups. Each query takes the movie id as its single
# parameter and returns one name per row.
name_queries = (
    ('characters', "Select name from character where movie_id = ?"),
    ('actors', (
        "SELECT actor.name FROM actor "
        "JOIN character ON actor.id = character.actor_id "
        "WHERE character.movie_id = ?"
    )),
    ('genres', (
        "SELECT genre.name FROM genre "
        "JOIN movie_genre ON genre.id = movie_genre.genre_id "
        "WHERE movie_genre.movie_id = ?"
    )),
)

cursor.execute("Select * from movie")
movies = cursor.fetchall()

movie_list = []
for row in movies:
    params = (row[0],)

    # Run the three name lookups for this movie.
    names = {}
    for field, query in name_queries:
        cursor.execute(query, params)
        names[field] = [result[0] for result in cursor.fetchall()]

    # Scalar columns first, then the name lists.
    record = {key: row[idx] for idx, key in enumerate(movie_fields)}
    record['characters'] = names['characters']
    record['genres'] = names['genres']
    record['actors'] = names['actors']
    movie_list.append(record)

movie_json = json.dumps(movie_list)

# Persist the serialized movie documents.
with open('movies.json', 'w') as f:
    f.write(movie_json)


## Conversation Document
- Lines
- Movie
- Characters
- Cast
- Previous Dialogue
- Next Dialogue

In [13]:
def create_conversation_dict(id, db_cursor=None):
    """Build a JSON-serializable summary of a single conversation.

    Args:
        id: Value of the ``id`` column of the row in the ``conversation``
            table. (The name shadows the builtin but is kept so existing
            callers keep working.)
        db_cursor: Optional DB-API cursor to run the queries on. When
            omitted, the module-level ``cursor`` is used.

    Returns:
        A dict with the keys ``id``, ``movie``, ``characters``, ``lines``
        and ``actors``, or ``None`` when no conversation with that id
        exists. Character names, actor names and the movie title fall back
        to ``""`` when the corresponding row cannot be found.
    """
    cur = cursor if db_cursor is None else db_cursor

    def first_value(query, params):
        # First column of the first matching row, or "" when nothing matches.
        cur.execute(query, params)
        row = cur.fetchone()
        return "" if row is None else row[0]

    cur.execute("Select * from conversation where id = ?", (id,))
    conversation = cur.fetchone()
    if conversation is None:
        # Unknown id (e.g. the neighbour of the first/last conversation).
        return None
    movie_id = conversation[3]

    cur.execute("Select * from line where conversation_id = ?", (id,))
    lines = []
    # fetchall() materializes the rows, so reusing the cursor inside the
    # loop does not disturb the iteration.
    for line in cur.fetchall():
        character_id = line[1]
        lines.append({
            'id': line[0],
            'character': first_value(
                "Select name from character where id = ?", (character_id,)
            ),
            'actor': first_value(
                "Select actor.name from actor join character "
                "on actor.id = character.actor_id where character.id = ?",
                (character_id,),
            ),
            'text': line[3],
        })

    cur.execute(
        "SELECT c1.name AS character1, c2.name AS character2 "
        "FROM conversation AS conv "
        "JOIN character AS c1 ON conv.first_char_id = c1.id "
        "JOIN character AS c2 ON conv.second_char_id = c2.id "
        "WHERE conv.id = ?",
        (id,),
    )
    pair = cur.fetchone()
    # The inner joins yield no row when either character is missing.
    char1, char2 = ("", "") if pair is None else (pair[0], pair[1])

    movie_title = first_value("Select title from movie where id = ?", (movie_id,))

    # Distinct actor names in order of first appearance.
    actors = list(dict.fromkeys(entry['actor'] for entry in lines))

    return {
        'id': id,
        'movie': movie_title,
        'characters': [char1, char2],
        'lines': lines,
        'actors': actors,
    }
    
    

In [14]:
cursor.execute("Select * from conversation")
conversations = cursor.fetchall()

# Ids that actually exist, so that a neighbouring conversation is only
# looked up when there is one (the first and last rows have a missing side,
# and ids are not guaranteed to be contiguous).
known_ids = {row[0] for row in conversations}

# Export cap: the first 501 rows, matching the earlier counter-based loop,
# which only stopped once its counter exceeded 500.
EXPORT_LIMIT = 501

conversation_list = []
for conversation in conversations[:EXPORT_LIMIT]:
    conversation_id = conversation[0]

    # Lines, characters, movie title and actors come from the shared helper
    # instead of a second copy of the same queries.
    current = create_conversation_dict(conversation_id)

    previous_id = conversation_id - 1
    next_id = conversation_id + 1
    previous_conversation = (
        create_conversation_dict(previous_id) if previous_id in known_ids else None
    )
    next_conversation = (
        create_conversation_dict(next_id) if next_id in known_ids else None
    )

    conversation_list.append({
        'id': conversation_id,
        'lines': current['lines'],
        'characters': current['characters'],
        'movie': current['movie'],
        'actors': current['actors'],
        'previous_dialogue': previous_conversation,
        'next_dialogue': next_conversation,
    })

conversation_json = json.dumps(conversation_list)

# Persist the serialized conversation documents.
with open('conversations.json', 'w') as f:
    f.write(conversation_json)

## Actor Document

- Name
- Movies
- Characters
- Other Related Actors


In [None]:
cursor.execute("Select * from actor")
actors = cursor.fetchall()

actor_list = []
for actor_row in actors:
    actor_id = actor_row[0]
    actor_name = actor_row[1]

    # Characters played by this actor.
    cursor.execute("SELECT name FROM character WHERE actor_id = ?", (actor_id,))
    characters = [row[0] for row in cursor.fetchall()]

    # Movies the actor appears in. Resolved through character.actor_id
    # rather than the character's name: a name shared by characters in
    # several films would otherwise attach an arbitrary movie to this actor.
    cursor.execute(
        "SELECT DISTINCT movie.id, movie.title FROM movie "
        "JOIN character ON movie.id = character.movie_id "
        "WHERE character.actor_id = ?",
        (actor_id,),
    )
    movie_rows = cursor.fetchall()
    movies = [title for _, title in movie_rows]

    # Other actors from the same movies. Matched on movie id (titles are
    # not unique) and excluding the actor themself; names are de-duplicated
    # in order of first appearance.
    related_actors = []
    seen_names = set()
    for movie_id, _ in movie_rows:
        cursor.execute(
            "SELECT actor.name FROM actor "
            "JOIN character ON actor.id = character.actor_id "
            "WHERE character.movie_id = ? AND actor.id != ?",
            (movie_id, actor_id),
        )
        for (co_actor_name,) in cursor.fetchall():
            if co_actor_name not in seen_names:
                seen_names.add(co_actor_name)
                related_actors.append(co_actor_name)

    actor_list.append({
        'id': actor_id,
        'name': actor_name,
        'characters': characters,
        'movies': movies,
        'related_actors': related_actors,
    })

actor_json = json.dumps(actor_list)

# Persist the serialized actor documents.
with open('actors.json', 'w') as f:
    f.write(actor_json)

