## Movie Document
- Title
- Characters
- Cast
- Genres
- Poster
- Runtime
- Revenue
- IMDb Rating
- IMDb Votes

In [1]:
import json 
import sqlite3
# open final.db
conn = sqlite3.connect('final.db')
cursor = conn.cursor()

cursor.execute("Select * from movie")

movies = cursor.fetchall()

movie_list = []

i=1

for movie in movies:
    movie_id = movie[0]
    
    # get characters 
    
    cursor.execute("Select name from character where movie_id = ?", (movie_id,))
    characters = cursor.fetchall()
    character_names = [character[0] for character in characters]
    
    # get actors
    cursor.execute(
        "SELECT actor.name FROM actor " +
        "JOIN character ON actor.id = character.actor_id " +
        "WHERE character.movie_id = ?", (movie_id,)
    )
    actors = [actor[0] for actor in cursor.fetchall()]
    
    # get genres
    cursor.execute(
        "SELECT genre.name FROM genre " +
        "JOIN movie_genre ON genre.id = movie_genre.genre_id " +
        "WHERE movie_genre.movie_id = ?", (movie_id,)
    )
    genres = [genre[0] for genre in cursor.fetchall()]
    
    movie_list.append({
        'id': i,
        'title': movie[1],
        'year': movie[2],
        'imdb_rating': movie[3],
        'imdb_votes': movie[4],
        'runtime': movie[5],
        'revenue': movie[6],
        'poster_path': movie[7],
        'characters': character_names,
        'genres': genres,
        'actors': actors
    })
    i += 1
    

movie_json = json.dumps(movie_list)

# put the json into a file
with open('json/movies.json', 'w') as f:
    f.write(movie_json)


## Conversation Document
- Lines
- Movie
- Character
- Cast
- Previous Dialogue
- Next Dialogue

In [2]:
import json 
import sqlite3
# open final.db
conn = sqlite3.connect('final.db')
cursor = conn.cursor()

cursor.execute("Select * from conversation")
conversations = cursor.fetchall()

i = 0
id = 1

action_kill_lines = []

conversation_list = []
conversation_lines_length = {}
for conversation in conversations:

    conversation_id = conversation[0]
    conv_id_to_json = id
    
    movie_id = conversation[3]
    
    
    
    cursor.execute("Select genre.name from movie_genre join genre on movie_genre.genre_id = genre.id where movie_genre.movie_id = ?", (movie_id,))
    genres = cursor.fetchall()
    
    if (genres != None):
        genres = [genre[0] for genre in genres]
    
    cursor.execute("Select year,poster_path, runtime, revenue,imdb_votes, imdb_rating from movie where id = ?", (movie_id,))
    row = cursor.fetchone()
    if (row != None):
        year = row[0]
        poster_path = row[1]
        runtime = row[2]
        revenue = row[3]
        imdb_votes = row[4]
        imdb_rating = row[5]
    
    # get lines
    cursor.execute("Select * from line where conversation_id = ?", (conversation_id,))
    lines = []
    transcript = ""
    for line in cursor.fetchall():
        line_id = line[0]
        character_id = line[1]
        text = line[3]
        
        cursor.execute("Select name from character where id = ?", (character_id,))
        char_name = cursor.fetchone()
        if (char_name == None):
            char_name = ""
        else:
            char_name = char_name[0]
        
        
        cursor.execute("Select actor.name from actor join character on actor.id = character.actor_id where character.id = ?", (character_id,))
        actor_name = cursor.fetchone()
        if (actor_name == None):
            actor_name = ""
        else:
            actor_name = actor_name[0]
            
        id +=1
        lines.append({
            'id': line_id,
            'character': char_name,
            'actor': actor_name,
            'text': text
        })
        if text:
            transcript += text + ' '
    
    conversation_lines_length[conversation_id] = len(lines)
    
    # get characters
    cursor.execute(
    "SELECT c1.name AS character1, c2.name AS character2 " +
    "FROM conversation AS conv " +
    "JOIN character AS c1 ON conv.first_char_id = c1.id " +
    "JOIN character AS c2 ON conv.second_char_id = c2.id " +
    "WHERE conv.id = ?",
    (conversation_id,)
    )
    character_names = cursor.fetchall()
    if (character_names != None):
        char1 = character_names[0][0]
        char2 = character_names[0][1]
    
    # get movie
    cursor.execute(
        "Select title from movie where id = ?", (movie_id,)
    )

    movie_title = cursor.fetchone()[0]
    
    # get actors
    # traverse lines list and get actor names
    actors = []
    for entry in lines:
        actor_name = entry['actor']
        if actor_name not in actors:
            actors.append(entry['actor'])
        else :
            continue  

    
    previous_dialogue_length =0
    if conversation_id > 1:
        previous_dialogue_length = conversation_lines_length[conversation_id - 1]
    
    print("SAVING CONVERSATION: " + str(conversation_id))
    
    conversation_list.append({
        'id': conv_id_to_json,
        'characters': [char1, char2],
        'poster_path': poster_path,
        'movie': movie_title,
        'imdb_rating': imdb_rating,
        'imdb_votes': imdb_votes,
        'runtime': runtime,
        'revenue': revenue,
        'genres': genres,
        'year': year,
        'actors': actors,
        'transcript': transcript,
        'previous_dialogue': conv_id_to_json - previous_dialogue_length - 1 ,
        'next_dialogue': conv_id_to_json + len(lines)+1,
        'content_type':"parentDocument",
        'lines': lines })
        
    i += 1
    id += 1

conversation_json = json.dumps(conversation_list)

# put the json into a file
with open('json/conversations.json', 'w') as f:
    f.write(conversation_json)

SAVING CONVERSATION: 1
SAVING CONVERSATION: 2
SAVING CONVERSATION: 3
SAVING CONVERSATION: 4
SAVING CONVERSATION: 5
SAVING CONVERSATION: 6
SAVING CONVERSATION: 7
SAVING CONVERSATION: 8
SAVING CONVERSATION: 9
SAVING CONVERSATION: 10
SAVING CONVERSATION: 11
SAVING CONVERSATION: 12
SAVING CONVERSATION: 13
SAVING CONVERSATION: 14
SAVING CONVERSATION: 15
SAVING CONVERSATION: 16
SAVING CONVERSATION: 17
SAVING CONVERSATION: 18
SAVING CONVERSATION: 19
SAVING CONVERSATION: 20
SAVING CONVERSATION: 21
SAVING CONVERSATION: 22
SAVING CONVERSATION: 23
SAVING CONVERSATION: 24
SAVING CONVERSATION: 25
SAVING CONVERSATION: 26
SAVING CONVERSATION: 27
SAVING CONVERSATION: 28
SAVING CONVERSATION: 29
SAVING CONVERSATION: 30
SAVING CONVERSATION: 31
SAVING CONVERSATION: 32
SAVING CONVERSATION: 33
SAVING CONVERSATION: 34
SAVING CONVERSATION: 35
SAVING CONVERSATION: 36
SAVING CONVERSATION: 37
SAVING CONVERSATION: 38
SAVING CONVERSATION: 39
SAVING CONVERSATION: 40
SAVING CONVERSATION: 41
SAVING CONVERSATION: 42
S

IndexError: list index out of range

## Actor Document

- Name
- Movies
- Characters
- Other Related Actors


In [8]:
cursor.execute("Select * from actor")
actors = cursor.fetchall()

actor_list = []
i=1
for actor in actors:
    actor_id = actor[0]
    actor_name = actor[1]
    
    # get characters
    cursor.execute("SELECT character.name FROM character JOIN actor ON character.actor_id = actor.id WHERE actor.id = ?", (actor_id,))
    characters = [character[0] for character in cursor.fetchall()]
    
    # get movies
    movies = []
    for char in characters:
        cursor.execute("SELECT movie.title FROM movie JOIN character ON movie.id = character.movie_id WHERE character.name = ?", (char,))
        movie = cursor.fetchone()
        if movie != None:
            movies.append(movie[0])
        else:
            continue

    # related actors
    related_actors = []
    for movie in movies:
        cursor.execute("SELECT actor.name FROM actor JOIN character ON actor.id = character.actor_id JOIN movie ON character.movie_id = movie.id WHERE movie.title = ?", (movie,))
        actors = [actor[0] for actor in cursor.fetchall()]
        for actor in actors:
            if actor not in related_actors:
                related_actors.append(actor)
            else:
                continue
            
    actor_list.append({
        'id': i,
        'name': actor_name,
        'characters': characters,
        'movies': movies,
        'related_actors': related_actors
    })
    i += 1
    
actor_json = json.dumps(actor_list)

# put the json into a file
with open('json/actors.json', 'w') as f:
    f.write(actor_json)



KeyboardInterrupt: 