# Convert IMDb CSV Files to Django [Fixture](https://docs.djangoproject.com/en/4.2/topics/db/fixtures/)

In [None]:
# load json and math (standard library)
import json
import math

# load numpy
import numpy as np

# load pandas
import pandas as pd

In [None]:
# Read each tab-separated IMDb export into its own DataFrame.
movies_genres_data = pd.read_csv('movies_genres.csv', sep='\t')
movies_directors_data = pd.read_csv('movies_directors.csv', sep='\t')
movies_data = pd.read_csv('movies.csv', sep='\t')
actors_data = pd.read_csv('actors.csv', sep='\t')
directors_data = pd.read_csv('directors.csv', sep='\t')
# keep_default_na=False stops pandas from converting its default NA markers
# to NaN, so those cells of the roles table stay plain strings.
roles_data = pd.read_csv('roles.csv', sep='\t', keep_default_na=False)

# Preview the first rows of every table, in the order they were loaded.
for frame in (
    movies_genres_data,
    movies_directors_data,
    movies_data,
    actors_data,
    directors_data,
    roles_data,
):
    display(frame.head())

# Object-oriented Representation of IMDb Data

In [None]:
class Genre:
    """A movie genre with an automatically assigned primary key.

    Primary keys are taken from the class-level counter ``Genre.pk``, which
    starts at 0 and is incremented each time an instance is created.
    """

    pk = 0  # next primary key to hand out; shared by all instances

    def __init__(self, name):
        """ @param name: name of the genre """
        self.pk = Genre.pk
        Genre.pk += 1
        self.name = name

    def __str__(self):
        # Instances only carry `name`; returning `self.genre` raised AttributeError.
        return self.name

    def to_json(self, f):
        """Write this genre to `f` as one Django fixture object (no trailing newline).

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        # json.dumps adds the surrounding quotes and escapes quotes, backslashes
        # and control characters, so any name yields valid JSON.
        name = json.dumps(str(self.name), ensure_ascii=False)
        f.write('{\n')
        f.write('    "model": "movies.genre",\n')
        f.write('    "pk": %i,\n' % self.pk)
        f.write('    "fields": {\n')
        f.write(f'        "name": {name}\n')
        f.write('    }\n')
        f.write('}')


class Movie:
    """A movie together with the primary keys of the genres it belongs to."""

    def __init__(self, pk, name, year, rank, genres):
        """
        @param pk: primary key of the movie
        @param name: title of the movie
        @param year: year, written as an integer
        @param rank: rank, written with one decimal place
        @param genres: list of genre pks
        """
        self.pk = pk
        self.name = name
        self.year = year
        self.rank = rank
        self.genres = genres  # list of genre pks

    def __str__(self):
        return self.name

    def to_json(self, f):
        """Write this movie to `f` as one Django fixture object (no trailing newline).

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        # json.dumps adds the surrounding quotes and escapes quotes, backslashes
        # and control characters, so titles such as '"Friends"' stay valid JSON.
        name = json.dumps(str(self.name), ensure_ascii=False)

        # '%.1f' renders NaN/inf as 'nan'/'inf', which are not valid JSON tokens.
        # A missing or non-finite rank is therefore written as null.
        # NOTE(review): assumes the Django `rank` field accepts NULL - confirm.
        if self.rank is None or not math.isfinite(self.rank):
            rank = 'null'
        else:
            rank = '%.1f' % self.rank

        genre_lines = ['            %i' % g for g in self.genres]

        f.write('{\n')
        f.write('    "model": "movies.movie",\n')
        f.write('    "pk": %i,\n' % self.pk)
        f.write('    "fields": {\n')
        f.write(f'        "name": {name},\n')
        f.write('        "year": %i,\n' % self.year)
        f.write(f'        "rank": {rank},\n')
        f.write('        "genre": [\n')
        if genre_lines:
            # One pk per line, comma after every entry except the last.
            f.write(',\n'.join(genre_lines))
            f.write('\n')
        f.write('        ]\n')
        f.write('    }\n')
        f.write('}')
        
            
class Actor:
    """An actor identified by primary key, with name and gender."""

    def __init__(self, pk, first_name, last_name, gender):
        """
        @param pk: primary key of the actor
        @param first_name: first name
        @param last_name: last name
        @param gender: gender value, written to the fixture as a string
        """
        self.pk = pk
        self.first_name = first_name
        self.last_name = last_name
        self.gender = gender

    def __str__(self):
        return self.first_name + ' ' + self.last_name

    def to_json(self, f):
        """Write this actor to `f` as one Django fixture object (no trailing newline).

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        # json.dumps adds the surrounding quotes and escapes quotes, backslashes
        # and control characters, so nicknames in quotes no longer break the file.
        first_name = json.dumps(str(self.first_name), ensure_ascii=False)
        last_name = json.dumps(str(self.last_name), ensure_ascii=False)
        gender = json.dumps(str(self.gender), ensure_ascii=False)
        f.write('{\n')
        f.write('    "model": "movies.actor",\n')
        f.write('    "pk": %i,\n' % self.pk)
        f.write('    "fields": {\n')
        f.write(f'        "first_name": {first_name},\n')
        f.write(f'        "last_name": {last_name},\n')
        f.write(f'        "gender": {gender}\n')
        f.write('    }\n')
        f.write('}')
        
class Director:
    """A director identified by primary key, with a list of directed movies."""

    def __init__(self, pk, first_name, last_name, movies):
        """
        @param pk: primary key of the director
        @param first_name: first name
        @param last_name: last name
        @param movies: list of movies; each entry is either an object with a
                       `pk` attribute or a plain movie pk
        """
        self.pk = pk
        self.first_name = first_name
        self.last_name = last_name
        self.movies = movies

    def __str__(self):
        return self.first_name + ' ' + self.last_name

    def to_json(self, f):
        """Write this director to `f` as one Django fixture object (no trailing newline).

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        # json.dumps adds the surrounding quotes and escapes quotes, backslashes
        # and control characters.
        first_name = json.dumps(str(self.first_name), ensure_ascii=False)
        last_name = json.dumps(str(self.last_name), ensure_ascii=False)

        # The original loop bound each entry to `g` but wrote `m.pk`, i.e. an
        # unrelated (or undefined) name. Use the entry itself; plain integer pks
        # are accepted as well as objects exposing `.pk`.
        movie_lines = [
            '            %i' % getattr(movie, 'pk', movie)
            for movie in self.movies
        ]

        f.write('{\n')
        f.write('    "model": "movies.director",\n')
        f.write('    "pk": %i,\n' % self.pk)
        f.write('    "fields": {\n')
        f.write(f'        "first_name": {first_name},\n')
        f.write(f'        "last_name": {last_name},\n')
        f.write('        "movies": [\n')
        if movie_lines:
            # One pk per line, comma after every entry except the last.
            f.write(',\n'.join(movie_lines))
            f.write('\n')
        f.write('        ]\n')
        f.write('    }\n')
        f.write('}')
        
class DirectorMovies:
    """One director-to-movie link with an automatically assigned primary key.

    Primary keys come from the class-level counter ``DirectorMovies.pk``,
    which starts at 0 and grows by one per created instance.
    """

    pk = 0  # next primary key to hand out; shared by all instances

    def __init__(self, director, movie):
        """
        @param director: pk of the director
        @param movie: pk of the movie
        """
        self.pk, self.director, self.movie = DirectorMovies.pk, director, movie
        DirectorMovies.pk += 1

    def to_json(self, f):
        """Write this link to `f` as one Django fixture object (no trailing newline).

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        fixture_lines = [
            '{',
            '    "model": "movies.director_movies",',
            '    "pk": %i,' % self.pk,
            '    "fields": {',
            '        "director": %i,' % self.director,
            '        "movie": %i' % self.movie,
            '    }',
            '}',
        ]
        f.write('\n'.join(fixture_lines))
            
class PlayIn:
    """A role played by an actor in a movie, with an automatically assigned primary key.

    Primary keys come from the class-level counter ``PlayIn.pk``, which
    starts at 0 and grows by one per created instance.
    """

    pk = 0  # next primary key to hand out; shared by all instances

    def __init__(self, actor, movie, role):
        """
        @param actor: pk of the actor
        @param movie: pk of the movie
        @param role: name of the role
        """
        self.pk = PlayIn.pk
        PlayIn.pk += 1
        self.actor = actor
        self.movie = movie
        self.role = role

    def __str__(self):
        return self.role

    def to_json(self, f):
        """Write this role to `f` as one Django fixture object (no trailing newline).

        `actor` and `movie` are emitted as JSON strings, as before.

        @param f: file object (see https://docs.python.org/3/glossary.html#term-file-object)
        """
        # json.dumps adds the surrounding quotes and escapes quotes, backslashes
        # and control characters, so any role text yields valid JSON.
        actor = json.dumps(str(self.actor), ensure_ascii=False)
        movie = json.dumps(str(self.movie), ensure_ascii=False)
        role = json.dumps(str(self.role), ensure_ascii=False)
        f.write('{\n')
        f.write('    "model": "movies.playin",\n')
        f.write('    "pk": %i,\n' % self.pk)
        f.write('    "fields": {\n')
        f.write(f'        "actor": {actor},\n')
        f.write(f'        "movie": {movie},\n')
        f.write(f'        "role": {role}\n')
        f.write('    }\n')
        f.write('}')

In [None]:
def data_to_json(data, filename):
    """Write all objects in `data` to `filename` as a single JSON array.

    @param data: dict mapping a relation name to a sequence of objects, each
                 providing `to_json(f)`; relations are written in dict order
    @param filename: path of the output file; it is overwritten
    """
    # Write UTF-8 explicitly instead of relying on the platform default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write("[\n")
        wrote_any = False
        for relation, tuples in data.items():
            print("Writing data for %s."%relation)
            for t in tuples:
                # Put the separator *before* every object except the first.
                # Deciding it after each object left a dangling comma whenever
                # the last relation was empty, producing invalid JSON.
                if wrote_any:
                    f.write(',\n')
                t.to_json(f)
                wrote_any = True
        if wrote_any:
            f.write('\n')
        f.write("]")

# Convert IMDb CSV files to objects

### Genre

In [None]:
# One Genre per distinct genre name. `unique()` already removes duplicates and
# keeps first-appearance order; wrapping it in `set()` made the iteration order
# (and therefore the auto-assigned genre pks) vary from run to run.
genres = [Genre(g) for g in movies_genres_data['genre'].unique()]

### Movie

In [None]:
# Build both lookups once, instead of filtering the whole genre table and
# scanning the `genres` list again for every single movie.

# genre name -> genre pk
genre_pk_by_name = {genre.name: genre.pk for genre in genres}

# movie id -> list of genre pks, in the row order of the genre table.
# Rows with a missing movie id or genre are skipped: NaN never compares equal,
# so such rows could not match any movie or genre before either.
genre_pks_by_movie = {}
genre_rows = movies_genres_data.dropna(subset=['movie_id', 'genre'])
for movie_id, genre_name in zip(genre_rows['movie_id'], genre_rows['genre']):
    if genre_name in genre_pk_by_name:
        genre_pks_by_movie.setdefault(movie_id, []).append(genre_pk_by_name[genre_name])

movies = []

# iterate over each row in the csv file
for index, row in movies_data.iterrows():
    m_id = row['id']
    m_name = row['name']
    m_year = row['year']
    m_rank = row['rank']
    # Copy the list so that every Movie owns its own genre list.
    m_genres = list(genre_pks_by_movie.get(m_id, []))

    # Create Movie
    m = Movie(m_id, m_name, m_year, m_rank, m_genres)
    movies.append(m)

### Actor

In [None]:
# Turn every row of the actors table into an Actor object
# (columns used: id, first_name, last_name, gender).
actors = [
    Actor(row['id'], row['first_name'], row['last_name'], row['gender'])
    for _, row in actors_data.iterrows()
]

### Director

In [None]:
# Turn every row of the directors table into a Director object.
directors = []
for index, row in directors_data.iterrows():
    d_id = row['id']
    d_first_name = row['first_name']
    d_last_name = row['last_name']
    # The movie list is left empty, exactly as before. The per-director filter
    # over movies_directors_data was computed but never used, so it is dropped.
    # NOTE(review): presumably the director/movie links are meant to come only
    # from the DirectorMovies objects - confirm.
    d_movies = []

    # Create Director
    d = Director(d_id, d_first_name, d_last_name, d_movies)
    directors.append(d)

### DirectorMovies

In [None]:
# One DirectorMovies link per row of the movies_directors table
# (columns used: director_id, movie_id).
director_movies = [
    DirectorMovies(row['director_id'], row['movie_id'])
    for _, row in movies_directors_data.iterrows()
]

### PlayIn

In [None]:
# One PlayIn object per row of the roles table (columns used: actor_id,
# movie_id, role). Double quotes are removed from the role text before it is
# stored.
playsin = [
    PlayIn(row['actor_id'], row['movie_id'], row['role'].replace('"',''))
    for _, row in roles_data.iterrows()
]

## Data

In [None]:
# Relation name -> list of objects. The insertion order is the order in which
# the relations are written to the fixture.
data = dict(
    Genre=genres,
    Movie=movies,
    Actor=actors,
    Director=directors,
    DirectorMovies=director_movies,
    PlayIn=playsin,
)

In [None]:
# Serialize every collected object into a single fixture file.
data_to_json(data, filename='imdb_data.json')