# From Eric's Initial Data Import:

In [None]:
#dependencies
import pandas as pd
import json

In [None]:
# Load the TMDB 5000 movies CSV into a DataFrame and preview the first rows.
movies = pd.read_csv(filepath_or_buffer="Resources/tmdb_5000_movies.csv")
movies.head(n=5)

In [None]:
# Keep only the rows whose original_language equals "en", then preview them.
en_movies = movies.loc[movies["original_language"].eq("en")]
en_movies.head(n=5)

In [None]:
# Load the TMDB 5000 credits CSV into a DataFrame and preview the first rows.
credits = pd.read_csv(filepath_or_buffer="Resources/tmdb_5000_credits.csv")
credits.head(n=5)

# Create Genres Dataframe

In [None]:
# Collect every distinct (genre id, genre name) pair.
# Each cell of en_movies['genres'] is a JSON string encoding a list of
# objects with 'id' and 'name' keys; using a set drops the duplicates.
ids_names = {
    (genre['id'], genre['name'])
    for genres_json in en_movies['genres']
    for genre in json.loads(genres_json)
}

# Show the collected (id, name) pairs
print(ids_names)


In [None]:
# Build the genres lookup table from the (id, name) pairs in ids_names.
# The set is turned into a list so pandas receives an ordered sequence.
genres_df = pd.DataFrame(list(ids_names), columns=['genre_id', 'genre'])

# Convert ids to integers
genres_df['genre_id'] = genres_df['genre_id'].astype(int)

# Set iteration order can differ between interpreter runs (string hashing is
# randomised), so sort the rows to keep the exported file stable.
genres_df = genres_df.sort_values(['genre_id', 'genre'], ignore_index=True)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\genres.csv' relied on the invalid escape sequence '\g' and
# only pointed into the csv_outputs directory on Windows.
genres_df.to_csv('csv_outputs/genres.csv', index=False)

genres_df

### Using emoji Library with Genres DF

In [None]:
# A new library just for fun!
import emoji

# Work on a copy so genres_df itself is left untouched
emoji_genres = genres_df.copy()

# Map each genre name to the emoji alias used to represent it
emoji_dic = {
    'Horror': ':face_screaming_in_fear:',
    'Science Fiction': ':alien:',
    'Foreign': ':globe_showing_europe_africa:',
    'Family': ':family_man_woman_girl_boy:',
    'Action': ':person_fencing:',
    'Music': ':musical_notes:',
    'War': ':military_medal:',
    'Comedy': ':rolling_on_the_floor_laughing:',
    'Romance': ':sparkling_heart:',
    'Drama': ':performing_arts:',
    'Thriller': ':bomb:',
    'Adventure': ':person_climbing:',
    'TV Movie': ':television:',
    'Documentary': ':video_camera:',
    'Crime': ':supervillain:',
    'History': ':classical_building:',
    'Animation': ':unicorn:',
    'Fantasy': ':troll:',
    'Mystery': ':detective:',
    'Western': ':cowboy_hat_face:',
}

# Create a DF of emoji aliases
emoji_df = pd.DataFrame(list(emoji_dic.items()), columns=['genre', 'alias'])

# Left merge keeps every genre row; a genre that is missing from emoji_dic
# ends up with a NaN alias.
emoji_genres = pd.merge(emoji_genres, emoji_df, on='genre', how='left')

# Create emojis. na_action='ignore' passes a NaN alias straight through
# instead of handing a float to emoji.emojize, which expects a string.
emoji_genres['genres_emoji'] = emoji_genres['alias'].map(
    lambda alias: emoji.emojize(alias, language='alias'),
    na_action='ignore',
)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\emoji_genres.csv' relied on the invalid escape sequence '\e'
# and only pointed into the csv_outputs directory on Windows.
emoji_genres.to_csv('csv_outputs/emoji_genres.csv', index=False)

emoji_genres

# Create Cast Dataframe

In [None]:
# Collect every distinct (actor id, actor name) pair.
# Each cell of credits['cast'] is a JSON string encoding a list of objects
# with 'id' and 'name' keys; using a set drops the duplicates.
# NOTE(review): credits is read unfiltered here, while the genre and keyword
# cells read from en_movies -- confirm that including every movie is intended.
ids_cast = {
    (member['id'], member['name'])
    for cast_json in credits['cast']
    for member in json.loads(cast_json)
}

# Show the collected (id, name) pairs
print(ids_cast)

In [None]:
# Build the actors lookup table from the (id, name) pairs in ids_cast.
# The set is turned into a list so pandas receives an ordered sequence.
cast_df = pd.DataFrame(list(ids_cast), columns=['actor_id', 'actor'])

# Convert ids to integers
cast_df['actor_id'] = cast_df['actor_id'].astype(int)

# Set iteration order can differ between interpreter runs (string hashing is
# randomised), so sort the rows to keep the exported file stable.
cast_df = cast_df.sort_values(['actor_id', 'actor'], ignore_index=True)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\cast.csv' relied on the invalid escape sequence '\c' and only
# pointed into the csv_outputs directory on Windows.
cast_df.to_csv('csv_outputs/cast.csv', index=False)

cast_df.head(10)

# Create Directors Dataframe

In [None]:
# Collect every distinct (director id, director name) pair.
# Each cell of credits['crew'] is a JSON string encoding a list of crew
# members; only those whose 'job' is exactly "Director" are kept. .get() is
# used so that a member without a 'job' key is skipped rather than raising.
ids_director = {
    (member['id'], member['name'])
    for crew_json in credits['crew']
    for member in json.loads(crew_json)
    if member.get('job') == "Director"
}

# Show the collected (id, name) pairs
print(ids_director)

In [None]:
# Build the directors lookup table from the (id, name) pairs in ids_director.
# The set is turned into a list so pandas receives an ordered sequence.
director_df = pd.DataFrame(list(ids_director), columns=['director_id', 'director'])

# Convert ids to integers
director_df['director_id'] = director_df['director_id'].astype(int)

# Set iteration order can differ between interpreter runs (string hashing is
# randomised), so sort the rows to keep the exported file stable.
director_df = director_df.sort_values(['director_id', 'director'], ignore_index=True)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\directors.csv' relied on the invalid escape sequence '\d' and
# only pointed into the csv_outputs directory on Windows.
director_df.to_csv('csv_outputs/directors.csv', index=False)

director_df.head(10)

# Create Keyword Dataframe

In [None]:
# Collect every distinct (keyword id, keyword name) pair.
# Each cell of en_movies['keywords'] is a JSON string encoding a list of
# objects with 'id' and 'name' keys; using a set drops the duplicates.
ids_keyword = {
    (keyword['id'], keyword['name'])
    for keywords_json in en_movies['keywords']
    for keyword in json.loads(keywords_json)
}

# Show the collected (id, name) pairs
print(ids_keyword)

In [None]:
# Build the keywords lookup table from the (id, name) pairs in ids_keyword.
# The set is turned into a list so pandas receives an ordered sequence.
keywords_df = pd.DataFrame(list(ids_keyword), columns=['kw_id', 'keyword'])

# Convert ids to integers
keywords_df['kw_id'] = keywords_df['kw_id'].astype(int)

# Set iteration order can differ between interpreter runs (string hashing is
# randomised), so sort the rows to keep the exported file stable.
keywords_df = keywords_df.sort_values(['kw_id', 'keyword'], ignore_index=True)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\keywords.csv' relied on the invalid escape sequence '\k' and
# only pointed into the csv_outputs directory on Windows.
keywords_df.to_csv('csv_outputs/keywords.csv', index=False)

keywords_df

In [None]:
# Create df of movie ids and their JSON-encoded keyword lists
movie_keywords = en_movies[["id", "keywords"]]

# Build one (movie_id, kw_id) pair per keyword attached to a movie.
# The keywords column holds JSON text, so each cell has to be parsed with
# json.loads before the keyword ids can be read. A movie whose keyword list
# is empty contributes no rows.
movie_keyword_pairs = [
    (movie_id, keyword['id'])
    for movie_id, keywords_json in zip(movie_keywords['id'], movie_keywords['keywords'])
    for keyword in json.loads(keywords_json)
]

# Create the link table in one step instead of concatenating inside the loop,
# which re-copies the accumulated frame on every iteration. Naming the columns
# explicitly keeps them present even when no pairs were found.
movie_keywords_df = pd.DataFrame(movie_keyword_pairs, columns=['movie_id', 'kw_id'])

# Convert ids to integers
movie_keywords_df['kw_id'] = movie_keywords_df['kw_id'].astype(int)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\movieids_keywords.csv' relied on the invalid escape sequence
# '\m' and only pointed into the csv_outputs directory on Windows.
movie_keywords_df.to_csv('csv_outputs/movieids_keywords.csv', index=False)

# Check first 25 rows of the DataFrame
movie_keywords_df.head(25)

# Create Movies Dataframe

In [None]:
# Create the Movies DataFrame from the needed en_movies columns, renaming
# them on the way in. Column order matches the exported CSV layout:
# movie_id, title, revenue, tagline, average_vote, release_date.
movies_df = pd.DataFrame(
    {
        # Convert ids to integers
        'movie_id': en_movies['id'].astype(int),
        'title': en_movies['title'],
        # NOTE(review): a "Convert revenue into dollars" step was planned
        # here but never implemented; revenue is passed through unchanged.
        'revenue': en_movies['revenue'],
        'tagline': en_movies['tagline'],
        'average_vote': en_movies['vote_average'],
        # Convert the release date strings to datetime type
        'release_date': pd.to_datetime(en_movies['release_date']),
    }
)

# Save as csv. A forward slash is used as the separator: the previous
# 'csv_outputs\movies.csv' relied on the invalid escape sequence '\m' and
# only pointed into the csv_outputs directory on Windows.
movies_df.to_csv('csv_outputs/movies.csv', index=False)

movies_df.head(10)

# movies_df.dtypes