In [1]:
# Other tools and json
import json

In [2]:
# Read the MySQL credentials JSON from disk
with open("C:\\Users\\Elizabeth Spreng\\.secret\\mysql.json", mode='r') as f:
    login = json.loads(f.read())
## Show which keys the loaded credentials dict contains
login.keys()

dict_keys(['user', 'password'])

In [3]:
## EDIT THE VALUES BELOW SO THEY FIT YOUR OWN PC/DATABASE
# Name of the MySQL database whose tables get exported
DB_NAME = "movies"

# Location of the json file with the mysql login credentials,
# plus the keys under which user name and password are stored in it
MYSQL_LOGIN = "C:\\Users\\Elizabeth Spreng\\.secret\\mysql.json"
USER_KEY, PASSWORD_KEY = "user", "password"

## (Optional) - Output folder for the exported files
folder = "Data-for-Tableau/"

In [4]:
######## VERIFY THAT THE LOGIN CREDENTIALS FILE IS USABLE
import json
import os

# Make sure the export folder is present before anything is written to it
os.makedirs(folder, exist_ok=True)

with open(MYSQL_LOGIN) as f:
    login = json.load(f)

# Both credential keys are required; the first one found missing is reported
for required_key in (USER_KEY, PASSWORD_KEY):
    if required_key not in login:
        raise Exception(f"[!] The json file did not have a {required_key} key.")

In [5]:
!pip install pymysql




In [6]:
import pandas as pd
import os
import numpy as np

from sqlalchemy import create_engine
from sqlalchemy_utils import create_database, database_exists

import pymysql
# Let PyMySQL stand in for the MySQLdb module name (per the PyMySQL docs).
# NOTE(review): the connection URL below already selects the "mysql+pymysql"
# driver explicitly, so this call may be redundant - confirm before removing.
pymysql.install_as_MySQLdb()

In [7]:
# Build the SQLAlchemy connection URL from the credentials loaded earlier.
# User name and password are percent-encoded first: characters such as
# '@', ':', '/' or '%' have a structural meaning inside a URL and would
# otherwise corrupt the connection string (e.g. a password containing '@').
# str() keeps non-string JSON values (e.g. a numeric password) working.
from urllib.parse import quote

db_user = quote(str(login[USER_KEY]), safe="")
db_password = quote(str(login[PASSWORD_KEY]), safe="")

connection = f"mysql+pymysql://{db_user}:{db_password}@localhost/{DB_NAME}"
engine = create_engine(connection)

# Stop right away when the target database is missing, instead of failing
# later in the middle of the export.
if database_exists(engine.url):
    print(f"[i] Database {DB_NAME} found.")
else:
    raise Exception(f'[!] Database {DB_NAME} does not exist.')

[i] Database movies found.


In [8]:
# Ask MySQL for the list of tables in the connected database
q = "SHOW TABLES;"
tables = pd.read_sql(sql=q, con=engine)
tables

Unnamed: 0,Tables_in_movies
0,basics
1,crew
2,genres
3,known_for
4,name_basics
5,new_basics
6,new_genres
7,new_ratings
8,new_title_genres
9,principals


In [9]:
# Pull the table names out of the SHOW TABLES result as a plain Python list
tables_column = f'Tables_in_{DB_NAME}'
table_names = tables[tables_column].tolist()
table_names

['basics',
 'crew',
 'genres',
 'known_for',
 'name_basics',
 'new_basics',
 'new_genres',
 'new_ratings',
 'new_title_genres',
 'principals',
 'profession',
 'ratings',
 'title_basics',
 'title_genres',
 'title_profession',
 'tmbd_data']

In [10]:
# Empty containers for new filenames and error messages
errors = {}
new_files = []

dashes = '---'*25
print(dashes,f"    EXPORTING DATABASE ({DB_NAME}) to '{folder}'", 
      dashes, sep='\n')


# Loop through all tables to export.
# A failure on one table is recorded in `errors` (table name -> exception)
# and the loop continues, so one bad table does not abort the whole export.
for table in table_names:

    try:
        # Backtick-quote the identifier (doubling any embedded backtick) so
        # tables named after reserved words, or containing spaces/hyphens,
        # can still be selected.
        quoted_table = "`" + table.replace("`", "``") + "`"

        ## Get all data for table and save to csv
        temp = pd.read_sql(f"SELECT * FROM {quoted_table}", engine)

        # os.path.join supplies the path separator when `folder` was
        # configured without a trailing slash.
        fname = os.path.join(folder, f"{table}.csv")
        temp.to_csv(fname, index=False)

        # Save filename and print message
        new_files.append(fname)
        print(f"  - Exported {table} to '{fname}'")

    except Exception as e:
        # Save error message
        errors[table] = e
        print(f"  - [!] Error with '{table}' table")
        

---------------------------------------------------------------------------
    EXPORTING DATABASE (movies) to 'Data-for-Tableau/'
---------------------------------------------------------------------------
  - Exported basics to 'Data-for-Tableau/basics.csv'
  - Exported crew to 'Data-for-Tableau/crew.csv'
  - Exported genres to 'Data-for-Tableau/genres.csv'
  - Exported known_for to 'Data-for-Tableau/known_for.csv'
  - Exported name_basics to 'Data-for-Tableau/name_basics.csv'
  - Exported new_basics to 'Data-for-Tableau/new_basics.csv'
  - Exported new_genres to 'Data-for-Tableau/new_genres.csv'
  - Exported new_ratings to 'Data-for-Tableau/new_ratings.csv'
  - Exported new_title_genres to 'Data-for-Tableau/new_title_genres.csv'
  - Exported principals to 'Data-for-Tableau/principals.csv'
  - Exported profession to 'Data-for-Tableau/profession.csv'
  - Exported ratings to 'Data-for-Tableau/ratings.csv'
  - Exported title_basics to 'Data-for-Tableau/title_basics.csv'
  - Exported tit

In [11]:
## If any table failed during the export, print the details.
## `errors` maps each failed table name to the exception that was raised.
if len(errors) > 0:
    print('\n\n[!] ERRORS FOUND DURING EXPORT:')
    # .items() yields (table, exception) pairs; iterating .keys() would try
    # to unpack each table-name string and fail.
    for k, v in errors.items():
        # Print the stored exception `v`; the name `e` from the export loop's
        # except clause no longer exists once that clause has finished.
        print(f"  - Error for table {k}:   {v}")

else:
    print('[i]  No errors. :-)')

[i]  No errors. :-)


In [12]:
## Show the first and last rows of every file that was exported.
for file in new_files:
    temp_df = pd.read_csv(file)

    # Banner naming the file, framed by two dashed rules
    banner = "\n".join([dashes, f"[i] Preview of {file}:", dashes])
    print(banner)

    display(temp_df.head(), temp_df.tail())

---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
0,tt0035423,Kate & Leopold,2001.0,118
1,tt0062336,The Tango of the Widower and Its Distorting Mi...,2020.0,70
2,tt0069049,The Other Side of the Wind,2018.0,122
3,tt0088751,The Naked Monster,2005.0,100
4,tt0096056,Crime and Punishment,2002.0,126


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
81486,tt9914942,Life Without Sara Amat,2019.0,74
81487,tt9915872,The Last White Witch,2019.0,97
81488,tt9916170,The Rehearsal,2019.0,51
81489,tt9916190,Safeguard,2020.0,95
81490,tt9916362,Coven,2020.0,92


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/crew.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,director,writer
0,tt0035423,nm0003506,nm0737216
1,tt0035423,nm0003506,nm0003506
2,tt0062336,nm0749914,nm0749914
3,tt0062336,nm0749914,nm1146177
4,tt0062336,nm0765384,nm0749914


Unnamed: 0,tconst,director,writer
301949,tt9916170,nm5412267,nm3245789
301950,tt9916190,nm7308376,nm7308376
301951,tt9916362,nm1893148,nm1893148
301952,tt9916362,nm1893148,nm3471432
301953,tt9916428,nm0910951,


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,Genre_name,Genre_id
0,Action,0
1,Adult,1
2,Adventure,2
3,Animation,3
4,Biography,4


Unnamed: 0,Genre_name,Genre_id
20,Sport,20
21,Talk-Show,21
22,Thriller,22
23,War,23
24,Western,24


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/known_for.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,titles_split,primaryName
0,nm0000001,tt0072308,Fred Astaire
1,nm0000001,tt0053137,Fred Astaire
2,nm0000001,tt0050419,Fred Astaire
3,nm0000001,tt0031983,Fred Astaire
4,nm0000002,tt0117057,Lauren Bacall


Unnamed: 0,nconst,titles_split,primaryName
1786132,nm9993680,tt21047844,Christopher-Lawson Palmer
1786133,nm9993693,tt8302382,Apsara Rani
1786134,nm9993693,tt13847502,Apsara Rani
1786135,nm9993693,tt12856788,Apsara Rani
1786136,nm9993693,tt8737752,Apsara Rani


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/name_basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,primaryName,birthYear,deathYear
0,nm0000001,Fred Astaire,1899.0,1987.0
1,nm0000002,Lauren Bacall,1924.0,2014.0
2,nm0000003,Brigitte Bardot,1934.0,
3,nm0000004,John Belushi,1949.0,1982.0
4,nm0000005,Ingmar Bergman,1918.0,2007.0


Unnamed: 0,nconst,primaryName,birthYear,deathYear
592715,nm9993444,Jason Winter,,
592716,nm9993494,Amjad Ali,,
592717,nm9993616,Ryan Mac Lennan,,
592718,nm9993680,Christopher-Lawson Palmer,,
592719,nm9993693,Apsara Rani,1996.0,


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/new_basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
0,tt0035423,Kate & Leopold,2001.0,118
1,tt0043139,Life of a Beijing Policeman,2013.0,120
2,tt0062336,The Tango of the Widower and Its Distorting Mi...,2020.0,70
3,tt0069049,The Other Side of the Wind,2018.0,122
4,tt0088751,The Naked Monster,2005.0,100


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
81757,tt9914942,Life Without Sara Amat,2019.0,74
81758,tt9915872,The Last White Witch,2019.0,97
81759,tt9916170,The Rehearsal,2019.0,51
81760,tt9916190,Safeguard,2020.0,95
81761,tt9916362,Coven,2020.0,92


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/new_genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,Genre_name,Genre_id
0,Action,0
1,Adult,1
2,Adventure,2
3,Animation,3
4,Biography,4


Unnamed: 0,Genre_name,Genre_id
20,Sport,20
21,Talk-Show,21
22,Thriller,22
23,War,23
24,Western,24


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/new_ratings.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,ordering,nconst,category,job,characters
0,tt0035423,10,nm0107463,editor,,
1,tt0035423,1,nm0000212,actress,,"[""Kate McKay""]"
2,tt0035423,2,nm0413168,actor,,"[""Leopold""]"
3,tt0035423,3,nm0000630,actor,,"[""Stuart Besser""]"
4,tt0035423,4,nm0005227,actor,,"[""Charlie McKay""]"


Unnamed: 0,tconst,ordering,nconst,category,job,characters
1062194,tt9916428,3,nm8594703,actor,,"[""Dr. Hatem""]"
1062195,tt9916428,4,nm0422639,actress,,"[""Song Qingling""]"
1062196,tt9916428,5,nm0910951,director,,
1062197,tt9916428,6,nm8680851,actor,,"[""Powell Editor-in-chief of US newspaper""]"
1062198,tt9916428,7,nm3370295,actor,,"[""Train Station Manager""]"


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/new_title_genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,genre_id
0,tt0035423,5
1,tt0035423,9
2,tt0035423,18
3,tt0043139,7
4,tt0043139,11


Unnamed: 0,tconst,genre_id
153211,tt9916190,0
153212,tt9916190,2
153213,tt9916190,22
153214,tt9916362,7
153215,tt9916362,11


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/principals.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,ordering,nconst,category,job,characters
0,tt0035423,10,nm0107463,editor,,
1,tt0035423,1,nm0000212,actress,,"[""Kate McKay""]"
2,tt0035423,2,nm0413168,actor,,"[""Leopold""]"
3,tt0035423,3,nm0000630,actor,,"[""Stuart Besser""]"
4,tt0035423,4,nm0005227,actor,,"[""Charlie McKay""]"


Unnamed: 0,tconst,ordering,nconst,category,job,characters
1062194,tt9916428,3,nm8594703,actor,,"[""Dr. Hatem""]"
1062195,tt9916428,4,nm0422639,actress,,"[""Song Qingling""]"
1062196,tt9916428,5,nm0910951,director,,
1062197,tt9916428,6,nm8680851,actor,,"[""Powell Editor-in-chief of US newspaper""]"
1062198,tt9916428,7,nm3370295,actor,,"[""Train Station Manager""]"


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/profession.csv:
---------------------------------------------------------------------------


Unnamed: 0,profession_name,profession_id
0,soundtrack,0
1,actor,1
2,miscellaneous,2
3,actress,3
4,music_department,4


Unnamed: 0,profession_name,profession_id
38,assistant,38
39,podcaster,39
40,publicist,40
41,production_department,41
42,electrical_department,42


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/ratings.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1980
1,tt0000002,5.8,265
2,tt0000003,6.5,1835
3,tt0000004,5.6,179
4,tt0000005,6.2,2624


Unnamed: 0,tconst,averageRating,numVotes
1319587,tt9916730,8.3,10
1319588,tt9916766,7.0,21
1319589,tt9916778,7.2,36
1319590,tt9916840,7.5,7
1319591,tt9916880,7.0,7


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_basics.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
0,tt0035423,Kate & Leopold,2001.0,118
1,tt0043139,Life of a Beijing Policeman,2013.0,120
2,tt0062336,The Tango of the Widower and Its Distorting Mi...,2020.0,70
3,tt0069049,The Other Side of the Wind,2018.0,122
4,tt0088751,The Naked Monster,2005.0,100


Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes
81757,tt9914942,Life Without Sara Amat,2019.0,74
81758,tt9915872,The Last White Witch,2019.0,97
81759,tt9916170,The Rehearsal,2019.0,51
81760,tt9916190,Safeguard,2020.0,95
81761,tt9916362,Coven,2020.0,92


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_genres.csv:
---------------------------------------------------------------------------


Unnamed: 0,tconst,genre_id
0,tt0035423,5
1,tt0035423,9
2,tt0035423,18
3,tt0043139,7
4,tt0043139,11


Unnamed: 0,tconst,genre_id
153211,tt9916190,0
153212,tt9916190,2
153213,tt9916190,22
153214,tt9916362,7
153215,tt9916362,11


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/title_profession.csv:
---------------------------------------------------------------------------


Unnamed: 0,nconst,primaryName,profession_split,profession_id
0,nm0000001,Fred Astaire,soundtrack,0
1,nm0000001,Fred Astaire,actor,1
2,nm0000001,Fred Astaire,miscellaneous,2
3,nm0000002,Lauren Bacall,actress,3
4,nm0000002,Lauren Bacall,soundtrack,0


Unnamed: 0,nconst,primaryName,profession_split,profession_id
1186679,nm9993444,Jason Winter,actor,1
1186680,nm9993494,Amjad Ali,writer,5
1186681,nm9993616,Ryan Mac Lennan,actor,1
1186682,nm9993680,Christopher-Lawson Palmer,actor,1
1186683,nm9993693,Apsara Rani,actress,3


---------------------------------------------------------------------------
[i] Preview of Data-for-Tableau/tmbd_data.csv:
---------------------------------------------------------------------------


Unnamed: 0,imdb_id,budget,revenue,certification
0,tt0035423,48000000.0,76019000.0,PG-13
1,tt0113026,10000000.0,0.0,
2,tt0113092,0.0,0.0,
3,tt0114447,0.0,0.0,
4,tt0116391,0.0,0.0,


Unnamed: 0,imdb_id,budget,revenue,certification
2573,tt7802790,0.0,0.0,
2574,tt8665056,0.0,0.0,
2575,tt8795764,0.0,0.0,NR
2576,tt9071078,0.0,0.0,
2577,tt9212730,0.0,0.0,


#### These tables can be exported to Tableau and Power BI.