In [2]:
# Python 3 - MySQL Database Access Tutorial:
# https://www.tutorialspoint.com/python3/python_database_access.htm
# Inserting into a MySQL Database Tutorial:
# https://pymysql.readthedocs.io/en/latest/user/examples.html
# Reading MySQL data into a Pandas DataFrame
# https://pythondata.com/quick-tip-sqlalchemy-for-mysql-and-pandas/

import os
from urllib.parse import quote_plus

import pandas as pd
import pymysql
import sqlalchemy as sql

# Create connection variables
# Credentials are read from the environment so they are never stored in the notebook.
DB_USER = os.environ.get('DB_USER')
DB_PASS = os.environ.get('DB_PASS')
DB_HOST = 'localhost'
DB_PORT = 3306
DATABASE = 'movies'
# One charset shared by both connections so the pymysql connection and the
# SQLAlchemy engine encode text the same way.
DB_CHARSET = 'utf8mb4'

# Fail fast: a missing variable would otherwise be formatted into the
# SQLAlchemy URL below as the literal text "None".
if DB_USER is None or DB_PASS is None:
    raise RuntimeError('Set the DB_USER and DB_PASS environment variables before running this notebook.')

# Connect to the database for pymysql connection (used for the INSERT statements)
connection = pymysql.connect(host=DB_HOST,
                             port=DB_PORT,
                             user=DB_USER,
                             password=DB_PASS,
                             database=DATABASE,
                             charset=DB_CHARSET,
                             cursorclass=pymysql.cursors.DictCursor)

# Create connection string for sqlalchemy.
# User and password are URL-escaped so characters such as '@', ':' or '/'
# in a credential cannot corrupt the URL.
connect_string = 'mysql+pymysql://{}:{}@{}:{}/{}?charset={}'.format(
    quote_plus(DB_USER), quote_plus(DB_PASS), DB_HOST, DB_PORT, DATABASE, DB_CHARSET)

# Build the SQLAlchemy engine (used by pandas to read query results).
# create_engine() is lazy: no connection is opened until the engine is first used.
sql_engine = sql.create_engine(connect_string)

In [34]:
# Pull every row of the BoxOfficeMojo table into a DataFrame through the
# SQLAlchemy engine, then show it as the cell output.
query = "SELECT * FROM BoxOfficeMojo"
dfSQL = pd.read_sql_query(sql=query, con=sql_engine)

dfSQL

Unnamed: 0,id,Title,Distributor,Genre,MPAARating,ProductionBudget,ReleaseDate,Runtime,DomesticGross,ForeignGross,WorldwideGross,OpeningWeekendGross,OpeningWeekendTheaters,WidestTheaters,Genres,URL
0,1,Black Panther,Buena Vista,Action / Adventure,PG-13,,16-Feb-18,2 hrs. 20 min.,"$700,059,566","$646,845,310","$1,346,904,876","$202,003,951","4,020 theaters","4,084 theaters","['3D', 'Comic Book Adaptation', 'IMAX (Feature...",https://www.boxofficemojo.com/movies/?id=marve...
1,2,Incredibles 2,Buena Vista,Animation,PG,,15-Jun-18,1 hrs. 58 min.,"$606,917,087","$614,200,000","$1,221,117,087","$182,687,905","4,410 theaters","4,410 theaters","['3D', 'Animation', 'Animation - Computer', 'I...",https://www.boxofficemojo.com/movies/?id=thein...
2,3,Deadpool 2,Fox,Action,R,$110 million,18-May-18,1 hrs. 59 min.,"$318,489,015","$415,754,495","$734,243,510","$125,507,153","4,349 theaters","4,349 theaters","['Comedy - Sequel (Live Action)', 'Superhero -...",https://www.boxofficemojo.com/movies/?id=foxma...
3,4,Ant-Man and the Wasp,Buena Vista,Action,PG-13,,6-Jul-18,1 hrs. 58 min.,"$216,368,826","$405,246,936","$621,615,762","$75,812,205","4,206 theaters","4,206 theaters",[],https://www.boxofficemojo.com/movies/?id=ant-m...
4,5,A Quiet Place,Paramount,Horror Thriller,PG-13,$17 million,6-Apr-18,1 hrs. 35 min.,"$188,024,361","$146,500,000","$334,524,361","$50,203,562","3,508 theaters","3,808 theaters","['Horror - Supernatural', 'Off-Screen Couples ...",https://www.boxofficemojo.com/movies/?id=aquie...
5,6,Hotel Transylvania 3:Summer Vacation,Sony / Columbia,Animation,PG,$80 million,13-Jul-18,1 hrs. 37 min.,"$166,505,612","$346,700,000","$513,205,612","$44,076,225","4,267 theaters","4,267 theaters",['Vampire'],https://www.boxofficemojo.com/movies/?id=hotel...
6,7,Ocean's 8,Warner Bros.,Action / Adventure,PG-13,$70 million,8-Jun-18,1 hrs. 50 min.,"$139,377,762","$157,400,000","$296,777,762","$41,607,378","4,145 theaters","4,145 theaters",['Heist / Caper'],https://www.boxofficemojo.com/movies/?id=wbeve...
7,8,Mamma Mia! Here We Go Again,Universal,Musical,PG-13,$75 million,20-Jul-18,1 hrs. 54 min.,"$120,526,990","$271,100,000","$391,626,990","$34,952,180","3,317 theaters","3,514 theaters",['Musical'],https://www.boxofficemojo.com/movies/?id=mamma...
8,9,The Nun,Warner Bros. (New Line),Horror,R,$22 million,7-Sep-18,1 hrs. 36 min.,"$113,367,310","$233,300,000","$346,667,310","$53,807,379","3,876 theaters","3,876 theaters","['Horror - Period', 'Horror - R-Rated', 'Horro...",https://www.boxofficemojo.com/movies/?id=thenu...
9,10,A Wrinkle in Time,Buena Vista,Fantasy,PG,,9-Mar-18,1 hrs. 55 min.,"$100,478,608","$32,197,256","$132,675,864","$33,123,609","3,980 theaters","3,980 theaters",[],https://www.boxofficemojo.com/movies/?id=ladis...


In [4]:
# Location of the scraped dataset. Set the BOXOFFICE_CSV environment variable
# to point somewhere else; the default is the original local path.
CSV_PATH = os.environ.get(
    'BOXOFFICE_CSV',
    '/Users/Love/Documents/Projects/BoxOfficeMojo-Python-WebScraper/src/data/BoxOfficeMojoDataset.csv',
)

# Column order used by the INSERT statement for the BoxOfficeMojo table.
TABLE_COLUMNS = ['id', 'Title', 'Distributor', 'Genre', 'MPAARating', 'ProductionBudget',
                 'ReleaseDate', 'Runtime', 'DomesticGross', 'ForeignGross', 'WorldwideGross',
                 'OpeningWeekendGross', 'OpeningWeekendTheaters', 'WidestTheaters', 'Genres', 'URL']

# Read in dataset
df_raw = pd.read_csv(CSV_PATH)

# Ensure the features are in the appropriate order to be inserted into the database.
# The selection must be assigned: a bare df[[...]] expression only displays a
# reordered copy and leaves the frame itself unchanged. Selecting by name also
# raises a KeyError right here if the CSV is missing an expected column.
df = df_raw[TABLE_COLUMNS]

df.head()

In [33]:
# Columns written to the BoxOfficeMojo table, in the same order as the placeholders.
insert_columns = ['id', 'Title', 'Distributor', 'Genre', 'MPAARating', 'ProductionBudget',
                  'ReleaseDate', 'Runtime', 'DomesticGross', 'ForeignGross', 'WorldwideGross',
                  'OpeningWeekendGross', 'OpeningWeekendTheaters', 'WidestTheaters', 'Genres', 'URL']

# Named insert_sql rather than sql, so the `sqlalchemy as sql` module alias
# imported at the top of the notebook is not overwritten by a string.
insert_sql = "INSERT INTO `BoxOfficeMojo` ({}) VALUES ({})".format(
    ', '.join('`{}`'.format(column) for column in insert_columns),
    ', '.join(['%s'] * len(insert_columns)),
)

# Pick the values by column name (not by position) and stringify each one,
# so the CSV's own column order cannot shift values into the wrong field.
# NOTE(review): missing values (NaN) are stringified to the text 'nan' here,
# as in the original loop; map them to None first if SQL NULL is wanted.
rows = [
    tuple(str(value) for value in record)
    for record in df[insert_columns].itertuples(index=False, name=None)
]

try:
    with connection.cursor() as cursor:
        # Create the new records with one cursor and one batched call
        cursor.executemany(insert_sql, rows)
    # connection is not autocommit by default. So you must commit to save your changes.
    connection.commit()
except Exception:
    # Undo the partial batch so a failed run does not leave half the rows in the table.
    connection.rollback()
    raise