In [1]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import ForeignKey
Base = declarative_base()
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, Column, Integer, String, Float, MetaData
import pandas as pd

In [2]:
engine = create_engine("sqlite:///movie_rating_v_revenue.sqlite", echo=False)
connection = engine.connect()
meta = MetaData()

In [3]:
# Need to use declarative base
Base = declarative_base()

In [4]:
#Movie class
class Movie(Base):
    __tablename__ = 'movie'
    id = Column(Integer, primary_key=True)
    title = Column(String)
    content_rating = Column(String)
    year = Column(Integer)
    runtime = Column(Float)

In [5]:
#Financial Class
class Financial(Base):
    __tablename__ = 'financial'
    id = Column(Integer, ForeignKey("movie.id"), primary_key = True, nullable=False)
    budget = Column(Float)
    gross = Column(Float)
    profit = Column(Float)

In [6]:
#Review Info
class Reviews(Base):
    __tablename__ = 'reviews'
    id = Column(Integer, ForeignKey("movie.id"), primary_key = True, nullable=False)
    imdb_score = Column(Float)
    imdb_reviewers = Column(Integer)
    imdb_voters = Column(Integer)
    imdb_user_reviewers = Column(Integer)
    top_list_rating = Column(Float)
    top_list_votes = Column(Integer)

In [7]:
#Social Media Info
class SocialMedia(Base):
    __tablename__ = 'social_media'
    id = Column(Integer, ForeignKey("movie.id"), primary_key = True, nullable=False)
    movie_likes = Column(Integer)
    director_likes = Column(Integer)
    cast_likes = Column(Integer)
    actor1_likes = Column(Integer)
    actor2_likes = Column(Integer)
    actor3_likes = Column(Integer)

In [8]:
#Social Media Info
class Actor(Base):
    __tablename__ = 'actor'
    id = Column(Integer, primary_key = True, nullable=False)
    actor_name = Column(String)

In [9]:
#Actor-film Info
class Actor_Movie(Base):
    __tablename__ = 'actor_movie'
    id = Column(Integer, primary_key = True, nullable=False)
    actor_id = Column(Integer, ForeignKey("actor.id"), nullable=False)
    movie_id = Column(Integer, ForeignKey("movie.id"), nullable=False)

In [10]:
#Genre Info
class Genre(Base):
    __tablename__ = 'genre'
    id = Column(Integer, primary_key = True, nullable=False)
    genre_name = Column(String)

In [11]:
#Genre-film Info
class Genre_Movie(Base):
    __tablename__ = 'genre_movie'
    id = Column(Integer, primary_key = True, nullable=False)
    genre_id = Column(Integer, ForeignKey("actor.id"), nullable=False)
    movie_id = Column(Integer, ForeignKey("movie.id"), nullable=False)

In [22]:
# This is where we create our tables in the database
Base.metadata.create_all(engine)

In [13]:
# create a session
session = Session(engine)

In [15]:
session.commit()

In [23]:
meta.reflect(engine)
meta.tables.keys()

dict_keys(['actor', 'actor_movie', 'movie', 'financial', 'genre', 'genre_movie', 'reviews', 'social_media'])

In [18]:
all_df = pd.read_csv('combined_df.csv')

In [19]:
all_df.columns

Index(['title', 'content_rating', 'budget', 'gross', 'imdb_score',
       'facenumber_in_poster', 'num_critic_for_reviews', 'num_voted_users',
       'num_user_for_reviews', 'movie_facebook_likes',
       'cast_total_facebook_likes', 'actor_1_facebook_likes',
       'actor_2_facebook_likes', 'actor_3_facebook_likes',
       'director_facebook_likes', 'genre', 'actors', 'year',
       'revenue_millions', 'runtime_minutes', 'rating', 'votes', 'metascore'],
      dtype='object')

In [20]:
movies_df = all_df[['title','content_rating','year','runtime_minutes']]
movies_df = movies_df.rename(columns = {'runtime_minutes':'runtime'})
movies_df.head()

Unnamed: 0,title,content_rating,year,runtime
0,Avatar,PG-13,2009,162
1,Pirates of the Caribbean: At World's End,PG-13,2007,169
2,Spectre,PG-13,2015,148
3,The Dark Knight Rises,PG-13,2012,164
4,John Carter,PG-13,2012,132


In [26]:
movies_df.to_sql('movie', connection, if_exists = 'append', index = False)

In [29]:
session.commit()

In [30]:
movie_check_df = pd.read_sql('SELECT * FROM movie', connection)

In [31]:
movie_check_df.head()

Unnamed: 0,id,title,content_rating,year,runtime
0,1,Avatar,PG-13,2009,162.0
1,2,Pirates of the Caribbean: At World's End,PG-13,2007,169.0
2,3,Spectre,PG-13,2015,148.0
3,4,The Dark Knight Rises,PG-13,2012,164.0
4,5,John Carter,PG-13,2012,132.0


In [32]:
financial_df = all_df[['budget','gross']].copy()
financial_df['profit'] = financial_df['gross'] - financial_df['budget']
financial_df.head()

Unnamed: 0,budget,gross,profit
0,237000000.0,760505847.0,523505847.0
1,300000000.0,309404152.0,9404152.0
2,245000000.0,200074175.0,-44925825.0
3,250000000.0,448130642.0,198130642.0
4,263700000.0,73058679.0,-190641321.0


In [33]:
financial_df.to_sql('financial', connection, if_exists = 'append', index = False)

In [34]:
session.commit()

In [38]:
financial_check_df = pd.read_sql('SELECT * FROM financial', connection)

In [39]:
financial_check_df.head()

Unnamed: 0,id,budget,gross,profit
0,1,237000000.0,760505847.0,523505847.0
1,2,300000000.0,309404152.0,9404152.0
2,3,245000000.0,200074175.0,-44925825.0
3,4,250000000.0,448130642.0,198130642.0
4,5,263700000.0,73058679.0,-190641321.0


In [44]:
review_df = all_df[['imdb_score','num_critic_for_reviews','num_voted_users','num_user_for_reviews','metascore','votes']]
review_df = review_df.rename(columns = {'num_critic_for_reviews':'imdb_reviewers','num_voted_users':'imdb_voters',
                              'num_voted_users':'imbd_user_reviewers','metascore':'top_list_rating',
                              'votes':'top_list_votes'})
review_df.head()

Unnamed: 0,imdb_score,imdb_reviewers,imbd_user_reviewers,num_user_for_reviews,top_list_rating,top_list_votes
0,7.9,723.0,886204,3054.0,83.0,935408
1,7.1,302.0,471220,1238.0,50.0,498821
2,6.8,602.0,275868,994.0,60.0,308981
3,8.5,813.0,1144337,2701.0,78.0,1222645
4,6.6,462.0,212204,738.0,51.0,220667


In [46]:
review_df.to_sql('reviews', connection, if_exists = 'append', index = False)

OperationalError: (sqlite3.OperationalError) table reviews has no column named imbd_user_reviewers [SQL: 'INSERT INTO reviews (imdb_score, imdb_reviewers, imbd_user_reviewers, num_user_for_reviews, top_list_rating, top_list_votes) VALUES (?, ?, ?, ?, ?, ?)'] [parameters: ((7.9, 723.0, 886204, 3054.0, 83.0, 935408), (7.1, 302.0, 471220, 1238.0, 50.0, 498821), (6.8, 602.0, 275868, 994.0, 60.0, 308981), (8.5, 813.0, 1144337, 2701.0, 78.0, 1222645), (6.6, 462.0, 212204, 738.0, 51.0, 220667), (6.2, 392.0, 383056, 1902.0, 59.0, 406219), (6.2, 392.0, 383071, 1902.0, 59.0, 406219), (7.8, 324.0, 294810, 387.0, 71.0, 316459)  ... displaying 10 of 616 total bound parameter sets ...  (7.4, 377.0, 141425, 283.0, 81.0, 151409), (5.4, 285.0, 21468, 164.0, 38.0, 26461))] (Background on this error at: http://sqlalche.me/e/e3q8)