In [1]:
import pyodbc
import pandas as pd
# for keeping credentials out of sight
import os
from dotenv import load_dotenv


# Connection settings are read from a "sql-keys.env" file in the working
# directory, so credentials never appear in the notebook itself. The file must
# define DB_SERVER, DB_NAME, DB_USERNAME, DB_PASSWORD and DB_DRIVER.
# getcwd() returns the current working directory
wdir_path = os.getcwd()

sql_path = os.path.join(wdir_path, "sql-keys.env")  # absolute path of "sql-keys.env"
# load the credentials into the process environment
load_dotenv(sql_path)

# Check that every required setting is available. Only the *names* of missing
# variables are reported; displaying os.environ would put every secret into
# the notebook output.
required_vars = ("DB_SERVER", "DB_NAME", "DB_USERNAME", "DB_PASSWORD", "DB_DRIVER")
missing_vars = [name for name in required_vars if os.getenv(name) is None]
if missing_vars:
    raise RuntimeError(
        f"Missing database settings {missing_vars}; "
        f"define them in {sql_path} or in the environment"
    )

# getting credentials information from "sql-keys.env"
server = os.getenv("DB_SERVER")
database = os.getenv("DB_NAME")
username = os.getenv("DB_USERNAME")
password = os.getenv("DB_PASSWORD")
driver = os.getenv("DB_DRIVER")

# Establish the connection (the connection string is passed to the ODBC driver as-is)
conn = pyodbc.connect(
    f"DRIVER={driver};SERVER={server};PORT=1433;"
    f"DATABASE={database};UID={username};PWD={password}"
)

print(conn)

<pyodbc.Connection object at 0x00000248779271C0>


In [4]:
# Open a cursor on the connection; the following cells use it to run SQL statements.
cursor = conn.cursor()

In [5]:
# Column definitions of the movies table, one entry per column.
movie_columns = [
    "movie_id INT IDENTITY(1,1) PRIMARY KEY",
    "movie_title VARCHAR(255) NOT NULL",
    "released_year INT",
    "runtime INT NOT NULL",
    "genre VARCHAR(255)",
    "rating INT NOT NULL",
    "director VARCHAR(255)",
    "star1 VARCHAR(255)",
    "star2 VARCHAR(255)",
    "number_of_votes INT",
    "gross INT",
]
# NOTE(review): IDENTITY(1,1) and port 1433 suggest SQL Server, whose
# CREATE TABLE does not document an IF NOT EXISTS clause — confirm this
# statement against the target database.
cursor.execute("CREATE TABLE IF NOT EXISTS movies (" + ", ".join(movie_columns) + ")")

<pyodbc.Cursor at 0x23632f50a30>

In [9]:
# Source data: a semicolon-separated, UTF-8 encoded CSV, resolved relative to
# the working directory.
fpath = "movies.CSV"
df = pd.read_csv(fpath, sep=";", encoding="utf8")
# Preview the first rows to check how the columns were parsed.
df.head()

Unnamed: 0,Title,Released_Year,Runtime,Genre,Rating,Director,Star1,Star2,Number_of_Votes,Gross
0,The Dark Knight,2008,152 min,"Action, Crime, Drama",90,Christopher Nolan,Christian Bale,Heath Ledger,2303232,534858444
1,The Lord of the Rings: The Return of the King,2003,201 min,"Action, Adventure, Drama",89,Peter Jackson,Elijah Wood,Viggo Mortensen,1642758,377845905
2,Inception,2010,148 min,"Action, Adventure, Sci-Fi",88,Christopher Nolan,Leonardo DiCaprio,Joseph Gordon-Levitt,2067042,292576195
3,The Lord of the Rings: The Fellowship of the Ring,2001,178 min,"Action, Adventure, Drama",88,Peter Jackson,Elijah Wood,Ian McKellen,1661481,315544750
4,The Lord of the Rings: The Two Towers,2002,179 min,"Action, Adventure, Drama",87,Peter Jackson,Elijah Wood,Ian McKellen,1485555,342551365


In [10]:
def convert_runtime(x):
    """
    Convert a runtime value to an int number of minutes.

    - Strings such as "152 min": the first whitespace-separated token is
      converted to int, which drops the "min" suffix.
    - Non-string values (e.g. an int produced by an earlier run of the
      conversion cell) are passed to int() directly, so applying the
      function twice does not raise AttributeError.

    Raises ValueError if the value cannot be interpreted as an integer
    (this includes NaN) and IndexError for an empty/blank string.
    """
    if isinstance(x, str):
        return int(x.split()[0])
    return int(x)

In [11]:
# Replace each Runtime entry with the integer returned by convert_runtime.
df['Runtime'] = df['Runtime'].map(convert_runtime)

In [12]:
# Keep only the first genre of each comma-separated list
# (e.g. "Action, Crime, Drama" -> "Action").
# The vectorised .str accessor leaves missing genres as missing instead of
# raising; the movies table allows NULL in its genre column.
df['Genre'] = df['Genre'].str.split(', ').str[0]

In [13]:
def convert_gross(x):
    """
    Convert a gross value to int.

    - Strings such as "534,858,444": commas are removed and the result is
      converted to int.
    - Non-string values (e.g. an int produced by an earlier run of the
      conversion cell) are passed to int() directly, so applying the
      function twice does not raise AttributeError.

    Raises ValueError if the value cannot be interpreted as an integer
    (this includes NaN and empty strings).
    """
    if isinstance(x, str):
        return int(x.replace(',', ''))
    return int(x)

In [14]:
# Replace each Gross entry with the integer returned by convert_gross.
df['Gross'] = df['Gross'].map(convert_gross)

In [15]:
# Parameterised INSERT; movie_id is omitted because the table generates it.
insert_sql = (
    "INSERT INTO movies "
    "(movie_title, released_year, runtime, "
    "genre, rating, director, "
    "star1, star2, number_of_votes, gross) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)
# DataFrame columns in the same order as the placeholders above.
source_columns = ['Title', 'Released_Year', 'Runtime', 'Genre', 'Rating',
                  'Director', 'Star1', 'Star2', 'Number_of_Votes', 'Gross']

# One plain tuple per DataFrame row; missing values (NaN) become None so the
# driver sends them as SQL NULL.
insert_params = [
    tuple(None if pd.isna(value) else value for value in record)
    for record in df[source_columns].itertuples(index=False, name=None)
]

# Send all rows with a single executemany call instead of one execute per
# row. The call is skipped for an empty frame, which matches the original
# loop doing nothing in that case.
if insert_params:
    cursor.executemany(insert_sql, insert_params)

In [16]:
# Commit the statements executed so far on this connection (table creation and inserts).
conn.commit()

In [17]:
# Read back the id and title of every row in the movies table.
select_movies_sql = "SELECT movie_id, movie_title FROM movies"
rows = cursor.execute(select_movies_sql).fetchall()

In [18]:
# Print each fetched (movie_id, movie_title) row on its own line.
for row in rows:
    print(row)

(1, 'The Dark Knight')
(2, 'The Lord of the Rings: The Return of the King')
(3, 'Inception')
(4, 'The Lord of the Rings: The Fellowship of the Ring')
(5, 'The Lord of the Rings: The Two Towers')
(6, 'Interstellar')
(7, 'Joker')
(8, 'Whiplash')
(9, 'The Intouchables')
(10, 'The Prestige')
(11, 'The Departed')
(12, 'The Pianist')
(13, 'Gladiator')
(14, 'Spider-Man: Into the Spider-Verse')
(15, 'Avengers: Endgame')
(16, 'Avengers: Infinity War')
(17, 'Coco')
(18, 'Django Unchained')
(19, 'The Dark Knight Rises')
(20, '3 Idiots')
(21, 'Taare Zameen Par')
(22, 'WALL·E')
(23, 'The Lives of Others')
(24, 'Memento')
(25, '1917')
(26, 'Amelie')
(27, 'Snatch')
(28, 'Requiem for a Dream')
(29, 'Green Book')
(30, 'Drishyam')
(31, 'Queen')
(32, 'Warrior')
(33, 'Shutter Island')
(34, 'Up')
(35, 'The Wolf of Wall Street')
(36, 'Chak De! India')
(37, 'There Will Be Blood')
(38, "Pan's Labyrinth")
(39, 'Toy Story 3')
(40, 'V for Vendetta')
(41, 'Rang De Basanti')
(42, 'Black')
(43, 'Batman Begins')
(44

  print(row)
