# Data Importing 
## Objective
In this Jupyter Notebook I will be focusing on:
- Unzipping provided Data
- Exploring the provided Data
- Creating a Database to store data for further analysis
- Prepare Data for further analysis

## Import Libraries 

In [1]:
# Import all the necessary libraries
import os #for interacting with path/directory

import numpy as np
import pandas as pd
import sqlite3

from glob import glob

### Load Data into Jupyter Notebook

In [2]:
# Import data into jupyter notebook using glob
csv_files = glob("zippedData/*.csv")

for file in csv_files:
    print(file)

zippedData/tmdb.movies.csv
zippedData/tn.movie_budgets.csv
zippedData/imdb.name.basics.csv
zippedData/imdb.title.principals.csv
zippedData/title.akas.csv
zippedData/bom.movie_gross.csv
zippedData/imdb.title.basics.csv
zippedData/title.ratings.csv


### Create dictionary to store Data properly

In [3]:
# Create a dictionary with file names as the Key
# and their contents as the values
# Clean up file names by getting rid or .csv & .tsv and replacing all '.' with '_'
files_dict = {}
for filename in csv_files:
    filename_cleaned = os.path.basename(filename).replace('.csv', '').replace('.', '_')
    filename_df = pd.read_csv(filename, index_col = 0)
    files_dict[filename_cleaned] = filename_df

In [4]:
# Check Keys to make sure data is stored correctly
for key in files_dict.keys():
    print(key)

tmdb_movies
tn_movie_budgets
imdb_name_basics
imdb_title_principals
title_akas
bom_movie_gross
imdb_title_basics
title_ratings


In [5]:
# Check Values to make sure data is stored correctly
for value in files_dict.values():
    print(value)

                 genre_ids      id original_language  \
0          [12, 14, 10751]   12444                en   
1      [14, 12, 16, 10751]   10191                en   
2            [12, 28, 878]   10138                en   
3          [16, 35, 10751]     862                en   
4            [28, 878, 12]   27205                en   
...                    ...     ...               ...   
26512             [27, 18]  488143                en   
26513             [18, 53]  485975                en   
26514         [14, 28, 12]  381231                en   
26515      [10751, 12, 28]  366854                en   
26516             [53, 27]  309885                en   

                                     original_title  popularity release_date  \
0      Harry Potter and the Deathly Hallows: Part 1      33.533   2010-11-19   
1                          How to Train Your Dragon      28.734   2010-03-26   
2                                        Iron Man 2      28.515   2010-05-07   
3      

## Create SQL Database for Data

In [6]:
# We already imported Sqlite3 so we can go ahead and create a table
conn = sqlite3.connect("movies_db.sqlite")

In [7]:
def create_sql_table_from_df(df, name, conn):
    # Use try except
    # it will try to make a table
    # if a table exists the except part of the code will stop the program from making duplicates
    try:
        df.to_sql(name, conn)
        print(f"Created table {name}")
    
    # if the table exists t will tell you, and won't cause an error
    except Exception as e:
        print(f"could not make table {name}")
        print(e)

In [8]:
for name, table in files_dict.items():
    create_sql_table_from_df(table, name, conn)

could not make table tmdb_movies
Table 'tmdb_movies' already exists.
could not make table tn_movie_budgets
Table 'tn_movie_budgets' already exists.
could not make table imdb_name_basics
Table 'imdb_name_basics' already exists.
could not make table imdb_title_principals
Table 'imdb_title_principals' already exists.
could not make table title_akas
Table 'title_akas' already exists.
could not make table bom_movie_gross
Table 'bom_movie_gross' already exists.
could not make table imdb_title_basics
Table 'imdb_title_basics' already exists.
could not make table title_ratings
Table 'title_ratings' already exists.


In [9]:
# Check tables for accuracy
conn.execute('''SELECT name
                FROM sqlite_master 
                WHERE type='table';
                ''').fetchall()

[('tmdb_movies',),
 ('tn_movie_budgets',),
 ('imdb_name_basics',),
 ('imdb_title_principals',),
 ('imdb_title_akas',),
 ('title_akas',),
 ('bom_movie_gross',),
 ('imdb_title_basics',),
 ('title_ratings',)]