# Jeff Pinegar
Project 2: ETL 
Due Dec. 23, 2022

### Load data into the Postgres database
---

In [7]:
# Import needed libraries
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

---
### Read in CSV files

In [8]:
# Build the relative paths to the three cleaned CSV extracts in ./Resources
Sales_Data, MUDD, CMATCodes = (
    os.path.join('.', 'Resources', csv_name)
    for csv_name in (
        'Sales_Data_Clean.csv',     # sales data
        'Mfg_Data_Clean.csv',       # manufacturing data
        'CMAT Decoder.csv',         # CMAT code to English lookup
    )
)

# Load each extract into its own DataFrame; every file is read as UTF-8
dfs_clean = pd.read_csv(filepath_or_buffer=Sales_Data, encoding="utf-8")
dfm_clean = pd.read_csv(filepath_or_buffer=MUDD, encoding="utf-8")
dfc_clean = pd.read_csv(filepath_or_buffer=CMATCodes, encoding="utf-8")

In [9]:
# Preview the first two rows of the sales data as a quick sanity check
dfs_clean.head(n=2)

Unnamed: 0.1,Unnamed: 0,IPC_Serial_No,Sell_Date,Country,Sale_Price_EUR
0,0,2125MW0241,2022-01-03,IT,437.14
1,1,2125MW0278,2022-01-03,IT,437.14


---
### Open a connection to the Postgres database

In [10]:
# Set the connection to the Postgres database created for this project.
# The password is taken from the ETL_DB_PASSWORD environment variable when it
# is set, so real credentials do not have to be stored in the notebook; the
# fallback keeps the original local development default.
protocol = 'postgresql'
username = 'postgres'
password = os.environ.get('ETL_DB_PASSWORD', 'jsp')
host = 'localhost'
port = 5432
database_name = 'etl_db'

# Percent-encode the credentials so characters such as '@', ':' or '/' in a
# user name or password cannot be mistaken for URL delimiters. With the
# default values the resulting string is identical to the unescaped form.
rds_connection_string = (
    f'{protocol}://{quote(username, safe="")}:{quote(password, safe="")}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [11]:
# Verify that the tables are there to be loaded.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported way to list the tables in the database.
inspect(engine).get_table_names()

  engine.table_names()


['cmatcodes', 'muddata', 'salesinfo']

In [12]:
# Write the CMAT code lookup into the cmatcodes table
dfc_clean.to_sql(
    name='cmatcodes',       # target table
    con=engine,             # connection to the Postgres database
    if_exists='replace',    # drop and recreate the table if it already exists
    index=False,            # do not write the DataFrame index as a column
)

471

In [13]:
# Write the manufacturing data into the muddata table
dfm_clean.to_sql(
    name='muddata',         # target table
    con=engine,             # connection to the Postgres database
    if_exists='replace',    # drop and recreate the table if it already exists
    index=False,            # do not write the DataFrame index as a column
)

518

In [14]:
# Write the sales data into the salesinfo table, keeping the DataFrame index
# as an "ID" column.
# NOTE(review): to_sql writes the index as an ordinary column and does not by
# itself declare a PRIMARY KEY constraint; if "ID" is meant to be the primary
# key, confirm that the constraint is added separately.
dfs_clean.to_sql(
    name='salesinfo',       # target table
    con=engine,             # connection to the Postgres database
    if_exists='replace',    # drop and recreate the table if it already exists
    index=True,             # write the DataFrame index as a column
    index_label="ID",       # name that index column ID
)

287