In [4]:
# we will need the credentials we saved in the .env file
from dotenv import dotenv_values

# We also will need SQLAlchemy and its functions
from sqlalchemy import create_engine, types
from sqlalchemy.dialects.postgresql import JSON as postgres_json

import pandas as pd

# requests library will make the API calls. 
# the json package will parse the JSON string and convert it to Python data structures
import requests
import json

# with 'datetime' we want to catch the timestamp of the API call. For the actuality reference. 
# and 'time' for slowing down a .bit
from datetime import datetime
import time

In [5]:
barstov_merged = pd.read_csv('../Capstone Project/Data/barstov_merged.csv', sep=',')

In [6]:
barstov_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28682331 entries, 0 to 28682330
Data columns (total 18 columns):
 #   Column              Dtype  
---  ------              -----  
 0   Unnamed: 0          int64  
 1   t_dat               object 
 2   customer_id         object 
 3   article_id          int64  
 4   price               float64
 5   age                 float64
 6   postal_code         object 
 7   product_type_no     int64  
 8   product_type_name   object 
 9   product_group_name  object 
 10  colour_group_code   int64  
 11  colour_group_name   object 
 12  index_code          object 
 13  index_name          object 
 14  index_group_no      int64  
 15  index_group_name    object 
 16  section_name        object 
 17  garment_group_no    int64  
dtypes: float64(2), int64(6), object(10)
memory usage: 3.8+ GB


In [3]:
# getting API and DB credentials - Alternative 1: dotenv_values()

config = dotenv_values()
 
pg_user = config['POSTGRES_USER'] # align the key labels with your .env file
pg_host = config['POSTGRES_HOST']
pg_port = config['POSTGRES_PORT']
pg_db = config['POSTGRES_DB']
pg_schema = config['POSTGRES_SCHEMA']
pg_pass = config['POSTGRES_PASS']

In [4]:
# updating the url
url = f'postgresql://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/{pg_db}'

# creating the engine
engine = create_engine(url, echo=False)

In [5]:
engine.url # checking the url (pass is hidden)

postgresql://ivanchertov:***@data-analytics-course-2.c8g8r1deus2v.eu-central-1.rds.amazonaws.com:5432/hh_analytics_24_2

In [6]:
# defining data types for the DB
dtype_dict = {
    't_dat':types.DateTime,
    'customer_id': types.String,
    'article_id': types.Integer,
    'price': types.Float,
    'age': types.Float,
    'postal_code': types.String,
    'product_type_no': types.Integer,
    'product_type_name': types.String,
    'product_group_name': types.String,
    'colour_group_code': types.Integer,
    'colour_group_name': types.String,
    'index_code': types.String,
    'index_name': types.String,
    'index_group_no': types.Integer,
    'index_group_name': types.String,
    'section_name': types.String,
    'garment_group_no': types.Integer
             }

: 

In [None]:
# writing dataframe to DB
barstov_merged.to_sql(name = 'barstov_data_merged', 
                       con = engine, 
                       schema = pg_schema, # pandas is allowing to specify, in which schema the table shall be created
                       if_exists='replace', 
                       dtype=dtype_dict,
                       index=False
                      )