In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Extract CSVs into DataFrames

In [2]:
# Relative path to the Bitstamp USD 1-minute export (date range per the file name).
bitstamp_file = "resources/bitstampUSD_1-min_data_2012-01-01_to_2019-08-12.csv"

# Load the raw CSV unchanged, then preview its first five rows.
bitstamp_df = pd.read_csv(bitstamp_file)
bitstamp_df.head(n=5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [3]:
# Relative path to the Coinbase USD 1-minute export (date range per the file name).
coinbase_file = "resources/coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv"

# Load the raw CSV unchanged, then preview its first five rows.
coinbase_df = pd.read_csv(coinbase_file)
coinbase_df.head(n=5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1417411980,300.0,300.0,300.0,300.0,0.01,3.0,300.0
1,1417412040,,,,,,,
2,1417412100,,,,,,,
3,1417412160,,,,,,,
4,1417412220,,,,,,,


In [4]:
# Relative path to the consolidated multi-currency price file.
crypto_file = "resources/consolidated_coin_data.csv"

# Load the raw CSV unchanged, then preview its first five rows.
crypto_df = pd.read_csv(crypto_file)
crypto_df.head(n=5)

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,ripple,"Apr 24, 2019",0.321114,0.321282,0.296982,0.302318,1517791002,12698877293
1,ripple,"Apr 23, 2019",0.323844,0.328396,0.320919,0.321222,1077333990,13492933875
2,ripple,"Apr 22, 2019",0.322277,0.32935,0.320237,0.323934,1131094080,13606823301
3,ripple,"Apr 21, 2019",0.328678,0.329627,0.318746,0.322449,1005803846,13533407430
4,ripple,"Apr 20, 2019",0.331871,0.333213,0.324969,0.328476,931570799,13786384592


### Transform bitstamp DataFrame

In [24]:
# Build the cleaned Bitstamp frame from the raw CSV data. Every step of the
# chain returns a new DataFrame, so bitstamp_df is left untouched and this
# cell gives the same result each time it is re-run.
bitstamp_transform = (
    bitstamp_df
    # Keep only rows with at least two non-null values. This removes the gap
    # rows in which nothing but Timestamp is populated.
    .dropna(thresh=2)
    # Volume and weighted-price columns are not loaded into the database.
    .drop(columns=['Volume_(BTC)', 'Volume_(Currency)', 'Weighted_Price'])
    # Rename by source column name (not by position) to values acceptable
    # within postgres, so a change in CSV column order cannot mislabel data.
    .rename(columns={
        'Timestamp': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
    })
    # Renumber the surviving rows 0..n-1; this new index is used as the id.
    .reset_index(drop=True)
)

# Add a column specifying the currency, placed first.
bitstamp_transform.insert(0, 'currency', 'bitcoin')

# Convert Unix-second timestamps to strings such as "Dec 31, 2011", the same
# format as the Date column of the consolidated coin data.
# NOTE(review): this discards the time of day, so every 1-minute row of a
# given day ends up with the same date value - confirm that is intended.
bitstamp_transform['date'] = (
    pd.to_datetime(bitstamp_transform['date'], unit='s').dt.strftime('%b %d, %Y')
)

# Display the transformed DataFrame
bitstamp_transform

Unnamed: 0,currency,date,open,high,low,close
0,bitcoin,"Dec 31, 2011",4.39,4.39,4.39,4.39
1,bitcoin,"Dec 31, 2011",4.39,4.39,4.39,4.39
2,bitcoin,"Dec 31, 2011",4.50,4.57,4.50,4.57
3,bitcoin,"Dec 31, 2011",4.58,4.58,4.58,4.58
4,bitcoin,"Jan 01, 2012",4.58,4.58,4.58,4.58
...,...,...,...,...,...,...
2765814,bitcoin,"Aug 11, 2019",11555.57,11555.57,11540.37,11540.58
2765815,bitcoin,"Aug 11, 2019",11553.49,11556.22,11553.49,11556.22
2765816,bitcoin,"Aug 11, 2019",11559.73,11561.22,11546.77,11561.22
2765817,bitcoin,"Aug 11, 2019",11559.73,11589.73,11528.73,11528.73


### Transform coinbase DataFrame

In [25]:
# Build the cleaned Coinbase frame from the raw CSV data. Every step of the
# chain returns a new DataFrame, so coinbase_df is left untouched and this
# cell gives the same result each time it is re-run.
coinbase_transform = (
    coinbase_df
    # Keep only rows with at least two non-null values. This removes the gap
    # rows in which nothing but Timestamp is populated.
    .dropna(thresh=2)
    # Volume and weighted-price columns are not loaded into the database.
    .drop(columns=['Volume_(BTC)', 'Volume_(Currency)', 'Weighted_Price'])
    # Rename by source column name (not by position) to values acceptable
    # within postgres, so a change in CSV column order cannot mislabel data.
    .rename(columns={
        'Timestamp': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
    })
    # Renumber the surviving rows 0..n-1; this new index is used as the id.
    .reset_index(drop=True)
)

# Add a column specifying the currency, placed first.
coinbase_transform.insert(0, 'currency', 'bitcoin')

# Convert Unix-second timestamps to strings such as "Dec 01, 2014", the same
# format as the Date column of the consolidated coin data.
# NOTE(review): this discards the time of day, so every 1-minute row of a
# given day ends up with the same date value - confirm that is intended.
coinbase_transform['date'] = (
    pd.to_datetime(coinbase_transform['date'], unit='s').dt.strftime('%b %d, %Y')
)

# Display the transformed DataFrame
coinbase_transform

Unnamed: 0,currency,date,open,high,low,close
0,bitcoin,"Dec 01, 2014",300.00,300.00,300.00,300.00
1,bitcoin,"Dec 01, 2014",300.00,300.00,300.00,300.00
2,bitcoin,"Dec 01, 2014",370.00,370.00,370.00,370.00
3,bitcoin,"Dec 01, 2014",370.00,370.00,370.00,370.00
4,bitcoin,"Dec 02, 2014",377.00,377.00,377.00,377.00
...,...,...,...,...,...,...
1990686,bitcoin,"Jan 07, 2019",4006.01,4006.57,4006.00,4006.01
1990687,bitcoin,"Jan 07, 2019",4006.01,4006.57,4006.00,4006.01
1990688,bitcoin,"Jan 07, 2019",4006.01,4006.01,4006.00,4006.01
1990689,bitcoin,"Jan 07, 2019",4006.01,4006.01,4005.50,4005.50


### Transform consolidated DataFrame

In [33]:
# Relabel the consolidated columns, by source name rather than by position,
# to values acceptable within postgres. rename() returns a new DataFrame, so
# crypto_df is left untouched.
crypto_transform = crypto_df.rename(columns={
    'Currency': 'currency',
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume_eth',
    'Market Cap': 'market_cap',
})

eth_df = (
    crypto_transform
    # Drop all non-ETH cryptocurrencies.
    .loc[crypto_transform['currency'] == 'ethereum']
    # Reverse the row order to match the oldest-first order of the other
    # DataFrames. This does not sort by date: it assumes the ethereum rows
    # are stored newest-first, as the preview of crypto_df shows for ripple
    # - TODO confirm.
    .iloc[::-1]
    # Drop columns that won't be used.
    .drop(columns=['market_cap', 'volume_eth'])
    # Renumber the rows 0..n-1; this new index is used as the id.
    .reset_index(drop=True)
)

# Display the transformed DataFrame
eth_df

Unnamed: 0,currency,date,open,high,low,close
0,ethereum,"Aug 07, 2015",2.830000,3.540000,2.520000,2.770000
1,ethereum,"Aug 08, 2015",2.790000,2.800000,0.714725,0.753325
2,ethereum,"Aug 09, 2015",0.706136,0.879810,0.629191,0.701897
3,ethereum,"Aug 10, 2015",0.713989,0.729854,0.636546,0.708448
4,ethereum,"Aug 11, 2015",0.708087,1.130000,0.663235,1.070000
...,...,...,...,...,...,...
1352,ethereum,"Apr 20, 2019",173.720000,176.710000,171.790000,173.750000
1353,ethereum,"Apr 21, 2019",173.720000,174.420000,167.430000,170.050000
1354,ethereum,"Apr 22, 2019",170.020000,173.070000,168.730000,171.870000
1355,ethereum,"Apr 23, 2019",172.010000,176.850000,170.900000,171.450000


### Create database connection

In [8]:
# Read the Postgres connection settings from environment variables so that no
# credentials are stored in the notebook. POSTGRES_PASSWORD is required (a
# KeyError here means it is not set); the other settings fall back to the
# values this notebook previously hard-coded.
db_user = os.environ.get("POSTGRES_USER", "postgres")
db_password = os.environ["POSTGRES_PASSWORD"]
db_host = os.environ.get("POSTGRES_HOST", "localhost")
db_port = os.environ.get("POSTGRES_PORT", "5432")
db_name = os.environ.get("POSTGRES_DB", "etl_db")

# Percent-encode the password so characters such as "@" or "/" cannot break
# the URL. Avoid displaying connection_string: it contains the password.
connection_string = f"{db_user}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

In [9]:
# List the tables that already exist in the database. This opens a connection,
# so it also serves as a check that the credentials work.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names.
inspect(engine).get_table_names()

OperationalError: (psycopg2.OperationalError) FATAL:  password authentication failed for user "postgres"

(Background on this error at: http://sqlalche.me/e/e3q8)

### Load DataFrames into database

In [None]:
# Append the transformed Bitstamp rows to the "bitstamp" table.
# - index=True (the pandas default, spelled out here) also writes the
#   DataFrame index as a column.
# - chunksize sends the rows in batches instead of writing the whole
#   multi-million-row frame at once.
# NOTE: with if_exists='append', re-running this cell inserts the same rows
# again (or fails, if the table enforces uniqueness).
bitstamp_transform.to_sql(
    name='bitstamp',
    con=engine,
    if_exists='append',
    index=True,
    chunksize=10_000,
)

In [None]:
# Append the transformed Coinbase rows to the "coinbase" table.
# - index=True also writes the DataFrame index as a column.
# - chunksize sends the rows in batches instead of writing the whole
#   multi-million-row frame at once.
# NOTE: with if_exists='append', re-running this cell inserts the same rows
# again (or fails, if the table enforces uniqueness).
coinbase_transform.to_sql(
    name='coinbase',
    con=engine,
    if_exists='append',
    index=True,
    chunksize=10_000,
)