In [23]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

import config

# Load Ethereum and Bitcoin Historical Data

* This notebook should only be run **once** in order to load the cleaned, up-to-date (as of 8/14/20) 1-minute price history data for the Ethereum and Bitcoin cryptocurrencies

* Run this notebook only after creating the table schemas for the two cryptocurrencies in pgAdmin. Creating the tables and loading them into the database directly from this notebook may be supported at a later time, but for now that feature is unavailable

* The table schema code will be included in this repo and the instructions to use it should be located in the README

* A separate notebook will be created and run repeatedly to update the database incrementally as prices change and new data is generated over time

## Load Ethereum Data

* The steps below outline how to initially load the Ethereum (ETH) data from the .csv located in the **Ethereum/IO/** folder

In [24]:
# Build the database connection string - it is handed to sqlalchemy's create_engine later on.
# "postgresql" is the dialect name sqlalchemy expects; the legacy "postgres" alias
# is rejected by SQLAlchemy 1.4 and newer.
protocol = "postgresql"
user = config.user
password = config.pw
location = "localhost"
port = "5432"
database = "crypto"

# Percent-encode the credentials so characters such as "@", ":" or "/" inside them
# cannot be mistaken for URL delimiters when the string is parsed.
connection_string = (
    f"{protocol}://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{location}:{port}/{database}"
)

# Show where we are connecting with the password masked - printing the raw string
# would store the password in the saved notebook output.
print(f"{protocol}://{user}:***@{location}:{port}/{database}")

postgres://postgres:***@localhost:5432/crypto


In [25]:
# Location of the Ethereum 1-minute price history csv that will be pushed into SQL
eth_csv = './Ethereum/IO/ETH_1min.csv'

# Read the csv and, in the same chain, turn the "Date" column (read in as text)
# into timezone-aware UTC datetime objects
eth_df = pd.read_csv(eth_csv).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], utc=True)
)
eth_df

Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,1462800720,2016-05-09 13:32:00+00:00,ETH-USD,0.00,12.00,0.00,12.00,4.156276
1,1462800780,2016-05-09 13:33:00+00:00,ETH-USD,12.00,12.00,10.00,10.00,43.497506
2,1462800840,2016-05-09 13:34:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,8.000000
3,1462800900,2016-05-09 13:35:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,60.000000
4,1462800960,2016-05-09 13:36:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,0.375000
...,...,...,...,...,...,...,...,...
2156947,1597424940,2020-08-14 17:09:00+00:00,ETH-USD,438.82,439.45,438.66,439.28,133.787161
2156948,1597425000,2020-08-14 17:10:00+00:00,ETH-USD,439.17,439.37,438.65,438.82,675.504772
2156949,1597425060,2020-08-14 17:11:00+00:00,ETH-USD,438.76,438.83,437.71,438.37,489.871679
2156950,1597425120,2020-08-14 17:12:00+00:00,ETH-USD,438.08,438.62,437.84,438.46,168.889072


In [26]:
# Relabel the dataframe columns (by position) so they line up with the database table schema
sql_columns = [
    "Unix_Timestamp",
    "Entry_Date",
    "Symbol",
    "Open_Price",
    "High_Price",
    "Low_Price",
    "Close_Price",
    "Coin_Volume",
]
# The labels applied to the dataframe are the lowercase form of the names above
lowercase_sql_columns = list(map(str.lower, sql_columns))
eth_df.columns = lowercase_sql_columns
eth_df.head()

Unnamed: 0,unix_timestamp,entry_date,symbol,open_price,high_price,low_price,close_price,coin_volume
0,1462800720,2016-05-09 13:32:00+00:00,ETH-USD,0.0,12.0,0.0,12.0,4.156276
1,1462800780,2016-05-09 13:33:00+00:00,ETH-USD,12.0,12.0,10.0,10.0,43.497506
2,1462800840,2016-05-09 13:34:00+00:00,ETH-USD,10.0,10.0,10.0,10.0,8.0
3,1462800900,2016-05-09 13:35:00+00:00,ETH-USD,10.0,10.0,10.0,10.0,60.0
4,1462800960,2016-05-09 13:36:00+00:00,ETH-USD,10.0,10.0,10.0,10.0,0.375


In [27]:
# Set up the sqlalchemy engine and then load the dataframe into the SQL table.

# Create the engine used to talk to the database, from the connection string
engine = create_engine(connection_string)

# Guard against double-loading: the write below appends, so running this cell a
# second time would duplicate every row. The table itself must already exist
# (the schema is created beforehand), so counting its rows is safe here.
existing_rows = int(
    pd.read_sql_query("SELECT COUNT(*) AS n FROM ethereum", con=engine).iloc[0, 0]
)

if existing_rows:
    print(f"The 'ethereum' table already holds {existing_rows} rows - skipping the load to avoid duplicates.")
else:
    # chunksize writes the rows in batches instead of all in one go
    eth_df.to_sql(
        name="ethereum",
        con=engine,
        index=False,
        if_exists="append",
        chunksize=10_000,
    )
    print("If you can see this, the table should have loaded successfully!")

If you can see this, the table should have loaded successfully!


In [28]:
# Pull the freshly written table back out of SQL so it can be compared
# against the dataframe that was inserted
check_df = pd.read_sql_table("ethereum", engine)
check_df

Unnamed: 0,unix_timestamp,entry_date,symbol,open_price,high_price,low_price,close_price,coin_volume
0,1462800720,2016-05-09 13:32:00+00:00,ETH-USD,0.00,12.00,0.00,12.00,4.156276
1,1462800780,2016-05-09 13:33:00+00:00,ETH-USD,12.00,12.00,10.00,10.00,43.497506
2,1462800840,2016-05-09 13:34:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,8.000000
3,1462800900,2016-05-09 13:35:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,60.000000
4,1462800960,2016-05-09 13:36:00+00:00,ETH-USD,10.00,10.00,10.00,10.00,0.375000
...,...,...,...,...,...,...,...,...
2156947,1597424940,2020-08-14 17:09:00+00:00,ETH-USD,438.82,439.45,438.66,439.28,133.787161
2156948,1597425000,2020-08-14 17:10:00+00:00,ETH-USD,439.17,439.37,438.65,438.82,675.504772
2156949,1597425060,2020-08-14 17:11:00+00:00,ETH-USD,438.76,438.83,437.71,438.37,489.871679
2156950,1597425120,2020-08-14 17:12:00+00:00,ETH-USD,438.08,438.62,437.84,438.46,168.889072


In [29]:
# Output whether the data read back from the database matches the data written to it.
# DataFrame.equals is sensitive to row order and index labels, and the table is read
# back without any ordering guarantee, so both frames are first sorted by timestamp
# and given a fresh 0..n-1 index. This way a correct load is never reported as a mismatch.
sort_key = "unix_timestamp"
written_sorted = eth_df.sort_values(sort_key, kind="stable").reset_index(drop=True)
read_sorted = check_df.sort_values(sort_key, kind="stable").reset_index(drop=True)

if read_sorted.equals(written_sorted):
    print("Good Job! You have successfully loaded the 'Ethereum' data!")
else:
    print("It looks like the data you wrote to the database does not match the data read from the database.")

Good Job! You have successfully loaded the 'Ethereum' data!
