In [None]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

import config

# Load Ethereum and Bitcoin Historical Data

* Run this notebook only **once**, to perform the initial load of the cleaned, up-to-date (as of 8/14/20) 1-minute price history data for the Ethereum and Bitcoin cryptocurrencies

* Run this notebook only after creating the table schemas for the two cryptocurrencies in pgAdmin. At a later time the tables may be created in this notebook and loaded into the database directly from here, but for now that feature is unavailable

* The table schema code will be included in this repo and the instructions to use it should be located in the README

* A separate notebook will be created and used repeatedly to update the database incrementally as prices change and more data is generated over time

## Load Ethereum Data

* The steps below outline how to initially load the Ethereum (ETH) data from the .csv located in the **Ethereum/IO/** folder

In [None]:
# Build the database URL that SQLAlchemy will use to connect to PostgreSQL.
# The credentials are read from the local `config` module rather than written here.
# "postgresql" is the dialect name SQLAlchemy expects; the older "postgres" alias
# was removed in SQLAlchemy 1.4 and no longer loads there.
protocol = "postgresql"
user = config.user
password = config.pw
location = "localhost"
port = "5432"
database = "crypto"

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user name or password cannot break the structure of the URL.
connection_string = (
    f"{protocol}://{quote(str(user), safe='')}:{quote(str(password), safe='')}"
    f"@{location}:{port}/{database}"
)

# Show where we are connecting, but mask the password so it is never stored in
# the saved notebook output.
print(f"{protocol}://{user}:***@{location}:{port}/{database}")

In [None]:
# Path of the Ethereum 1-minute price history CSV that will be loaded into SQL.
eth_csv = './Ethereum/IO/ETH_1min.csv'

# "Date" is read in as text, so swap it for timezone-aware (UTC) datetime
# objects while the dataframe is being built.
eth_df = pd.read_csv(eth_csv).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], utc=True)
)
eth_df

In [None]:
# Rename the columns (by position) so they match the schema of the database table.
# The schema names are listed in mixed case and lower-cased before being applied.
sql_columns = [
    "Unix_Timestamp",
    "Entry_Date",
    "Symbol",
    "Open_Price",
    "High_Price",
    "Low_Price",
    "Close_Price",
    "Coin_Volume",
]
lowercase_sql_columns = list(map(str.lower, sql_columns))
eth_df.columns = lowercase_sql_columns
eth_df.head()

In [None]:
# Set up the SQLAlchemy engine.

# create_engine builds the engine from the connection string defined earlier in this
# notebook; later cells pass this `engine` as `con` when writing to and reading from SQL.
engine = create_engine(connection_string)

In [None]:
# Load the Ethereum dataframe into the "ethereum" SQL table.

#**********THIS WILL FAIL UPON RUNNING AS A DEFAULT - ONLY CHANGE THE "if_exists='fail'" PARAMETER BELOW TO 'append' 
#**********IF LOADING DATA FOR THE FIRST TIME! OTHERWISE CHECK THE README FOR THE CORRECT NOTEBOOK TO UPDATE THE DATABASE!

# chunksize writes the rows in batches instead of all at once, which keeps memory
# use bounded for the full 1-minute price history.
eth_df.to_sql(
    name="ethereum",
    con=engine,
    index=False,
    if_exists="fail",
    chunksize=10_000,
)
print("If you can see this, the table should have loaded successfully!")

In [None]:
# Read the "ethereum" table back out of the database so it can be compared
# with the dataframe that was inserted.

check_df = pd.read_sql_table("ethereum", engine)
check_df

In [None]:
# Report whether the data read back from the database matches the data written to it.

# Both dataframes are sorted by timestamp and given a fresh index before comparing,
# because rows read back from the database were seen in a different order than inserted.
sorted_check_df = check_df.sort_values("unix_timestamp").reset_index(drop=True)
sorted_eth_df = eth_df.sort_values("unix_timestamp").reset_index(drop=True)

eth_round_trip_ok = sorted_check_df.equals(sorted_eth_df)
print(
    "Good Job! You have successfully loaded the 'Ethereum' data!"
    if eth_round_trip_ok
    else "It looks like the data you wrote to the database does not match the data read from the database."
)

## Load Bitcoin Data

* The steps below outline how to initially load the Bitcoin (BTC) data from the .csv located in the **Bitcoin/IO/** folder
* These steps are essentially the same as the steps for loading the Ethereum data above, with some table names changed, so if the Ethereum data has already loaded successfully, loading the Bitcoin data here should not be a problem!

In [None]:
# The database connection already exists from loading the Ethereum data above,
# so the first step here is to read the Bitcoin .csv file into the notebook.

# Path of the Bitcoin 1-minute price history CSV that will be loaded into SQL.
btc_csv = './Bitcoin/IO/coinbaseUSD_1-min_data.csv'

# "Date" is read in as text, so swap it for timezone-aware (UTC) datetime
# objects while the dataframe is being built.
btc_df = pd.read_csv(btc_csv).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], utc=True)
)
btc_df

In [None]:
# Rename the columns (by position) so they match the schema of the database table.
# The schema names are listed in mixed case and lower-cased before being applied.
sql_columns = [
    "Unix_Timestamp",
    "Entry_Date",
    "Symbol",
    "Open_Price",
    "High_Price",
    "Low_Price",
    "Close_Price",
    "Coin_Volume",
]
lowercase_sql_columns = list(map(str.lower, sql_columns))
btc_df.columns = lowercase_sql_columns
btc_df.head()

In [None]:
# Load the Bitcoin dataframe into the "bitcoin" SQL table.
# No need to create the engine again; it should already exist from loading the Ethereum data.

#**********THIS WILL FAIL UPON RUNNING AS A DEFAULT - ONLY CHANGE THE "if_exists='fail'" PARAMETER BELOW TO 'append' 
#**********IF LOADING DATA FOR THE FIRST TIME! OTHERWISE CHECK THE README FOR THE CORRECT NOTEBOOK TO UPDATE THE DATABASE!

# chunksize writes the rows in batches instead of all at once, which keeps memory
# use bounded for the full 1-minute price history.
btc_df.to_sql(
    name="bitcoin",
    con=engine,
    index=False,
    if_exists="fail",
    chunksize=10_000,
)
print("If you can see this, the table should have loaded successfully!")

In [None]:
# Read the "bitcoin" table back out of the database so it can be compared
# with the dataframe that was inserted.
check_btc_df = pd.read_sql_table("bitcoin", engine)
check_btc_df

In [None]:
# Report whether the data read back from the database matches the data written to it.

# Both dataframes are sorted by timestamp and given a fresh index before comparing,
# because rows read back from the database were seen in a different order than inserted.
sorted_check_btc_df = check_btc_df.sort_values("unix_timestamp").reset_index(drop=True)
sorted_btc_df = btc_df.sort_values("unix_timestamp").reset_index(drop=True)

btc_round_trip_ok = sorted_check_btc_df.equals(sorted_btc_df)
print(
    "Good Job! You have successfully loaded the 'Bitcoin' data!"
    if btc_round_trip_ok
    else "It looks like the data you wrote to the database does not match the data read from the database."
)