# Add Latest Data
* This notebook provides a quick and easy way to fetch the latest Ethereum and Bitcoin 1-minute price data and append it to the stored CSV files and database tables, so that the database stays current

## General-Purpose Code 
* This code is used for any and all cryptocurrencies updated in this notebook, so it is placed at the beginning and not under one of the coin-specific sections

In [None]:
import pandas as pd
from sqlalchemy import create_engine
import config
import coinbase_data_pull as c_pull
import time
import datetime as dt
import requests

In [None]:
# Build the SQLAlchemy connection URL for the local "crypto" PostgreSQL database.
# The username and password are read from the local `config` module rather than
# being written into the notebook.
#
# The dialect name must be spelled "postgresql": SQLAlchemy 1.4+ no longer accepts
# the old "postgres" alias and raises NoSuchModuleError when it is used.
# NOTE(review): user/password are interpolated verbatim - if either contains
# URL-special characters (e.g. "@", ":", "/") it must be URL-escaped first.
protocol = "postgresql"
user = config.user
password = config.pw
location = "localhost"
port = "5432"
database = "crypto"

connection_string = f"{protocol}://{user}:{password}@{location}:{port}/{database}"

In [None]:
# Use SQLAlchemy to connect to the database.

# Create the engine from the connection string built in the previous cell.
# Every pandas read_sql_table / to_sql call in this notebook passes this engine as `con`.
engine = create_engine(connection_string)

## Ethereum

### Import and Verify Data

In [None]:
# Path of the stored 1-minute Ethereum history that this notebook reads
# and later uses to update the csv itself and the 'ethereum' SQL table.
eth_csv = './Ethereum/IO/ETH_1min.csv'

# "Date" is read in as plain text, so convert it to timezone-aware (UTC)
# datetime objects as part of loading the file.
eth_df = pd.read_csv(eth_csv).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], utc=True)
)
eth_df

In [None]:
# Relabel the csv columns (by position) with the lowercase names used by the
# database table schema, so the two frames can be compared directly.
sql_columns = ["Unix_Timestamp", "Entry_Date", "Symbol", "Open_Price", "High_Price", "Low_Price", "Close_Price", "Coin_Volume"]
lowercase_sql_columns = list(map(str.lower, sql_columns))
eth_df.columns = lowercase_sql_columns
eth_df

In [None]:
# Pull the full 'ethereum' SQL table so it can be checked against the csv.
eth_db_df = pd.read_sql_table("ethereum", engine)
eth_db_df

In [None]:
# Check that the csv and the 'ethereum' table hold the same data.
# This process may be accelerated in the future - e.g. only check the latest 100000 rows?

# First, sort both frames by timestamp and rebuild the index in case any rows got mixed up.
sorted_eth_db_df = eth_db_df.sort_values(by="unix_timestamp").reset_index(drop=True)
sorted_eth_df = eth_df.sort_values(by="unix_timestamp").reset_index(drop=True)

# The comparison deliberately leaves 'check_exact' unset: an exact comparison
# ran into floating point precision differences between the csv and the database.

# Only AssertionError (the "frames differ" signal) is handled here, and its
# message is printed so the difference can be inspected. Any other exception
# is a real problem and is allowed to surface instead of being silently hidden.
try:
    pd.testing.assert_frame_equal(sorted_eth_df, sorted_eth_db_df)
except AssertionError as mismatch:
    print("It looks like the .csv file does not match the data read from the database.")
    print(mismatch)
else:
    print("Congratulations! The tables match - there are no differences between the .csv and your 'ethereum' table!")

### Retrieve Additional Data

In [None]:
# Restore the original csv-style column labels (assigned by position),
# because the c_pull helper functions work with these names.
sorted_eth_df.columns = [
    "Unix Timestamp",
    "Date",
    "Symbol",
    "Open",
    "High",
    "Low",
    "Close",
    "Volume",
]
sorted_eth_df

In [None]:
# If the data is verified, start to pull data using the API.
# The three steps below are implemented in the project's coinbase_data_pull module (c_pull):
#   1. get_current_data      - presumably fetches the rows newer than those already in the frame; confirm in c_pull
#   2. convert_results_to_df - turns the raw results into a DataFrame
#   3. clean_results_df      - cleans that DataFrame, using the existing history as a reference
# NOTE: `results`, `unclean_api_df` and `clean_update_df` are reused by the Bitcoin
# section further down, so run the remaining Ethereum cells before moving on.

results = c_pull.get_current_data(sorted_eth_df)
unclean_api_df = c_pull.convert_results_to_df(results)
clean_update_df = c_pull.clean_results_df(unclean_api_df, sorted_eth_df)

# Resulting cleaned, updated dataframe
clean_update_df

### Update SQL Database

In [None]:
# Work on a copy of the update so the csv-style frame stays untouched,
# then give the copy the lowercase column names of the SQL table.
# NOTE(review): labels are assigned by position, so this order has to match the
# column order produced by c_pull.clean_results_df - confirm against that module.
eth_sql_update = clean_update_df.copy()
eth_sql_update.columns = list(
    map(
        str.lower,
        ["Unix_Timestamp", "Low_Price", "High_Price", "Open_Price",
         "Close_Price", "Coin_Volume", "Entry_Date",  "Symbol"],
    )
)
eth_sql_update

In [None]:
# Append the new rows to the existing 'ethereum' table (the DataFrame index is not written).
# Because this appends, running the cell a second time inserts the same rows again.
eth_sql_update.to_sql(
    name="ethereum",
    con=engine,
    if_exists="append",
    index=False,
)
print("If you can see this, the table should have loaded successfully!")

### Update CSV

In [None]:
# Stack the freshly pulled rows underneath the existing Ethereum history.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; concat keeps each frame's own index, as append did.
output_csv_df = pd.concat([sorted_eth_df, clean_update_df])
output_csv_df

In [None]:
# Quick visual sanity check: closing price over time for the combined history.
output_csv_df.plot.line(x="Date", y="Close")

In [None]:
# Write the combined history back over the source csv (index column omitted).
# The path comes from `eth_csv`, defined where the file was loaded, so the read
# and write locations cannot drift apart (the Bitcoin section does the same with `btc_csv`).
output_csv_df.to_csv(eth_csv, index=False)
print("1-minute Ethereum csv file output!")

## Bitcoin

### Import and Verify Data

In [None]:
# Path of the stored 1-minute Bitcoin history that this notebook reads
# and later uses to update the csv itself and the 'bitcoin' SQL table.
btc_csv = './Bitcoin/IO/coinbaseUSD_1-min_data.csv'

# "Date" is read in as plain text, so convert it to timezone-aware (UTC)
# datetime objects as part of loading the file.
btc_df = pd.read_csv(btc_csv).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], utc=True)
)
btc_df

In [None]:
# Relabel the csv columns (by position) with the lowercase names used by the
# database table schema, so the two frames can be compared directly.
sql_columns = ["Unix_Timestamp", "Entry_Date", "Symbol", "Open_Price", "High_Price", "Low_Price", "Close_Price", "Coin_Volume"]
lowercase_sql_columns = list(map(str.lower, sql_columns))
btc_df.columns = lowercase_sql_columns
btc_df

In [None]:
# Pull the full 'bitcoin' SQL table so it can be checked against the csv.
btc_db_df = pd.read_sql_table("bitcoin", engine)
btc_db_df

In [None]:
# Check that the csv and the 'bitcoin' table hold the same data.
# This process may be accelerated in the future - e.g. only check the latest 100000 rows?

# First, sort both frames by timestamp and rebuild the index in case any rows got mixed up.
sorted_btc_db_df = btc_db_df.sort_values(by="unix_timestamp").reset_index(drop=True)
sorted_btc_df = btc_df.sort_values(by="unix_timestamp").reset_index(drop=True)

# The comparison deliberately leaves 'check_exact' unset: an exact comparison
# ran into floating point precision differences between the csv and the database.

# Only AssertionError (the "frames differ" signal) is handled here, and its
# message is printed so the difference can be inspected. Any other exception
# is a real problem and is allowed to surface instead of being silently hidden.
try:
    pd.testing.assert_frame_equal(sorted_btc_df, sorted_btc_db_df)
except AssertionError as mismatch:
    print("It looks like the .csv file does not match the data read from the database.")
    print(mismatch)
else:
    print("Congratulations! The tables match - there are no differences between the .csv and your 'bitcoin' table!")

### Retrieve Additional Data

In [None]:
# Restore the original csv-style column labels (assigned by position),
# because the c_pull helper functions work with these names.
sorted_btc_df.columns = [
    "Unix Timestamp",
    "Date",
    "Symbol",
    "Open",
    "High",
    "Low",
    "Close",
    "Volume",
]
sorted_btc_df

In [None]:
# If the data is verified, start to pull data using the API.
# The three steps below are implemented in the project's coinbase_data_pull module (c_pull):
#   1. get_current_data      - presumably fetches the rows newer than those already in the frame; confirm in c_pull
#   2. convert_results_to_df - turns the raw results into a DataFrame
#   3. clean_results_df      - cleans that DataFrame, using the existing history as a reference
# NOTE: this overwrites the `results`, `unclean_api_df` and `clean_update_df`
# values produced by the Ethereum section earlier in the notebook.

results = c_pull.get_current_data(sorted_btc_df)
unclean_api_df = c_pull.convert_results_to_df(results)
clean_update_df = c_pull.clean_results_df(unclean_api_df, sorted_btc_df)

# Resulting cleaned, updated dataframe
clean_update_df

### Update SQL Database

In [None]:
# Work on a copy of the update so the csv-style frame stays untouched,
# then give the copy the lowercase column names of the SQL table.
# NOTE(review): labels are assigned by position, so this order has to match the
# column order produced by c_pull.clean_results_df - confirm against that module.
btc_sql_update = clean_update_df.copy()
btc_sql_update.columns = list(
    map(
        str.lower,
        ["Unix_Timestamp", "Low_Price", "High_Price", "Open_Price",
         "Close_Price", "Coin_Volume", "Entry_Date",  "Symbol"],
    )
)
btc_sql_update

In [None]:
# Append the new rows to the existing 'bitcoin' table (the DataFrame index is not written).
# Because this appends, running the cell a second time inserts the same rows again.
btc_sql_update.to_sql(
    name="bitcoin",
    con=engine,
    if_exists="append",
    index=False,
)
print("If you can see this, the table should have loaded successfully!")

### Update CSV

In [None]:
# Stack the freshly pulled rows underneath the existing Bitcoin history.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; concat keeps each frame's own index, as append did.
output_csv_df = pd.concat([sorted_btc_df, clean_update_df])
output_csv_df

In [None]:
# Quick visual sanity check: closing price over time for the combined history.
output_csv_df.plot.line(x="Date", y="Close")

In [None]:
# Write the combined history back over the source csv (index column omitted),
# using the same path the file was loaded from.
output_csv_df.to_csv(path_or_buf=btc_csv, index=False)
print("1-minute Bitcoin csv file output!")