## IMPORTS

In [1]:
import yfinance as yf
import pandas as pd

## CRYPTO FUNCTIONS

In [2]:
# IMPORTS
import pandas as pd
import math
import os.path
import time
from binance.client import Client #  pip install python-binance
from datetime import timedelta, datetime
from dateutil import parser
from tqdm import tqdm_notebook #(Optional, used for progress-bars)

### API
# Credentials are taken from the environment so real keys never need to be
# saved inside the notebook. The original placeholders remain as fallbacks,
# so behaviour is unchanged when the variables are not set.
binance_api_key = os.environ.get('BINANCE_API_KEY', '<api_key>')        # or enter your own API-key here
binance_api_secret = os.environ.get('BINANCE_API_SECRET', '<api_key>')  # or enter your own API-secret here

### CONSTANTS
# Length of each supported kline interval, expressed in minutes
# (get_all_binance divides a span in minutes by these values).
binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
# NOTE(review): batch_size is not referenced by any of the visible cells.
batch_size = 750
# Shared client used by every Binance helper defined below.
binance_client = Client(api_key=binance_api_key, api_secret=binance_api_secret)


### FUNCTIONS
def minutes_of_new_data(symbol, kline_size, data, source):
    """Return the ``(old, new)`` time window that still has to be downloaded.

    Parameters
    ----------
    symbol : str
        Trading pair forwarded to ``binance_client.get_klines``.
    kline_size : str
        Kline interval forwarded to ``binance_client.get_klines``.
    data : pandas.DataFrame
        Rows that were already downloaded. When it is non-empty, the last
        value of its ``"timestamp"`` column is parsed and used as ``old``.
    source : str
        Data provider; only ``"binance"`` is supported.

    Returns
    -------
    tuple
        ``(old, new)`` where ``old`` is the last known timestamp (or
        1 Jan 2017 when ``data`` is empty) and ``new`` is the open time of
        the most recent kline returned by the exchange.

    Raises
    ------
    ValueError
        If ``source`` is not ``"binance"``. Previously this case fell through
        and failed with ``UnboundLocalError`` at the ``return`` statement.
    """
    if source != "binance":
        raise ValueError("Unsupported source %r; only 'binance' is supported." % (source,))

    if len(data) > 0:
        old = parser.parse(data["timestamp"].iloc[-1])
    else:
        # Nothing downloaded yet: start from the fixed 1 Jan 2017 origin.
        old = datetime.strptime('1 Jan 2017', '%d %b %Y')

    # The first field of a kline is its open time in milliseconds.
    latest_kline = binance_client.get_klines(symbol=symbol, interval=kline_size)[-1]
    new = pd.to_datetime(latest_kline[0], unit='ms')
    return old, new

def get_all_binance(symbol, kline_size):
    """Download the available klines for ``symbol`` and return them as a frame.

    Parameters
    ----------
    symbol : str
        Trading pair forwarded to the Binance client.
    kline_size : str
        Kline interval; must be a key of ``binsizes``.

    Returns
    -------
    pandas.DataFrame
        Float columns ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``,
        indexed by ``Date`` (the kline open time converted from milliseconds).

    Notes
    -----
    No previously downloaded data is kept, so the window always starts from
    an empty frame. The former "append to existing data" branch could never
    run and relied on ``DataFrame.append`` (removed in pandas 2.0), so it was
    dropped.
    """
    oldest_point, newest_point = minutes_of_new_data(
        symbol, kline_size, pd.DataFrame(), source="binance"
    )
    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])

    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'):
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))

    klines = binance_client.get_historical_klines(
        symbol,
        kline_size,
        oldest_point.strftime("%d %b %Y %H:%M:%S"),
        newest_point.strftime("%d %b %Y %H:%M:%S"),
    )

    # Name every field of the raw kline rows, then keep only the OHLCV part.
    data = (
        pd.DataFrame(klines, columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore'])
        .drop(columns=['close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore'])
        .astype({"Open": float, "High": float, "Low": float, "Close": float, "Volume": float})
    )
    data['Date'] = pd.to_datetime(data['Date'], unit='ms')
    data = data.set_index('Date')

    print('All caught up..!')
    return data

## YFINANCE FUNCTIONS

In [3]:
def fetch_yfinnace(stock, startDate, endDate, interval):
    """Download price history through yfinance without the ``Adj Close`` column.

    Parameters
    ----------
    stock : ticker(s) forwarded to ``yf.download``.
    startDate, endDate : passed as ``start`` / ``end`` of the download.
    interval : bar interval forwarded to ``yf.download``.

    Returns
    -------
    The downloaded frame with the ``Adj Close`` column removed when present.
    """
    df = yf.download(stock, start=startDate, end=endDate, interval=interval, group_by='tickers')
    # The download may come back without an 'Adj Close' column (e.g. when the
    # prices are already adjusted), so ignore a missing label instead of
    # failing with KeyError.
    df = df.drop(columns=['Adj Close'], errors='ignore')
    return df

## DATA PROCESSING FUNCTIONS

In [4]:
def resample_data(df, time, aggregate):
    """Resample ``df`` on the rule ``time`` and combine each bin with ``aggregate``.

    ``time`` is handed to ``DataFrame.resample`` and ``aggregate`` to the
    resampler's ``apply``; the aggregated result is returned.
    """
    bins = df.resample(time)
    resampled = bins.apply(aggregate)
    return resampled

In [5]:
def create_label(df, src, timeframe=-1):
    """Add a ``Label`` column holding ``df[src]`` shifted by ``timeframe`` rows.

    The column is written onto ``df`` itself and the same object is returned.
    With the default ``timeframe=-1`` each row is labelled with the value of
    the following row.
    """
    shifted = df[src].shift(periods=timeframe)
    df['Label'] = shifted
    return df

## MAIN FUNCTION

In [6]:
def get_data(config):
    """Fetch, optionally resample, clean and label price data as set in ``config``.

    Keys read from ``config``:

    * ``config['data']['useyfinance']`` - truthy selects yfinance, otherwise
      Binance is used.
    * ``config['data']['yfinance']`` - ``stock``, ``start``, ``end``, ``interval``.
    * ``config['data']['binance']`` - ``coin``, ``interval``.
    * ``config['processing']['resample']`` - when truthy, the frame is resampled
      with ``config['processing']['sampling']`` (``time``, ``aggregate``).
    * ``config['processing']['label']`` - ``source`` column and ``shift``.

    Returns the frame produced by ``create_label``.
    """
    source_cfg = config['data']
    prep_cfg = config['processing']

    # Pick the data provider.
    if source_cfg['useyfinance']:
        yf_cfg = source_cfg['yfinance']
        frame = fetch_yfinnace(
            yf_cfg['stock'], yf_cfg['start'], yf_cfg['end'], yf_cfg['interval']
        )
    else:
        binance_cfg = source_cfg['binance']
        frame = get_all_binance(binance_cfg['coin'], binance_cfg['interval'])

    # Optional resampling step.
    if prep_cfg['resample']:
        sampling_cfg = prep_cfg['sampling']
        frame = resample_data(frame, sampling_cfg['time'], sampling_cfg['aggregate'])

    # Rows with missing values (days with no trading) are discarded.
    frame = frame.dropna()

    # Attach the label column and hand the result back.
    label_cfg = prep_cfg['label']
    return create_label(frame, label_cfg['source'], label_cfg['shift'])