In [None]:
# Download the TA-Lib and yfinance libraries to Google Colab
# Fetch the TA-Lib 0.4.0 source tarball and unpack it
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
# Move into the ta-lib directory; note that %cd changes the notebook's working
# directory, so later cells start from inside ta-lib
%cd ta-lib
# Configure the build with /usr as install prefix, then compile and install it
!./configure --prefix=/usr
!make
!make install
# Install the Python packages imported by the next cell
!pip install Ta-Lib
!pip install yfinance

In [None]:
# Imports
import os
import json
import pandas as pd
import yfinance as yf
from datetime import datetime
import pickle
import talib
from talib import abstract

In [None]:
# Load the stocks list from initial_data.json
# Use the absolute Colab working directory instead of a relative "..": a relative
# hop only works right after the install cell's `%cd ta-lib` and climbs one more
# level on every re-run of this cell. "/content" is the same directory the main
# cell switches to before saving the .stk files.
os.chdir("/content")
with open("./initial_data.json", encoding="utf-8") as f:
    initial_data = json.load(f)

# Create the stocks_structs folder if it does not exist yet
os.makedirs('stocks_structs', exist_ok=True)

In [None]:
# Create a dictionary of abstract technical analysis functions
# Each of them will be calculated on the raw data and then added to the df
class Indicators:
    """Catalogue of TA-Lib abstract functions paired with their output tags."""

    @staticmethod
    def create_indicators_file():
        """Build the technical-indicator lookup table.

        Despite the name, nothing is written to disk; a new dictionary is
        built and returned on every call.

        Returns:
            dict: indicator key -> ``[function, [tags]]`` where ``function``
            is a ``talib.abstract`` function and ``tags`` are the names given
            to the column(s) it produces.
        """
        # (indicator key, abstract function, output tags)
        catalogue = (
            ('sma', abstract.SMA, ['sma']),
            ('ema', abstract.EMA, ['ema']),
            ('bbands', abstract.BBANDS, ['upperband', 'middleband', 'lowerband']),
            ('stoch', abstract.STOCH, ['slowk', 'slowd']),
            ('macd', abstract.MACD, ['macd', 'macdsignal', 'macdhist']),
            ('rsi', abstract.RSI, ['rsi']),
            ('adx', abstract.ADX, ['adx']),
            ('cci', abstract.CCI, ['cci']),
            ('aroon', abstract.AROON, ['aroondown', 'aroonup']),
        )
        return {key: [func, tags] for key, func, tags in catalogue}

In [None]:
# Stock object struct, contains all the basic data on the stock
class Stock:
    """Plain record holding the basic data of a single stock.

    Attributes set by the constructor:
        sym, company, category, start: stored exactly as passed in.
        last_update: the day the object was created, as a YYYY-MM-DD string.
        classification, technical_indicators, raw_data, extended_df:
            initialised to None; the constructor never fills them.
    """

    def __init__(self, symbol, company, category, start):
        # Identity fields supplied by the caller
        self.sym, self.company, self.category, self.start = symbol, company, category, start
        # Stamp the creation day
        self.last_update = datetime.today().strftime('%Y-%m-%d')
        # Placeholders to be populated after construction
        self.classification = self.technical_indicators = None
        self.raw_data = self.extended_df = None

In [None]:
class DataDownloader:
    """Downloads a stock's price history and extends it with TA-Lib indicators.

    All methods are static; the class only groups the download and analysis steps.
    """

    # Dictionary of all the TA-Lib indicators to calculate:
    # indicator key -> [abstract function, [output column tags]]
    indicators = Indicators.create_indicators_file()

    @staticmethod
    def call(stock):
        """Run the query and analyze steps for one stock.

        Args:
            stock: object exposing ``sym`` (ticker symbol) and ``start``
                (start date) attributes.

        Returns:
            tuple: ``(last_update, prices, analyzed)`` - today's date as a
            ``YYYY-MM-DD`` string, the frame returned by ``query`` and the
            extended frame (prices + indicators) returned by ``analyze``.
        """
        prices = DataDownloader.query(stock.sym)
        analyzed = DataDownloader.analyze(prices, stock.start)
        return datetime.today().strftime('%Y-%m-%d'), prices, analyzed

    @staticmethod
    def query(symbol):
        """Download the stock's dataset from yfinance with TA-Lib friendly columns.

        Args:
            symbol: ticker symbol passed to ``yf.download``.

        Returns:
            pandas.DataFrame: the downloaded frame with 'Open', 'High', 'Low',
            'Adj Close' and 'Volume' renamed to 'open', 'high', 'low', 'close'
            and 'volume', and the unadjusted 'Close' column removed.
        """
        # Request unadjusted prices explicitly: the rename below needs the
        # 'Adj Close' column, which is only present when auto_adjust is off,
        # so do not rely on the installed yfinance version's default.
        data = yf.download(symbol, auto_adjust=False)

        # NOTE(review): some yfinance releases appear to return (field, ticker)
        # MultiIndex columns even for a single symbol - keep only the field
        # level so the flat renames below apply. No-op for flat columns.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        # Rename the columns so TA-Lib can use its abstract functions
        data = data.rename(columns={'Open': 'open', 'High': 'high',
                                    'Low': 'low', 'Adj Close': 'close',
                                    'Volume': 'volume'})
        # Remove the Close column (the adjusted close is already kept as 'close')
        data = data.drop(columns=['Close'])

        return data

    @staticmethod
    def analyze(df, start):
        """Build the training frame: prices after ``start`` plus indicator columns.

        Indicators are computed on the full ``df`` and then joined by index
        onto the date-filtered rows, so rows near ``start`` can still use
        earlier prices.

        Args:
            df: price frame as returned by ``query``.
            start: start date compared against ``df.index``.

        Returns:
            pandas.DataFrame: filtered prices joined with one column per
            indicator tag, with every row containing a NaN dropped.
        """
        # Keep only the rows after the start date defined in initial_data.json
        # NOTE(review): the comparison is strict, so a row dated exactly
        # `start` is dropped as well - confirm this is intended.
        extended_df = df.loc[df.index > start]

        # Calculate each technical indicator and add its column(s) to the extended df
        for func, tags in DataDownloader.indicators.values():
            new_data = pd.DataFrame(func(df))  # Calc the current indicator on the full history

            # Tag the new columns with the indicator's output names (positional match)
            new_data = new_data.rename(columns=dict(zip(new_data.columns, tags)))

            # Add the new TA-Lib column(s) to the extended df
            extended_df = extended_df.join(new_data)

        # Drop rows containing NaN values
        return extended_df.dropna()

In [None]:
if __name__ == "__main__":
    # Work from the Colab root so the relative ./stocks_structs path below resolves
    os.chdir("/content")

    # Instantiate a Stock object for each entry in initial_data.json and collect them
    stocks_list = []
    for symbol, details in initial_data.items():
        print(f"Create {symbol} Object")
        stocks_list.append(
            Stock(symbol, details["company_name"], details["category"], details["start_date"])
        )

    # Download the data for each stock, calculate its technical indicators,
    # then persist the populated object
    for stk in stocks_list:
        print(f"Download {stk.sym} Data")
        stk.last_update, stk.raw_data, stk.extended_df = DataDownloader.call(stk)

        # Save the object as a .stk file (pickle, highest protocol)
        with open(f'./stocks_structs/{stk.sym}.stk', 'wb') as out_file:
            pickle.dump(stk, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Zip all the .stk files so they can be downloaded from Colab to the local computer
import shutil
shutil.make_archive(base_name='stocks_structs', format='zip', root_dir='stocks_structs')