<a href="https://colab.research.google.com/github/helmieresearch/helmieresearch/blob/main/Zipline_Reloaded_BacktestEngine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧰**INSTALL MODULES**

In [None]:
# Mount Google Drive 
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install ta-lib v0.4.0
%%bash
wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
tar -xzf ta-lib-0.4.0-src.tar.gz
cd ta-lib/
./configure
make
make install

In [None]:
# Install zipline
%pip install zipline-reloaded

In [None]:
!pip install iso3166==2.0.2

In [None]:
# Install Pyfolio
!pip install pyfolio-reloaded

In [None]:
# Install matplot library
!pip install matplotlib

In [None]:
# Install data bundle 'Quandl'
!pip install quandl

In [None]:
import zipline
zipline.__version__

In [None]:
!pip install nasdaq-data-link

# 💽**SET WORKING DIRECTORY**

In [None]:
import os 

# Set your working directory to a folder in your Google Drive. This way, if your notebook times out,
# your files will be saved in your Google Drive!

# the base Google Drive directory
root_dir = "/content/drive/"


In [None]:
import os 

# Set your working directory to a folder in your Google Drive. This way, if your notebook times out,
# your files will be saved in your Google Drive!


# choose where you want your project files to be saved
project_folder = "MyDrive/Colab Notebooks/My Project Folder"


In [None]:
def create_and_set_working_directory(project_folder):
  # check if your project folder exists. if not, it will be created.
  if os.path.isdir(root_dir + project_folder) == False:
    os.mkdir(root_dir + project_folder)
    print(root_dir + project_folder + ' did not exist but was created.')

  # change the OS to use your project folder as the working directory
  os.chdir(root_dir + project_folder)

  # create a test file to make sure it shows up in the right place
  !touch 'new_file_in_working_directory.txt'
  print('\nYour working directory was changed to ' + root_dir + project_folder + \
        "\n\nAn empty text file was created there. You can also run !pwd to confirm the current working directory." )

create_and_set_working_directory(project_folder)

In [None]:
# Confirm current working directory
!pwd

In [None]:
import os
for f in os.listdir("//content/drive/MyDrive/Colab Notebooks/My Project Folder"):
	print(f)

# ⏳**INGEST DATA**

In [None]:
# Ingest custom bundle
!zipline ingest --bundle 'crypto'

In [None]:
# Ingest custom bundle
!zipline ingest --bundle 'equities_csvdir'

In [None]:
# Ingest custom bundle
!zipline ingest --bundle 'random_futures_data'

In [None]:
# Ingest custom bundle
!zipline ingest --bundle 'random_stock_data'

In [None]:
'''
INGEST QUANDL STOCK BUNDLE
'''
# Ingest bundle API
!QUANDL_API_KEY=KUnssHvVERHb5XYu9C1- zipline ingest -b 'quandl'

In [None]:
# Confirm existing bundles
!zipline bundles

In [None]:
# clean everything older than <date>
!zipline clean -b crypto --after 2022-04-13

# 📊**ANDREAS CLENOW MODELS**

## **Clenow Momentum Model**

---


In [None]:
#@title
%matplotlib inline

import zipline
from zipline import run_algorithm
from zipline.api import order_target_percent, symbol, set_commission, set_slippage, schedule_function, date_rules, time_rules
from pandas import Timestamp
from datetime import datetime
import pytz
import matplotlib.pyplot as plt 
import pyfolio as pf
import pandas as pd 
import numpy as np 
from scipy import stats
from zipline.finance.commission import PerDollar
from zipline.finance.slippage import VolumeShareSlippage, FixedSlippage

#Model Settings

intial_portfolio = 100000
momentum_window = 125
minimum_momentum = 40
portfolio_size = 30
vola_window = 20

#Commission and Slippage Settings

enable_commission = True 
commission_pct = 0.001 
enable_slippage = True 
slippage_volume_limit = 0.025
slippage_impact = 0.05

def momentum_score(ts):

  #Input: Price time series.Output: Annualized exponential regression slope, multiplied by the R2

  # Make a list of consecutive numbers 
  x = np.arange(len(ts))
  # Get logs
  log_ts = np.log(ts)
  # Calculate regression values
  slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts) 
  # Annualize percent
  annualized_slope = (np.power(np.exp(slope), 252) - 1) * 100 
  #Adjust for fitness
  score = annualized_slope * (r_value ** 2) 
  return score

def volatility(ts):
  return ts.pct_change().rolling(vola_window).std().iloc[-1]

def output_progress(context):

  #Output some performance numbers during backtest run 
  #This code just prints out the past month's performance,
  # so that we have something to look at while the backtest runs.

  # Get today's date
  today = zipline.api.get_datetime().date()

  # Calculate percent difference since last month
  perf_pct = (context.portfolio.portfolio_value / context.last_month) - 1

  # Print performance, format as percent with two decimals. 
  print("{} - Last Month Result: {:.2%}".format(today, perf_pct))

  # Remember today's portfolio value for next month's calculation 
  context.last_month = context.portfolio.portfolio_value

#Initialization and trading logic


def initialize(context):

  # Set commission and slippage.
  if enable_commission:
    comm_model = PerDollar(cost=commission_pct) 
  else:
    comm_model = PerDollar(cost=0.0) 
  set_commission(comm_model)
  if enable_slippage: slippage_model=VolumeShareSlippage(volume_limit=slippage_volume_limit,
price_impact=slippage_impact) 
  else:
    slippage_model=FixedSlippage(spread=0.0) 
  set_slippage(slippage_model)

  # Used only for progress output. 
  context.last_month = intial_portfolio

  # Store index membership
  context.index_members = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/index members/sp500.csv', engine='python', error_bad_lines=False)


  #Schedule rebalance monthly. 
  schedule_function(
    func=rebalance, 
    date_rule=date_rules.month_start(), 
    time_rule=time_rules.market_open()
  )

def rebalance(context, data):
  # Write some progress output during the backtest 
  output_progress(context)

  # First, get today's date
  today = zipline.api.get_datetime().date()

  # Second, get the index makeup for all days prior to today.
  all_prior = context.index_members.loc[context.index_members.index < today]

  todays_universe = [ 
    symbol(ticker) for ticker in
    context.index_members.loc[context.index_members.index < today].iloc[-1,0].split(',')
  ]

  # Get historical data
  hist = data.history(todays_universe, "close", momentum_window, "1d")

  # Make momentum ranking table
  ranking_table = hist.apply(momentum_score).sort_values(ascending=False)

  #Sell Logic
  #First we check if any existing position should be sold.
    #Sell if stock is no longer part of index.
    #Sell if stock has too low momentum value.

  kept_positions = list(context.portfolio.positions.keys()) 
  for security in context.portfolio.positions:
    if (security not in todays_universe): 
      order_target_percent(security, 0.0) 
      kept_positions.remove(security)
    elif ranking_table[security] < minimum_momentum: 
      order_target_percent(security, 0.0) 
      kept_positions.remove(security)

  #Stock Selection Logic
  #Check how many stocks we are keeping from last month.
  #Fill from top of ranking list, until we reach the desired total number of portfolio holdings.

  replacement_stocks = portfolio_size - len(kept_positions) 
  buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
  new_portfolio = pd.concat(
    (buy_list,
    ranking_table.loc[ranking_table.index.isin(kept_positions)])
  )
  buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]

  #Calculate inverse volatility for stocks, and make target position weights.

  vola_table = hist[new_portfolio.index].apply(volatility) 
  inv_vola_table = 1 / vola_table
  sum_inv_vola = np.sum(inv_vola_table) 
  vola_target_weights = inv_vola_table / sum_inv_vola
  for security, rank in new_portfolio.iteritems(): 
    weight = vola_target_weights[security]
    if security in kept_positions:
      order_target_percent(security, weight)
    else:
      if ranking_table[security] > minimum_momentum: 
        order_target_percent(security, weight)

def analyze(context, perf):
  perf['max'] = perf.portfolio_value.cummax() 
  perf['dd'] = (perf.portfolio_value / perf['max']) - 1 
  maxdd = perf['dd'].min()

  ann_ret = (np.power((perf.portfolio_value.iloc[-1] / perf.portfolio_value.iloc[0]),(252 / len(perf)))) - 1

  print("Annualized Return: {:.2%} Max Drawdown: {:.2%}".format(ann_ret, maxdd))

  return
'''
start = datetime(1997, 1, 1, 8, 15, 12, 0, pytz.UTC)
end = datetime(2018, 12, 31, 8, 15, 12, 0, pytz.UTC)
'''

start = pd.Timestamp('1997-1-1', tz='utc')
end = pd.Timestamp('2018-12-31', tz='utc')

perf = zipline.run_algorithm(start=start, end=end, 
                             initialize=initialize, 
                             analyze=analyze, 
                             capital_base=intial_portfolio, 
                             data_frequency = 'daily', 
                             bundle='quandl' )

## **Clenow Trend Model**

In [None]:

%matplotlib inline

import zipline
from zipline.api import future_symbol,  \
    set_commission, set_slippage, schedule_function, date_rules, \
    time_rules, continuous_future, order_target
from pandas import Timestamp
import pytz
import datetime as datetime
import matplotlib.pyplot as plt
import pyfolio as pf
import pandas as pd
import numpy as np  
from zipline.finance.commission import PerTrade, PerContract
from zipline.finance.slippage import VolumeShareSlippage, \
    FixedSlippage, VolatilityVolumeShare

# These lines are for the dynamic text reporting
from IPython.display import display
import ipywidgets as widgets
out = widgets.HTML()
display(out)

"""
Model Settings
"""
starting_portfolio = 50000000
risk_factor = 0.0015
stop_distance = 3
breakout_window = 50
vola_window = 40
slow_ma = 80
fast_ma = 40
enable_commission = True
enable_slippage = True  


def report_result(context, data):
    context.months += 1
    today = zipline.api.get_datetime().date()
    # Calculate annualized return so far
    ann_ret = np.power(context.portfolio.portfolio_value / starting_portfolio, 
                   12 / context.months) - 1
    
    # Update the text
    out.value = """{} We have traded <b>{}</b> months 
    and the annualized return is <b>{:.2%}</b>""".format(today, context.months, ann_ret)

def roll_futures(context, data):
    open_orders = zipline.api.get_open_orders()
    
    for held_contract in context.portfolio.positions:
        # don't roll positions that are set to change by core logic
        if held_contract in open_orders: 
            continue
        
        # Save some time by only checking rolls for
        # contracts stopping trading in the next days
        days_to_auto_close = (
            held_contract.auto_close_date.date() - data.current_session.date()
        ).days
        if days_to_auto_close > 5:
            continue        
        
        # Make a continuation
        continuation = continuous_future(
                held_contract.root_symbol, 
                offset=0, 
                roll='volume', 
                adjustment='mul'
                )
        
        # Get the current contract of the continuation
        continuation_contract = data.current(continuation, 'contract')
        
        if continuation_contract != held_contract:
            # Check how many contracts we hold
            pos_size = context.portfolio.positions[held_contract].amount         
            # Close current position
            order_target(held_contract, 0)
            # Open new position
            order_target(continuation_contract, pos_size)     
            
def position_size(portfolio_value, std, point_value):
    target_variation = portfolio_value * risk_factor
    contract_variation = std * point_value
    contracts = target_variation / contract_variation
    return int(np.nan_to_num(contracts)) 
    
def initialize(context):
    
    """
    Cost Settings
    """
    if enable_commission:
        comm_model = PerContract(cost=0.85, exchange_fee=1.5)
    else:
        comm_model = PerTrade(cost=0.0)
        
    set_commission(us_futures=comm_model)
    
    if enable_slippage:
        slippage_model=VolatilityVolumeShare(volume_limit=0.2)
    else:
        slippage_model=FixedSlippage(spread=0.0)      
        
    set_slippage(us_futures=slippage_model)
    
    """
    Markets to trade
    """ 
    currencies = [
        'AD',
        'BP',
        'CD',
        'CU',
        'DX',
        'JY',
        'NE',
        'SF',
    ]
    
    agricultural = [
        '_C',
        'CT',
        'FC',
        'KC',
        'LR',
        'LS',
        '_O',
        '_S',
        'SB',
        'SM',
        '_W',
    ]
    nonagricultural = [
        'CL',
        'GC',
        'HG',
        'HO',
        'LG',
        'NG',
        'PA',
        'PL',
        'RB',
        'SI',
    ]
    equities = [
        'ES',
        'NK',
        'NQ',
        'TW',
        'VX',
        'YM',
    ]
    rates = [
        'ED',
        'FV',
        'TU',
        'TY',
        'US',
    ]
    
    # Make a list of all the markets
    markets = currencies + agricultural + nonagricultural + equities + rates
    
    # Make a list of all continuations
    context.universe = [
        continuous_future(market, offset=0, roll='volume', adjustment='mul')
            for market in markets
    ]
    
    # We'll use these to keep track of best position reading
    # Used to calculate stop points.
    context.highest_in_position = {market: 0 for market in markets} 
    context.lowest_in_position = {market: 0 for market in markets}    
    
    # Schedule the daily trading
    schedule_function(daily_trade, date_rules.every_day(), time_rules.market_close())
    
    # We'll just use this for the progress output
    # during the backtest. Doesn't impact anything.
    context.months = 0    
    
    # Schedule monthly report output
    schedule_function(
        func=report_result,
        date_rule=date_rules.month_start(),
        time_rule=time_rules.market_open()
    ) 
    
def analyze(context, perf):
    returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(perf)
    pf.create_returns_tear_sheet(returns, benchmark_rets=None)
    
def daily_trade(context, data):
    # Get continuation data
    hist = data.history(
        context.universe, 
        fields=['close','volume'], 
        frequency='1d', 
        bar_count=250,
    )
    
    # Calculate trend
    hist['trend'] = hist['close'].ewm(span=fast_ma).mean() > hist['close'].ewm(span=slow_ma).mean()    
    
    # Make dictionary of open positions
    open_pos = {
        pos.root_symbol: pos 
        for pos in context.portfolio.positions
    } 
    
    # Iterate markets, check for trades
    for continuation in context.universe:
        
        # Get root symbol of continuation
        root = continuation.root_symbol
        
        # Slice off history for just this market
        h = hist.xs(continuation, 2)
        
        # Get standard deviation
        std = h.close.diff()[-vola_window:].std()

        if root in open_pos: # Position is open

            # Get position
            p = context.portfolio.positions[open_pos[root]]
            
            if p.amount > 0: # Position is long
                if context.highest_in_position[root] == 0: # First day holding the position
                    context.highest_in_position[root] = p.cost_basis
                else:
                    context.highest_in_position[root] = max(
                        h['close'].iloc[-1], context.highest_in_position[root]
                    ) 
                    
                # Calculate stop point
                stop = context.highest_in_position[root] - (std  * stop_distance)
                # Check if stop is hit
                if h.iloc[-1]['close'] < stop:
                    contract = open_pos[root]
                    order_target(contract, 0)
                    context.highest_in_position[root] = 0
                # Check if trend has flipped
                elif h['trend'].iloc[-1] == False:
                    contract = open_pos[root]
                    order_target(contract, 0)
                    context.highest_in_position[root] = 0
                    
            else: # Position is short
                if context.lowest_in_position[root] == 0: # First day holding the position
                    context.lowest_in_position[root] = p.cost_basis
                else:
                    context.lowest_in_position[root] = min(
                        h['close'].iloc[-1], context.lowest_in_position[root]
                    )
                
                # Calculate stop point
                stop = context.lowest_in_position[root] + (std  * stop_distance)
                
                # Check if stop is hit
                if h.iloc[-1]['close'] > stop:
                    contract = open_pos[root]
                    order_target(contract, 0)
                    context.lowest_in_position[root] = 0
                # Check if trend has flipped
                elif h['trend'].iloc[-1] == True:
                    contract = open_pos[root]
                    order_target(contract, 0)
                    context.lowest_in_position[root] = 0                         
        
        else: # No position on
            if h['trend'].iloc[-1]: # Bull trend
                # Check if we just made a new high
                if h['close'][-1] == h[-breakout_window:]['close'].max(): 
                    contract = data.current(continuation, 'contract')

                    contracts_to_trade = position_size( \
                                                       context.portfolio.portfolio_value, \
                                                       std, \
                                                       contract.price_multiplier)
                    
                    # Limit size to 20% of avg. daily volume
                    contracts_cap = int(h['volume'][-20:].mean() * 0.2)
                    contracts_to_trade = min(contracts_to_trade, contracts_cap)
                    
                    # Place the order
                    order_target(contract, contracts_to_trade)
             
            else: # Bear trend
                # Check if we just made a new low
                if h['close'][-1] == h[-breakout_window:]['close'].min(): 
                    contract = data.current(continuation, 'contract')

                    contracts_to_trade = position_size( \
                                                       context.portfolio.portfolio_value, \
                                                       std, \
                                                       contract.price_multiplier)
                    
                    # Limit size to 20% of avg. daily volume
                    contracts_cap = int(h['volume'][-20:].mean() * 0.2)
                    contracts_to_trade = min(contracts_to_trade, contracts_cap)
                    
                    # Place the order
                    order_target(contract, -1 * contracts_to_trade)
    
    # If we have open positions, check for rolls
    if len(open_pos) > 0:   
        roll_futures(context, data)                
                        

start = pd.Timestamp('2003-01-01', tz='utc')
end = pd.Timestamp('2017-12-31', tz='utc')

perf = zipline.run_algorithm(
    start=start, end=end, 
    initialize=initialize, 
    analyze=analyze,
    capital_base=starting_portfolio,  
    data_frequency = 'daily', 
    bundle='random_futures_data' ) 




### 💻**MODEL TESTING**

#**DIGITAL ASSETS PROGRAMME**



## **Systems**

---



### Single Asset Backtest

In [None]:
# This ensures that our graphs will be shown properly in the notebook.
%matplotlib inline

# Import libraries
import zipline
from zipline import run_algorithm
from zipline.api import order_target_percent, symbol
from datetime import datetime
import pytz
import matplotlib.pyplot as plt
import pandas as pd

def initialize(context):
  # Which asset to trade
  context.asset = symbol('btc')
  
  # Moving average window
  context.index_average_window = 100

def handle_data(context, data):
  # Request history for the stock
  btc_hist = data.history(context.asset, "close",
                               context.index_average_window, "1d")
  
  # Check if price is above moving average
  if btc_hist[-1] > btc_hist.mean():
    asset_weight = 1.0
  else:
    asset_weight = 0.0
    # Place order
    order_target_percent(context.asset, asset_weight)

def analyze(context, perf):
  
  fig = plt.figure(figsize=(12, 8))
  
    # First char
  ax = fig.add_subplot(311)
  ax.set_title('Strategy Results')
  ax.semilogy(perf['portfolio_value'], linestyle='-',
              label='Equity Curve', linewidth=3.0)
  ax.legend()
  ax.grid(False)

  # Second chart
  ax = fig.add_subplot(312)
  ax.plot(perf['gross_leverage'],
  label='Exposure', linestyle='-', linewidth=1.0)
  ax.legend()
  ax.grid(True)

  # Third chart
  ax = fig.add_subplot(313)
  ax.plot(perf['returns'], label='Returns', linestyle='-.',
          linewidth=1.0)

### Equal Weight Model

In [None]:
# This ensures that our graphs will be shown properly in the notebook.
%matplotlib inline

# Import a few libraries we need
from zipline import run_algorithm
from zipline.api import order_target_percent, record, symbol, set_benchmark
import pyfolio as pf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

def initialize(context):

  # Which stock to trade

  """
  dji = ["JEC","BBY","MSFT","MCHP","PEP",
         "RAD","PTC","GCO","FAST","CTL",
         "APA","EL","TMK","VVI","HPQ",
         "CMCSA","JCI","T"]

  dji = ["aave","ada","algo","alpha","ant",
         "bal","bat","bch","bnb","bsv",
         "btc"]
  """

  dji = ["btc"]


  # Make a list of symbols from the list of tickers
  context.dji_symbols = [symbol(s) for s in dji]

  # Moving average window
  context.index_average_window = 52
  
def handle_data(context, data):
  
  # Get history for all the stocks
  stock_hist = data.history(context.dji_symbols, "close", context.index_average_window, "1d")

  # Make an empty DataFrame to start with
  stock_analytics = pd.DataFrame()

  # Add column for above or below average
  stock_analytics['above_mean'] = stock_hist.iloc[-1] > stock_hist.mean()

  # Set weight for stocks to buy
  stock_analytics.loc[stock_analytics['above_mean'] == True, 'weight'] = 1/len(context.dji_symbols)

  # Set weight to zero for the rest
  stock_analytics.loc[stock_analytics['above_mean'] == False, 'weight'] = 0.0

  # Iterate each row and place trades
  for stock, analytics in stock_analytics.iterrows():

    # Check if the stock can be traded
    if data.can_trade(stock):

      # Place the trade
      order_target_percent(stock, analytics['weight'])

def analyze(context, perf):

  # Use PyFolio to generate a performance report
  returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(perf)

  benchmark_period_return = perf['benchmark_period_return']

  daily_benchmark_returns = np.exp(np.log(benchmark_period_return + 1.0).diff()) - 1

  # Create tear sheet
  pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions, benchmark_rets=None)

"""

def analyze(context, perf):

  fig = plt.figure(figsize=(12, 8))

  # First chart
  ax = fig.add_subplot(311)
  ax.set_title('Strategy Results')
  ax.plot(perf['portfolio_value'], linestyle='-',
          label='Equity Curve', linewidth=3.0)
  ax.legend()
  ax.grid(False)

  # Second chart
  ax = fig.add_subplot(312)
  ax.plot(perf['gross_leverage'],label='Exposure',
          linestyle='-', linewidth=1.0)
  ax.legend()
  ax.grid(True)
  
  # Third chart
  ax = fig.add_subplot(313)
  ax.plot(perf['returns'], label='Returns', linestyle='-.',
          linewidth=1.0)
  ax.legend()
  ax.grid(True)

  """

# Set start and end date
start_date = pd.Timestamp('2016-05-02', tz='utc')
end_date = pd.Timestamp('2020-05-02', tz='utc')

# Fire off the backtest
perf = run_algorithm(
start=start_date,
end=end_date,
initialize=initialize,
analyze=analyze,
handle_data=handle_data,
capital_base=10000,
data_frequency = 'daily', 
bundle= 'crypto',)

In [None]:
#Export performance results to disk in csv file

perf.portfolio_value.to_csv('ewm_momentum_model.csv')

### Momentum Model

In [None]:
#@title

%matplotlib inline

import zipline
from zipline import run_algorithm
from zipline.api import order_target_percent, symbol, set_commission, set_slippage, schedule_function, date_rules, time_rules
import matplotlib.pyplot as plt 
import pyfolio as pf
import pandas as pd 
import numpy as np
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
from zipline.finance.commission import PerDollar
from zipline.finance.slippage import VolumeShareSlippage, FixedSlippage


initial_portfolio = 10000
minimum_momentum = 10
portfolio_size = 1
vola_window = 30

def momentum_score(ts):

  #Input: Price time series.Output: Annualized exponential regression slope, multiplied by the R2

  # Make a list of consecutive numbers 
  x = np.arange(len(ts))
  # Get logs
  log_ts = np.log(ts)
  # Calculate regression values
  slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts) 
  # Annualize percent
  annualized_slope = (np.power(np.exp(slope), 252) - 1) * 100 
  #Adjust for fitness
  score = annualized_slope * (r_value ** 2) 
  return score

def volatility(ts):
  return ts.pct_change().rolling(vola_window).std().iloc[-1]


#Initialization and trading logic


def initialize(context):

  context.rolling_window = 100

  #Commission and Slippage Settings

  enable_commission = True 
  commission_pct = 0.001 
  enable_slippage = True 
  slippage_volume_limit = 0.025
  slippage_impact = 0.05

  """
  dji = ["AAVE","ADA","ALPHA","BCH","BTC",
          "DOGE","DOT","ETH","LTC","USDT",
          "XLM","XMR","XRP"]
  """
  dji = ["btc","ltc","eth"]

  
  # Make a list of symbols from the list of tickers
  context.dji_symbols = [symbol(s) for s in dji]

  # Set commission and slippage.
  if enable_commission:
    comm_model = PerDollar(cost=commission_pct) 
  else:
    comm_model = PerDollar(cost=0.0) 
  set_commission(comm_model)
  if enable_slippage: slippage_model=VolumeShareSlippage(volume_limit=slippage_volume_limit,
price_impact=slippage_impact) 
  else:
    slippage_model=FixedSlippage(spread=0.0) 
  set_slippage(slippage_model)


  #Schedule rebalance monthly. 
  schedule_function(
    func=rebalance, 
    date_rule=date_rules.month_start(), 
    time_rule=time_rules.market_open()
  )

def rebalance(context, data):

  # Get historical data
  hist = data.history(context.dji_symbols, "close", context.rolling_window, "1d")

  # Make momentum ranking table
  ranking_table = hist.apply(momentum_score).sort_values(ascending=False)

  #Sell Logic
  #First we check if any existing position should be sold.
    #Sell if stock is no longer part of index.
    #Sell if stock has too low momentum value.

  kept_positions = list(context.portfolio.positions.keys()) 
  for security in context.portfolio.positions:
    if ranking_table[security] < minimum_momentum: 
      order_target_percent(security, 0.0)
      kept_positions.remove(security)
 
  #Stock Selection Logic
  #Check how many stocks we are keeping from last month.
  #Fill from top of ranking list, until we reach the desired total number of portfolio holdings.

  replacement_stocks = portfolio_size - len(kept_positions) 
  buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]
  new_portfolio = pd.concat(
    (buy_list,
    ranking_table.loc[ranking_table.index.isin(kept_positions)])
  )
  buy_list = ranking_table.loc[
    ~ranking_table.index.isin(kept_positions)][:replacement_stocks]

  #Calculate inverse volatility for stocks, and make target position weights.

  vola_table = hist[new_portfolio.index].apply(volatility) 
  inv_vola_table = 1 / vola_table
  sum_inv_vola = np.sum(inv_vola_table) 
  vola_target_weights = inv_vola_table / sum_inv_vola
  for security, rank in new_portfolio.iteritems(): 
    weight = vola_target_weights[security]
    if security in kept_positions:
      order_target_percent(security, weight)
    else:
      if ranking_table[security] > minimum_momentum: 
        order_target_percent(security, weight)


def analyze(context, perf):
    
  # Use PyFolio to generate a performance report
  returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(perf)

  benchmark_period_return = perf['benchmark_period_return']

  daily_benchmark_returns = np.exp(np.log(benchmark_period_return + 1.0).diff()) - 1

  # Create tear sheet
  pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions, benchmark_rets=None)

start= pd.Timestamp('2015-5-1', tz='utc')
end = pd.Timestamp('2022-5-2', tz='utc')

perf = zipline.run_algorithm(start=start, 
                             end=end, 
                             initialize=initialize, 
                             analyze=analyze, 
                             capital_base=initial_portfolio, 
                             data_frequency='daily',
                             bundle='crypto')

In [None]:
#Export Performance Result to disk in csv file

perf.portfolio_value.to_csv('crypto_momentum.csv')

### **Model Performance**

# **DATA CLEANING**



## Futures

In [None]:
# Import modules
import nasdaqdatalink
import quandl
# Get the data for Futures, Continuous Contract #6.
import matplotlib.pyplot as plt
data = quandl.get("CHRIS/CME_YM1",start_date="2017-12-03", end_date="2018-12-03", api_key='WDDHaLh3eG6vrEgiCCqy')

In [None]:
# Plot the prices
data.Settle.plot()
plt.show()

In [None]:
#import required libraries
import pandas as pd
from datetime import datetime
import numpy as np

data = data.rename(columns={'Open': 'open','High':'high',
                                'Low':'low','Last':'close',
                                'Volume':'volume','Previous Day Open Interest': 'openinterest'}, index=None)

data['expiration_date'] = '2018-12-21'
data['root_symbol'] = 'YM'
data['symbol'] = 'YMZ18'

new_data = data.drop(['Change', 'Settle',], axis=1)

print(new_data.head())



In [None]:
new_data.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/Futures/YMZ18.csv')

## Digital assets

### Adjusting model performace csv data for comparison

In [None]:
import pandas as pd
import numpy as np

A = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/Backtests/Raw Model Performance Data/Digital Assets/crypto_momentum.csv', parse_dates=True, index_col=0)

A.index = A.index.strftime('%Y/%m/%d')

header_row = 1

A.columns = A.iloc[header_row]

A

In [None]:
A.info()

In [None]:

A.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/Backtests/crypto_momentum_2.csv')


### Adjusting crypto indices csv data for comparison

In [None]:
#import required libraries
import pandas as pd
from datetime import datetime
import exchange_calendars as xcals
from zipline import get_calendar
import numpy as np

df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/indices/SPCBDM.csv', encoding='cp1252')
df.info()
print(df)

In [None]:

df.drop([1355],axis=0, inplace = True )

#Convert 'date' Column to datetime
df['2/28/2017'] = pd.to_datetime(df['2/28/2017'], utc=True)

#Set 'date' column as index
df.set_index('2/28/2017', inplace=True)

# Get all expected trading sessions in new dataframe.
sessions = get_calendar('NYSE').sessions_in_range('2017-02-28', '2022-05-05')

df.index = df.index.strftime('%Y/%m/%d')

header_row = 1

df.columns = df.iloc[header_row]

df.tail(5)
df.info()
df

In [None]:
df = df[~df.index.duplicated()]
df

In [None]:
df.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/Backtests/SPCBDM_test3.csv')

### Removing weekends from timeseries

In [None]:
#@title
#Import statements
%matplotlib inline
import pandas as pd
import datetime as dt
import numpy as np

#Load csv file from disc
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/crypto_data/DOGE.csv')

#Set date column to datetime
df['time'] = pd.to_datetime(df['time'], errors='coerce')

#Remove weekends from data
df = df[df.time.dt.weekday < 5]

#Set 'date' columns as Index
df.set_index("time", inplace = True)

#display weekday dataframe
df


In [None]:
#inspect dataframe
df.info()

In [None]:
#Save dataframe to csv

df.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/crypto weekday data/DODGE_weekday.csv')

In [None]:
#Load final data to file to dataframe
dodge = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/crypto weekday data/DODGE_weekday.csv')

print(dodge)



In [None]:
#Remove weekends from dataframe
btc = usholidays[usholidays.date.dt.weekday < 5]

#Set 'date' column as index
btc.set_index('date', inplace = True)

btc

In [None]:
#import US holiday calender
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar

#Set period between start and end date in dataframe to identify and remove holidays
holidays = calendar().holidays(start='2009-01-05', end='2022-05-24') 
m = raw_data['date'].isin(holidays)
usholidays = raw_data[~m].copy()

#print new dataframe with holidays removed
usholidays

### Adjusting crypto data from **Yahoo Finance** for testing with NYSE trading calender.

In [None]:
#import required libraries
import pandas as pd
from datetime import datetime
import exchange_calendars as xcals
from zipline import get_calendar
import numpy as np

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/yahoo_data/yahoo raw data/BTC-USD.csv', parse_dates=True, index_col=0)

# Remove Adj Close Column
new_df = df.drop(['Adj Close'], axis=1)

new_df["Volume"] = new_df["Volume"].astype(float)

#Reset index
new_df.reset_index(inplace = True)

#Convert 'date' Column to datetime
new_df['Date'] = pd.to_datetime(new_df['Date'], utc=True)

#Set 'date' column as index
new_df.set_index('Date', inplace=True)

# Get all expected trading sessions in new dataframe.
sessions = get_calendar('NYSE').sessions_in_range('2009-01-05', '2022-07-07')

# To set the trading session in  new dataframe to the NYSE Calender
btc = new_df.reindex(sessions)

#Reset index again to change the index name
btc.reset_index(inplace = True)

#Rename index column to 'date'
btc = btc.rename(columns={'index': 'trading_date'}, index=None)

#Change date format to Year-Month-Day
btc['trading_date'] =  pd.to_datetime(btc['trading_date']).dt.strftime('%Y-%m-%d')

crypto = btc.dropna()

#Change 'PriceUSD' columns to 'close'
crypto.rename(columns = {'Open':'open',
                      'High':'high',
                      'Low':'low',
                      'Close':'close',
                      'Volume':'volume'}, inplace = True)

crypto.set_index('trading_date', inplace=True)

crypto.info()

crypto.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/yahoo_data/yahoo cleaned data/btc.csv')

crypto

### Adjusting crypto data from **Coinmetrics** for testing with NYSE trading calender.

In [None]:
#import required libraries
import pandas as pd
from datetime import datetime
import exchange_calendars as xcals
from zipline import get_calendar
import numpy as np

#load raw data from file
raw_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/coinmetrics_data/btc.csv', parse_dates=True, index_col=0)

new = raw_data.drop(raw_data.loc[:, :'PriceBTC'].columns, axis=1)


#drop columns not needed. Only closing prices required
new.drop(new.iloc[:, 1:76], inplace = True, axis = 1)

#Add new columns
new['high'] =0
new['low'] =0
new['open'] =0
new['volume'] =0

# Rearrange columns to set timestamp as first column
new = new[['high','open','low','PriceUSD','volume']]

#Change 'PriceUSD' columns to 'close'
new.rename(columns = {'PriceUSD':'close'}, inplace = True)

"""
#Drop row (date) not needed
new=new.drop(['2022-05-25'])
"""

#Fill 'close' column with integer values '0'
new["close"].fillna(0, inplace=True)

#Reset index
new.reset_index(inplace = True)

#Rename time columns to date
new = new.rename(columns={'time': 'date'}, index=None)

#Drop all NaN values from dataframe
new.dropna()

#Convert columns datatype from integer to float
new["high"] = new["high"].astype(float)
new["open"] = new["open"].astype(float)
new["low"] = new["low"].astype(float)
new["volume"] = new["volume"].astype(float)

#Convert 'date' Column to datetime
new['date'] = pd.to_datetime(new['date'], utc=True)

#Set 'date' column as index
new.set_index('date', inplace=True)

# Get all expected trading sessions in new dataframe.
sessions = get_calendar('NYSE').sessions_in_range('2009-01-05', '2022-05-24')

# To set the trading session in  new dataframe to the NYSE Calender
btc = new.reindex(sessions)

#Reset index again to change the index name
btc.reset_index(inplace = True)

#Rename index column to 'date'
btc = btc.rename(columns={'index': 'date'}, index=None)

#Change date format to Year-Month-Day
btc['date'] =  pd.to_datetime(btc['date']).dt.strftime('%Y-%m-%d')

"""
btc.set_index('date', inplace=True)
"""

# Remove two columns name is 'C' and 'D'
df = btc.drop(['high', 'open','low'], axis=1)

df['open'] = df['close'] - (0 * df ['close'])
df['high'] = df['open'] - (0 * df ['open'])
df['low'] = df['high'] - (0 * df ['high'])

# Rearrange columns
df = df[['date','open','high','low','close','volume']]

df.set_index('date', inplace=True)

crypto = df.dropna()

crypto.to_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/crypto/final crypto bundle/btc.csv')



## Exploratory Data Analyses (EDA)





In [None]:
!pip install mpld3

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sn
import matplotlib.pyplot as plt
"""
import mpld3
mpld3.enable_notebook()
"""
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 2)

In [None]:
B = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/My Project Folder/random_futures/YMZ18.csv', parse_dates=True, index_col=0)

B.info()

print(B.head(5))

In [None]:
new_data.info()

print(new_data.head(5))

In [None]:
pd.set_option('display.max_rows', None)

B.describe()

In [None]:
pd.set_option('display.max_rows', None)

new_data.describe()

In [None]:
print(B)
print(new_data)

In [None]:
"""
B['Open'].plot(figsize=(12,6), 
                  linestyle='--',color='black',
                  legend='Open')

B['High'].plot(figsize=(12,6),
                   linestyle='-',color='grey',
                   legend='High')
"""
B['close'].plot(figsize=(12,6),
                 linestyle=':',color='black',
                 legend='Low')

new_data['close'].plot(figsize=(12,6),
                  linestyle='-',color='black',
                  legend='Close')


In [None]:
no_outlier_prices = B[(np.abs(stats.zscore(B)) <
6).all(axis=1)]

In [None]:
no_outlier_prices['100'].plot(figsize=(12,6), linestyle='--',
color='black', legend='Close')

In [None]:
pd.set_option('display.max_rows', None)

no_outlier_prices[['533.95']].describe()

# **MARKETSTACK & STOCKDIO**

In [2]:
import pandas as pd
import json
import requests
params = {'access_key': 'e52cf3b93696352e880916f8c8adbf0c',
          'limit': 300}
api_result = requests.get('http://api.marketstack.com/v1/exchanges/XJSE/tickers', params)
api_response = api_result.json()
print(f"Exchange Name = {api_response['data']['name']}")
for ticker in api_response['data']['tickers']:
  print(f"{ticker['name']}: {ticker['symbol']}")

Exchange Name = Johannesburg Stock Exchange
PROSUS N.V.: PRX.XJSE
ANHEUSER-BUSCH INBEV SA NV: ANH.XJSE
BHP GROUP PLC: BHP.XJSE
BRITISH AMERICAN TOB PLC: BTI.XJSE
NASPERS LTD -N-: NPN.XJSE
COMPAGNIE FIN RICHEMONT: CFR.XJSE
GLENCORE PLC: GLN.XJSE
ANGLO AMERICAN PLC: AGL.XJSE
FIRSTRAND LTD: FSR.XJSE
ANGLO AMERICAN PLAT LTD: AMS.XJSE
STANDARD BANK GROUP LTD: SBK.XJSE
VODACOM GROUP LTD: VOD.XJSE
MONDI PLC: MNP.XJSE
CAPITEC BANK HLDGS LTD: CPI.XJSE
BEE - SASOL LIMITED: SOLBE1.XJSE
SANLAM LIMITED: SLM.XJSE
MTN GROUP LTD: MTN.XJSE
ANGLOGOLD ASHANTI LTD: ANG.XJSE
SASOL LIMITED: SOL.XJSE
ABSA GROUP LIMITED: ABG.XJSE
SOUTH32 LIMITED: S32.XJSE
RMB HOLDINGS LTD: RMH.XJSE
IMPALA PLATINUM HLGS LTD: IMP.XJSE
BID CORPORATION LTD: BID.XJSE
REMGRO LTD: REM.XJSE
KUMBA IRON ORE LTD: KIO.XJSE
NEDBANK GROUP LTD: NED.XJSE
SIBANYE STILLWATER LTD: SSW.XJSE
GOLD FIELDS LTD: GFI.XJSE
INVESTEC LTD: INL.XJSE
INVESTEC PLC: INP.XJSE
OLD MUTUAL LIMITED: OMU.XJSE
NEPI ROCKCASTLE PLC: NRP.XJSE
DISCOVERY LTD: DSY.XJSE
SH

In [None]:
api_response

In [3]:
import json

# Serializing json
json_object = json.dumps(api_response, indent=4)

# Writing to sample.json
with open("jse.json", "w") as outfile:
    outfile.write(json_object)

In [19]:
# Opening JSON file
with open('jse.json', 'r') as openfile:
 
    # Reading from json file
    json_object = json.load(openfile)
 
print(json_object)
print(type(json_object))

{'pagination': {'limit': 300, 'offset': 0, 'count': 300, 'total': 564}, 'data': {'name': 'Johannesburg Stock Exchange', 'acronym': 'JSE', 'mic': 'XJSE', 'country': 'South Africa', 'city': 'Johannesburg', 'website': 'WWW.JSE.CO.ZA', 'tickers': [{'name': 'PROSUS N.V.', 'symbol': 'PRX.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'ANHEUSER-BUSCH INBEV SA NV', 'symbol': 'ANH.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'BHP GROUP PLC', 'symbol': 'BHP.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'BRITISH AMERICAN TOB PLC', 'symbol': 'BTI.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'NASPERS LTD -N-', 'symbol': 'NPN.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'COMPAGNIE FIN RICHEMONT', 'symbol': 'CFR.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'GLENCORE PLC', 'symbol': 'GLN.XJSE', 'has_intraday': False, 'has_eod': True}, {'name': 'ANGLO AMERICAN PLC', 'symbol': 'AGL.XJSE', 'has_intraday': False, 'has_eod': True}, {'name

In [None]:
print(json_object.keys())

dict_keys(['pagination', 'data'])


In [None]:
print(json_object["data"].keys())

dict_keys(['name', 'acronym', 'mic', 'country', 'city', 'website', 'tickers'])


In [20]:
data = pd.json_normalize(json_object["data"]["tickers"])

print(data)

                           name    symbol  has_intraday  has_eod
0                   PROSUS N.V.  PRX.XJSE         False     True
1    ANHEUSER-BUSCH INBEV SA NV  ANH.XJSE         False     True
2                 BHP GROUP PLC  BHP.XJSE         False     True
3      BRITISH AMERICAN TOB PLC  BTI.XJSE         False     True
4               NASPERS LTD -N-  NPN.XJSE         False     True
..                          ...       ...           ...      ...
295    NUTRITIONAL HOLDINGS LTD  NUT.XJSE         False     True
296             KIBO ENERGY PLC  KBO.XJSE         False     True
297        EFORA ENERGY LIMITED  EEL.XJSE         False     True
298            KAYDAV GROUP LTD  KDV.XJSE         False     True
299     BAUBA RESOURCES LIMITED  BAU.XJSE         False     True

[300 rows x 4 columns]


In [None]:
'''
# use the pandas module
with open('sample.json') as json_file:
    jse = pd.read_json(json_file, orient='index')

print(jse)
'''

In [21]:
# dropping null value columns to avoid errors
data.dropna(inplace = True)
 
# new data frame with split value columns
new = data["symbol"].str.split(".", n = 1, expand = True)
 
# making separate first name column from new data frame
data["ticker"]= new[0]
 
# Dropping old Name columns
data.drop(columns =["has_intraday","has_eod"], inplace = True)
 
# df display
data

Unnamed: 0,name,symbol,ticker
0,PROSUS N.V.,PRX.XJSE,PRX
1,ANHEUSER-BUSCH INBEV SA NV,ANH.XJSE,ANH
2,BHP GROUP PLC,BHP.XJSE,BHP
3,BRITISH AMERICAN TOB PLC,BTI.XJSE,BTI
4,NASPERS LTD -N-,NPN.XJSE,NPN
...,...,...,...
295,NUTRITIONAL HOLDINGS LTD,NUT.XJSE,NUT
296,KIBO ENERGY PLC,KBO.XJSE,KBO
297,EFORA ENERGY LIMITED,EEL.XJSE,EEL
298,KAYDAV GROUP LTD,KDV.XJSE,KDV


In [24]:
new.columns

Index(['0', '1'], dtype='object')

In [25]:
new.columns.values[1] = "JSE_ID"

In [23]:
new.columns = new.columns.astype(str)

In [7]:
new

Unnamed: 0,0,1
0,PRX,XJSE
1,ANH,XJSE
2,BHP,XJSE
3,BTI,XJSE
4,NPN,XJSE
...,...,...
295,NUT,XJSE
296,KBO,XJSE
297,EEL,XJSE
298,KDV,XJSE


In [26]:
new['0'] + '.' + new['JSE_ID']

0      PRX.XJSE
1      ANH.XJSE
2      BHP.XJSE
3      BTI.XJSE
4      NPN.XJSE
         ...   
295    NUT.XJSE
296    KBO.XJSE
297    EEL.XJSE
298    KDV.XJSE
299    BAU.XJSE
Length: 300, dtype: object

In [27]:
new.replace("XJSE", "JO", inplace=True)
print(new)

       0 JSE_ID
0    PRX     JO
1    ANH     JO
2    BHP     JO
3    BTI     JO
4    NPN     JO
..   ...    ...
295  NUT     JO
296  KBO     JO
297  EEL     JO
298  KDV     JO
299  BAU     JO

[300 rows x 2 columns]


In [28]:

result = pd.concat([data, new], axis=1, join='inner')

In [29]:
final=result['0'] + '.' + result['JSE_ID']
final

0      PRX.JO
1      ANH.JO
2      BHP.JO
3      BTI.JO
4      NPN.JO
        ...  
295    NUT.JO
296    KBO.JO
297    EEL.JO
298    KDV.JO
299    BAU.JO
Length: 300, dtype: object

In [30]:
DF = pd.concat([data, final], axis=1, join='inner')

In [31]:
DF

Unnamed: 0,name,symbol,ticker,0
0,PROSUS N.V.,PRX.XJSE,PRX,PRX.JO
1,ANHEUSER-BUSCH INBEV SA NV,ANH.XJSE,ANH,ANH.JO
2,BHP GROUP PLC,BHP.XJSE,BHP,BHP.JO
3,BRITISH AMERICAN TOB PLC,BTI.XJSE,BTI,BTI.JO
4,NASPERS LTD -N-,NPN.XJSE,NPN,NPN.JO
...,...,...,...,...
295,NUTRITIONAL HOLDINGS LTD,NUT.XJSE,NUT,NUT.JO
296,KIBO ENERGY PLC,KBO.XJSE,KBO,KBO.JO
297,EFORA ENERGY LIMITED,EEL.XJSE,EEL,EEL.JO
298,KAYDAV GROUP LTD,KDV.XJSE,KDV,KDV.JO


In [34]:
DF.columns

Index(['name', 'symbol', 'ticker', 0], dtype='object')

In [35]:
DF.columns.values[3] = "JSE_ID"

In [36]:
DF.columns = DF.columns.astype(str)

In [37]:
DF

Unnamed: 0,name,symbol,ticker,JSE_ID
0,PROSUS N.V.,PRX.XJSE,PRX,PRX.JO
1,ANHEUSER-BUSCH INBEV SA NV,ANH.XJSE,ANH,ANH.JO
2,BHP GROUP PLC,BHP.XJSE,BHP,BHP.JO
3,BRITISH AMERICAN TOB PLC,BTI.XJSE,BTI,BTI.JO
4,NASPERS LTD -N-,NPN.XJSE,NPN,NPN.JO
...,...,...,...,...
295,NUTRITIONAL HOLDINGS LTD,NUT.XJSE,NUT,NUT.JO
296,KIBO ENERGY PLC,KBO.XJSE,KBO,KBO.JO
297,EFORA ENERGY LIMITED,EEL.XJSE,EEL,EEL.JO
298,KAYDAV GROUP LTD,KDV.XJSE,KDV,KDV.JO


In [38]:
#Dropping old Name columns
DF.drop(columns =["symbol","ticker"], inplace = True)

In [39]:
DF

Unnamed: 0,name,JSE_ID
0,PROSUS N.V.,PRX.JO
1,ANHEUSER-BUSCH INBEV SA NV,ANH.JO
2,BHP GROUP PLC,BHP.JO
3,BRITISH AMERICAN TOB PLC,BTI.JO
4,NASPERS LTD -N-,NPN.JO
...,...,...
295,NUTRITIONAL HOLDINGS LTD,NUT.JO
296,KIBO ENERGY PLC,KBO.JO
297,EFORA ENERGY LIMITED,EEL.JO
298,KAYDAV GROUP LTD,KDV.JO


In [41]:
# Converting a specific Dataframe 
# column to list using Series.tolist()
Stocks = DF["JSE_ID"].tolist()
  
print("Converting ticker to list:")
  
# displaying list
Stocks

Converting ticker to list:


['PRX.JO',
 'ANH.JO',
 'BHP.JO',
 'BTI.JO',
 'NPN.JO',
 'CFR.JO',
 'GLN.JO',
 'AGL.JO',
 'FSR.JO',
 'AMS.JO',
 'SBK.JO',
 'VOD.JO',
 'MNP.JO',
 'CPI.JO',
 'SOLBE1.JO',
 'SLM.JO',
 'MTN.JO',
 'ANG.JO',
 'SOL.JO',
 'ABG.JO',
 'S32.JO',
 'RMH.JO',
 'IMP.JO',
 'BID.JO',
 'REM.JO',
 'KIO.JO',
 'NED.JO',
 'SSW.JO',
 'GFI.JO',
 'INL.JO',
 'INP.JO',
 'OMU.JO',
 'NRP.JO',
 'DSY.JO',
 'SHP.JO',
 'BVT.JO',
 'RNI.JO',
 'CLS.JO',
 'PPH.JO',
 'QLT.JO',
 'GRT.JO',
 'MEI.JO',
 'APN.JO',
 'PSG.JO',
 'MCG.JO',
 'MRP.JO',
 'NHM.JO',
 'RMI.JO',
 'FFA.JO',
 'WHL.JO',
 'CCO.JO',
 'LHC.JO',
 'HMN.JO',
 'SPP.JO',
 'HAR.JO',
 'RDF.JO',
 'TFG.JO',
 'EXX.JO',
 'SNT.JO',
 'ARI.JO',
 'PIK.JO',
 'MTM.JO',
 'LBH.JO',
 'VVO.JO',
 'TBS.JO',
 'NTC.JO',
 'AVI.JO',
 'DGH.JO',
 'PFG.JO',
 'RES.JO',
 'TRU.JO',
 'DCP.JO',
 'ASR.JO',
 'GTC.JO',
 'BAW.JO',
 'SRE.JO',
 'SAP.JO',
 'ITE.JO',
 'EPP.JO',
 'VKE.JO',
 'MTH.JO',
 'CML.JO',
 'TKG.JO',
 'RBP.JO',
 'MSP.JO',
 'HYP.JO',
 'TCP.JO',
 'FFB.JO',
 'AHA.JO',
 'KST.JO',
 'AFE.J

Stockio

In [None]:
import pandas as pd
from urllib.request import urlopen
import json
html = urlopen('https://api.stockdio.com/data/financial/prices/v1/GetHistoricalPrices?app-key=55E6EB9A851947438E97EB72DE3F02B3&stockExchange=JSE&symbol=TFG&from=2020-08-24&to=2022-08-25&useAdjusted=false').read()

data1 = html.decode('utf-8').replace("'", '"')

data = json.loads(data1)
s = json.dumps(data, indent=4)
print(s)

# Writing to sample.json
with open("TFG.json", "w") as outfile:
    outfile.write(s)

In [None]:
data

In [None]:
import pandas as pd
from urllib.request import urlopen
import json
html = urlopen('https://api.stockdio.com/data/financial/info/v1/GetIndexInfo?app-key=55E6EB9A851947438E97EB72DE3F02B3&index=SPX').read()

data1 = html.decode('utf-8')

data = json.loads(data1)
s = json.dumps(data, indent=4)
print(s)

In [None]:
with open("naspers.json", "r") as read_it:
     data = json.load(read_it)

print (data['data'])

In [None]:
df = pd.read_json('/content/naspers.json')
df.drop(['code', 'message','symbol', 'company','exchange'], inplace = True )

In [None]:
df

In [None]:
df.info()

In [None]:
df_nested_list = pd.json_normalize(df, record_path = ['data'])

In [None]:
# Opening JSON file
with open('naspers.json', 'r') as openfile:
 
    # Reading from json file
    naspers = json.load(openfile)
 
print(naspers)
print(type(naspers))

In [None]:
print(naspers.keys())

In [None]:
print(naspers["data"].keys())

In [None]:
NPN = pd.json_normalize(naspers["data"])
NPN.drop(["symbol", "company", "exchange", "prices.columns"], axis = 1, inplace = True)
NPN

In [None]:
out = NPN.explode('prices.values').assign(Co2 = lambda x: x['prices.values'].str.get('prices.values')).reset_index(drop=True)
out.rename(columns = {'prices.values':'prices'}, inplace = True)
out.drop(["Co2"], axis = 1, inplace = True)

In [None]:
df2 = pd.DataFrame(out)
df2[['date','open','high','low', 'close', 'volume']] = pd.DataFrame(df2.prices.tolist(), index= df2.index)
df3 = pd.DataFrame(df2['prices'].to_list(), columns=['date','open','high','low', 'close', 'volume'])

In [None]:
df3.info()
df3

In [None]:
import pandas as pd
from urllib.request import urlopen
import json
import requests

results = []
responses = list()
for stock in Stocks:
  res = requests.get('https://api.stockdio.com/data/financial/prices/v1/GetHistoricalPrices?app-key=55E6EB9A851947438E97EB72DE3F02B3&stockExchange=JSE&symbol={}&from=2022-09-06&to=2022-09-13&useAdjusted=false'.format(stock))
  
  if res.status_code == 200:
    results.append(res.json())
  else:
    print('Request to {} failed'.format(stock))

In [None]:
results

In [None]:
XJSE = pd.json_normalize(xjse_stocks)

In [None]:
XJSE.dropna()

In [None]:
XJSE.drop(["status.message", "status.code", "data.symbol", "data.company", "data.exchange","data.prices.columns" ], axis = 1, inplace = True)
XJSE.dropna()

In [None]:
out = XJSE.explode('data.prices.values').assign(Co2 = lambda x: x['data.prices.values'].str.get('data.prices.values')).reset_index(drop=True)
out.rename(columns = {'data.prices.values':'prices'}, inplace = True)
out.drop(["Co2"], axis = 1, inplace = True)

In [None]:
df2 = pd.DataFrame(out)
df2.dropna()

In [None]:
data = df2.explode('prices').assign(Co2 = lambda x: x['prices'].str.get('prices')).reset_index(drop=True)
data.rename(columns = {'prices':'prices'}, inplace = True)
data.drop(["Co2"], axis = 1, inplace = True)

In [None]:
data2 = pd.DataFrame(data)

In [None]:
data2

In [None]:
data2.dropna()

In [None]:
df3.info()
df3

# **BS, PICKLE & REQUESTS**

In [None]:
import bs4 as bs
import requests
import yfinance as yf
import datetime

resp = requests.get('https://en.wikipedia.org/wiki/List_of_companies_traded_on_the_JSE')
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
tickers = []
for row in table.findAll('tr')[1:]:
    ticker = row.findAll('td')[0].text
    tickers.append(ticker)

tickers = [s.replace('\n', '') for s in tickers]
start = datetime.datetime(2021,7,25)
end = datetime.datetime(2022,8,25)
data = yf.download(tickers, start=start, end=end)
print(data)

In [None]:
jse_stocks = data.dropna()

In [None]:
jse_stocks

#**TEST CODE HERE**

In [None]:
!pip install yfinance

In [43]:
import yfinance as yf
stock_list = Stocks
print('stock_list:', stock_list)
data = yf.download(stock_list, start="2022-01-01", end="2023-04-19")
print('data fields downloaded:', set(data.columns.get_level_values(0)))
data.head()

stock_list: ['PRX.JO', 'ANH.JO', 'BHP.JO', 'BTI.JO', 'NPN.JO', 'CFR.JO', 'GLN.JO', 'AGL.JO', 'FSR.JO', 'AMS.JO', 'SBK.JO', 'VOD.JO', 'MNP.JO', 'CPI.JO', 'SOLBE1.JO', 'SLM.JO', 'MTN.JO', 'ANG.JO', 'SOL.JO', 'ABG.JO', 'S32.JO', 'RMH.JO', 'IMP.JO', 'BID.JO', 'REM.JO', 'KIO.JO', 'NED.JO', 'SSW.JO', 'GFI.JO', 'INL.JO', 'INP.JO', 'OMU.JO', 'NRP.JO', 'DSY.JO', 'SHP.JO', 'BVT.JO', 'RNI.JO', 'CLS.JO', 'PPH.JO', 'QLT.JO', 'GRT.JO', 'MEI.JO', 'APN.JO', 'PSG.JO', 'MCG.JO', 'MRP.JO', 'NHM.JO', 'RMI.JO', 'FFA.JO', 'WHL.JO', 'CCO.JO', 'LHC.JO', 'HMN.JO', 'SPP.JO', 'HAR.JO', 'RDF.JO', 'TFG.JO', 'EXX.JO', 'SNT.JO', 'ARI.JO', 'PIK.JO', 'MTM.JO', 'LBH.JO', 'VVO.JO', 'TBS.JO', 'NTC.JO', 'AVI.JO', 'DGH.JO', 'PFG.JO', 'RES.JO', 'TRU.JO', 'DCP.JO', 'ASR.JO', 'GTC.JO', 'BAW.JO', 'SRE.JO', 'SAP.JO', 'ITE.JO', 'EPP.JO', 'VKE.JO', 'MTH.JO', 'CML.JO', 'TKG.JO', 'RBP.JO', 'MSP.JO', 'HYP.JO', 'TCP.JO', 'FFB.JO', 'AHA.JO', 'KST.JO', 'AFE.JO', 'RCL.JO', 'MSM.JO', 'EQU.JO', 'IPF.JO', 'RLO.JO', 'TSG.JO', 'IPL.JO', 'JSE

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,ABG.JO,ACG.JO,ACL.JO,ACS.JO,ACT.JO,ADH.JO,ADI.JO,ADR.JO,AEE.JO,AEG.JO,...,VVO.JO,WBO.JO,WEZ.JO,WHL.JO,WKF.JO,WSL.JO,YRK.JO,YYLBEE.JO,ZCL.JO,ZED.JO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-01-03 00:00:00,14203.365234,,882.0,575.88855,490.990295,1689.360962,,497.372925,100.0,2685.0,...,,44673,1221507,1875202,0,,64605,611,,312580
2022-01-04 00:00:00,14462.581055,,918.0,566.447754,490.990295,1646.652588,,492.005615,110.0,2750.0,...,,19032,1516864,4930251,0,,30267,3365,,561720
2022-01-05 00:00:00,14588.5625,,971.0,566.447754,490.990295,1641.907104,,482.165497,110.0,2773.0,...,,13124,2161168,3330657,0,,46906,125,,435662
2022-01-06 00:00:00,14867.71875,,1030.0,566.447754,476.055237,1727.324219,,517.947693,95.0,2745.0,...,,25451,1628639,3181628,0,,161857,244,,147727
2022-01-07 00:00:00,14954.727539,,1040.0,528.68457,480.722412,1713.088013,,517.947693,95.0,2745.0,...,,34680,209174,1869559,0,,3038,197,,137827


In [44]:
data.tail()

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,ABG.JO,ACG.JO,ACL.JO,ACS.JO,ACT.JO,ADH.JO,ADI.JO,ADR.JO,AEE.JO,AEG.JO,...,VVO.JO,WBO.JO,WEZ.JO,WHL.JO,WKF.JO,WSL.JO,YRK.JO,YYLBEE.JO,ZCL.JO,ZED.JO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-04-12 00:00:00,17993.65625,,368.0,659.0,550.0,1761.499756,,460.0,100.0,1190.0,...,,19833,86862,3340068,338,,0,4255,,442165
2023-04-13 00:00:00,17841.306641,,377.0,685.0,550.0,1734.083374,,449.0,100.0,954.0,...,,15554,191600,3123080,0,,30018,200,,995557
2023-04-14 00:00:00,17986.90625,,378.0,700.0,550.0,1759.541382,,450.0,100.0,975.0,...,,8705,208807,3825351,0,,20512,400,,117105
2023-04-17 00:00:00,17687.029297,,366.0,700.0,544.0,1732.125122,,441.0,100.0,950.0,...,,23359,546044,3929876,0,,84126,465,,514054
2023-04-18 00:00:00,17526.0,,390.0,700.0,545.0,1738.0,,442.0,100.0,940.0,...,,22418,127138,3415036,0,,16475,14150,,502366


In [46]:

# Dropping the columns having NaN/NaT values
data = data.dropna(axis=1)
  
data

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,ABG.JO,ACL.JO,ACS.JO,ACT.JO,ADH.JO,ADR.JO,AEE.JO,AEG.JO,AEL.JO,AFE.JO,...,VKE.JO,VOD.JO,VUN.JO,WBO.JO,WEZ.JO,WHL.JO,WKF.JO,YRK.JO,YYLBEE.JO,ZED.JO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-01-03,14203.365234,882.0,575.888550,490.990295,1689.360962,497.372925,100.0,2685.0,885.417297,9684.815430,...,455237,550220,0,44673,1221507,1875202,0,64605,611,312580
2022-01-04,14462.581055,918.0,566.447754,490.990295,1646.652588,492.005615,110.0,2750.0,900.749207,9587.405273,...,472898,737634,1327,19032,1516864,4930251,0,30267,3365,561720
2022-01-05,14588.562500,971.0,566.447754,490.990295,1641.907104,482.165497,110.0,2773.0,929.496521,9711.142578,...,1095700,694323,4,13124,2161168,3330657,0,46906,125,435662
2022-01-06,14867.718750,1030.0,566.447754,476.055237,1727.324219,517.947693,95.0,2745.0,900.749207,9659.366211,...,1124327,672516,0,25451,1628639,3181628,0,161857,244,147727
2022-01-07,14954.727539,1040.0,528.684570,480.722412,1713.088013,517.947693,95.0,2745.0,913.206360,9621.629883,...,734365,866224,331,34680,209174,1869559,0,3038,197,137827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-04-12,17993.656250,368.0,659.000000,550.000000,1761.499756,460.000000,100.0,1190.0,904.000000,9210.000000,...,2277728,1301880,0,19833,86862,3340068,338,0,4255,442165
2023-04-13,17841.306641,377.0,685.000000,550.000000,1734.083374,449.000000,100.0,954.0,900.000000,9221.000000,...,2536472,2151017,457,15554,191600,3123080,0,30018,200,995557
2023-04-14,17986.906250,378.0,700.000000,550.000000,1759.541382,450.000000,100.0,975.0,920.000000,9264.000000,...,576709,1146857,0,8705,208807,3825351,0,20512,400,117105
2023-04-17,17687.029297,366.0,700.000000,544.000000,1732.125122,441.000000,100.0,950.0,875.000000,9128.000000,...,4231398,1288716,0,23359,546044,3929876,0,84126,465,514054


In [48]:
data.columns

MultiIndex([('Adj Close',    'ABG.JO'),
            ('Adj Close',    'ACL.JO'),
            ('Adj Close',    'ACS.JO'),
            ('Adj Close',    'ACT.JO'),
            ('Adj Close',    'ADH.JO'),
            ('Adj Close',    'ADR.JO'),
            ('Adj Close',    'AEE.JO'),
            ('Adj Close',    'AEG.JO'),
            ('Adj Close',    'AEL.JO'),
            ('Adj Close',    'AFE.JO'),
            ...
            (   'Volume',    'VKE.JO'),
            (   'Volume',    'VOD.JO'),
            (   'Volume',    'VUN.JO'),
            (   'Volume',    'WBO.JO'),
            (   'Volume',    'WEZ.JO'),
            (   'Volume',    'WHL.JO'),
            (   'Volume',    'WKF.JO'),
            (   'Volume',    'YRK.JO'),
            (   'Volume', 'YYLBEE.JO'),
            (   'Volume',    'ZED.JO')],
           length=1440)

In [49]:
data.info

<bound method DataFrame.info of                Adj Close                                               \
                  ABG.JO  ACL.JO      ACS.JO      ACT.JO       ADH.JO   
Date                                                                    
2022-01-03  14203.365234   882.0  575.888550  490.990295  1689.360962   
2022-01-04  14462.581055   918.0  566.447754  490.990295  1646.652588   
2022-01-05  14588.562500   971.0  566.447754  490.990295  1641.907104   
2022-01-06  14867.718750  1030.0  566.447754  476.055237  1727.324219   
2022-01-07  14954.727539  1040.0  528.684570  480.722412  1713.088013   
...                  ...     ...         ...         ...          ...   
2023-04-12  17993.656250   368.0  659.000000  550.000000  1761.499756   
2023-04-13  17841.306641   377.0  685.000000  550.000000  1734.083374   
2023-04-14  17986.906250   378.0  700.000000  550.000000  1759.541382   
2023-04-17  17687.029297   366.0  700.000000  544.000000  1732.125122   
2023-04-18  17526.0

In [51]:
data.to_csv('jse_stocks.csv')