In [1]:
import findspark
findspark.init()

import time as t
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext
from pyspark.sql import functions as F
from pyspark.sql.types import *
import datetime
import sys
from lib import sparkStructuredStreaming
import os
%matplotlib inline
from matplotlib import pyplot as plt
from pyspark.sql.window import Window
import math as m

### Set-up to stream from Kafka topic + read and write from/to Elasticsearch

In [2]:
# Extra Spark packages requested via --packages: the Kafka source for Spark SQL
# and the Elasticsearch Spark connector (both built for Scala 2.11).
os.environ['PYSPARK_SUBMIT_ARGS'] = "--packages {} pyspark-shell".format(
    ",".join((
        "org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.5",
        "org.elasticsearch:elasticsearch-spark-20_2.11:7.6.2",
    ))
)

In [3]:
# Kafka bootstrap server as "host:port".
# Defaults to the local broker "127.0.0.1:9092". Set the environment variable
# KAFKA_BOOTSTRAP_SERVERS (e.g. to "10.0.0.8:9092" for BACC) to target another
# broker without editing the notebook.
bootstrap = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "127.0.0.1:9092")

In [5]:
# Create (or reuse) the Spark session for this notebook; "local[*]" is the
# master URL handed to the builder.
spark = (
    SparkSession.builder
    .appName("KafkaIEXStructuredStreaming")
    .master("local[*]")
    .getOrCreate()
)

# SQLContext takes a SparkContext as its first argument, not a SparkSession.
# The session is passed explicitly as the second argument so the context
# wraps exactly the session created above.
sqlContext = SQLContext(spark.sparkContext, spark)

## 1. Backtesting

### 1.1 Try different strategies

In [None]:
# Backtesting object from the project-local sparkStructuredStreaming module.
b = sparkStructuredStreaming.backtest()

# --- Strategy definitions ---------------------------------------------------
# Momentum: element 0 ("1d") is passed to performance() on its own; the rest
# (strategy name followed by numeric look-back values) is passed as a slice.
# NOTE(review): the original comment described the numbers as minutes used to
# calculate an average -- confirm against the library.
strategy_momentum = ["1d", "momentum", 10, 30, 60, 120, 180]

# Mean reversion: RSI / IBR buy and sell thresholds followed by further
# numeric parameters. Defined here but not passed to performance() below.
rsi_buy, rsi_sell = 15, 60
ibr_buy, ibr_sell = 0.2, 0.7
strategy_mean_reverse = [
    "mean_reverse", rsi_buy, rsi_sell, ibr_buy, ibr_sell,
    3, 5, 8, 10, 14, 20,
]

# --- Data settings ----------------------------------------------------------
# Granularity of the historical data, one value per strategy.
interval_momentum = "1m"
interval_mean_reverse = "1d"
hdfs_path = "hdfs://0.0.0.0:19000"

# --- Portfolio settings -----------------------------------------------------
startCap = 10000.0   # start capital
commission = 0.002   # regulatory trading fee
risk_free = 0.001    # risk-free market return; 0.1% is an assumption

# --- Stock selection --------------------------------------------------------
# Parameters used to find stock symbols.
symbol = None
n_stock = [1, 2, 3, 4, 5]

performance = b.performance(
    startCap, commission, risk_free,
    strategy_momentum[1:], interval_momentum, strategy_momentum[0],
    symbol, n_stock,
    hdfs_path, sqlContext,
)

[*********************100%***********************]  456 of 456 completed


In [6]:
# Re-declared so this cell can run without the backtesting cell above.
hdfs_path = "hdfs://0.0.0.0:19000"

# Load the stored backtest results from the "/performance" parquet path and
# sort them by depot id.
df_performance = (
    sqlContext.read
    .format('parquet')
    .load("{}/performance".format(hdfs_path))
    .orderBy("DepotId")
)

# Print up to 200 rows.
df_performance.show(n=200)

+-------+--------+-----+----+-------------+------+----------+----------+------+--------------------+------------------+
|DepotId|   Value|Alpha|Beta|Start-Capital|Profit|Start-Date|  End-Date|Trades|Performance_Strategy|Performance_S&P500|
+-------+--------+-----+----+-------------+------+----------+----------+------+--------------------+------------------+
|      1| 10503.4|  4.4|1.01|      10000.0| 503.4|2020-05-18|2020-05-18|    27|                5.03|              0.63|
|      2| 10778.2| 7.05|1.17|      10000.0| 778.2|2020-05-18|2020-05-18|     9|                7.78|              0.63|
|      3|10817.27| 7.27|1.43|      10000.0|817.27|2020-05-18|2020-05-18|     5|                8.17|              0.63|
|      4|10990.08| 8.96|1.49|      10000.0|990.08|2020-05-18|2020-05-18|     1|                 9.9|              0.63|
|      5| 10503.4|  4.4|1.01|      10000.0| 503.4|2020-05-18|2020-05-18|    27|                5.03|              0.63|
|      6| 10778.2| 7.05|1.17|      10000

In [7]:
# Load the depot table from the "/depot" parquet path under hdfs_path.
df_depot = (
    sqlContext.read
    .format('parquet')
    .load("{}/depot".format(hdfs_path))
)

# Print up to 200 rows ordered by depot id, without truncating wide columns.
df_depot.orderBy("DepotId").show(n=200, truncate=False)

+-------+-------------+------------+-------------------------+-------------------------------------------------------------+
|DepotId|Start-Caputal|Strategy    |ISIN                     |Share                                                        |
+-------+-------------+------------+-------------------------+-------------------------------------------------------------+
|1      |10000.0      |momentum10  |[APA, HES]               |[0.5, 0.5]                                                   |
|2      |10000.0      |momentum30  |[APA, HES]               |[0.5, 0.5]                                                   |
|3      |10000.0      |momentum60  |[APA, HES]               |[0.5, 0.5]                                                   |
|4      |10000.0      |momentum120 |[APA, HES]               |[0.5, 0.5]                                                   |
|5      |10000.0      |momentum180 |[APA, HES]               |[0.5, 0.5]                                                   |


## 2. Trading Simulation / Performance Evaluation with real-time streams (Spark Streaming)

### Stream real-time quotes from Kafka topic

In [5]:
# Real-time trading object from the project-local sparkStructuredStreaming module.
r = sparkStructuredStreaming.realtime()

# Symbols and their portfolio shares.
# The original assignment had no right-hand side, which is a SyntaxError.
# The values below are the ISIN / Share columns printed in this notebook's
# depot table output, identical for every depot listed there.
# TODO: select these programmatically from the depot that performed best in
# backtesting, and confirm the argument types realtime() expects.
symbol, share = ["APA", "HES"], [0.5, 0.5]

startCap = 10000.0
# NOTE(review): the backtesting cell uses commission = 0.002; confirm which
# fee is intended for the real-time simulation.
commission = 0.000119
# NOTE(review): presumably the Elasticsearch index to write to -- confirm.
index = "trading"
# Momentum configuration: the r.momentum attribute, the strategy name and the
# momentum value; choose the value which worked best in backtesting.
momentum_str = [r.momentum, "momentum", 10]

# In the recorded run this call kept printing updates until it was
# interrupted manually (KeyboardInterrupt).
r.realtime(symbol, share, startCap, commission, momentum_str, index, sqlContext)

2020-05-14 15:32:36 9999.999524 10013.43286
2020-05-14 15:33:59 10001.299524000002 10016.42286
2020-05-14 15:35:09 10008.359524 10027.41286
2020-05-14 15:35:24 10008.359524 10038.66286
2020-05-14 15:37:45 10008.359524 10047.70286
2020-05-14 15:38:10 10008.359524 10042.442860000001


KeyboardInterrupt: 