In [None]:
# Placeholder for the sentiment data consumed by the main-loop cell, which
# assigns it to `sentiment_df`, reads a 'Sentence' column from it and slices it
# once per analysis window. It starts out as an empty list.
# NOTE(review): presumably replaced with the real Spark output before the main
# loop runs -- a plain empty list does not support ['Sentence']; confirm.
SPARK_SENTIMENTS_ARRAY = []

Import Dependencies:

In [None]:
import os
import json
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
import numpy as np
from numpy.fft import fft, ifft, fftfreq
from scipy import signal
from scipy.signal import lfilter
import pylab as pl
import matplotlib.pyplot as plt
from sklearn import metrics

Analyze Prices and Tweets and predict if price will go up or down:

In [None]:
def get_fft_factor(pred, window):
    """Compute the relative one-step change expected by the forecast.

    Parameters
    ----------
    pred : ndarray
        Sequence of previous (fitted) and predicted stock values.
    window : integer
        The time-frame of the stock analysis, in days before the current day.
        It is used as the index of the value of interest in ``pred``; the
        entry at ``window - 1`` serves as the baseline.

    Returns
    -------
    float
        ``(pred[window] - pred[window - 1]) / pred[window - 1]``, i.e. the
        expected change as a fraction (percent / 100) of the baseline value.
    """
    forecast = pred[window]
    baseline = pred[window - 1]
    delta = forecast - baseline
    return delta / baseline

def analyze(prices_n, sent_n, sent_fac, fft_fac, results, window):
    """Decide whether to buy (1) or sell (0) the stock.

    The price window is forecast with ``fft_model.predict`` and turned into a
    relative change with ``get_fft_factor``. Once more than 100 labelled
    results are available, a linear SVM is trained on the last ``window``
    (fft factor, sentiment factor) pairs and used to classify the current
    pair. Otherwise a fixed weighted sum of the two factors is thresholded.

    Parameters
    ----------
    prices_n : ndarray
        The n previous stock values.
    sent_n : float
        The combined sentiment value of the last n tweets.
    sent_fac : list
        All previous combined sentiment values (SVM training feature 'sent').
    fft_fac : list
        All previous fft factors (SVM training feature 'fft').
    results : list
        The labels used to train the SVM, one per previous factor pair.
    window : integer
        The time-frame of the stock analysis, in days before the current day.

    Returns
    -------
    decision : int
        The chosen class: the SVM prediction when it could be trained,
        otherwise 1 or 0 from the weighted rule.
    a : float
        Weight of the fft factor in the weighted rule (0.8).
    b : float
        Weight of the sentiment factor in the weighted rule (0.2).
    fft_factor : float
        The expected change in stock value in percent / 100.
    sent_factor : float
        The combined sentiment value of the last n tweets (``sent_n``).
    fft_pred : ndarray
        Previous and predicted stock values returned by ``fft_model.predict``.
    """
    a = 0.8
    b = 0.2
    min_history = 100  # number of labelled results required before the SVM is used

    fft_pred = fft_model.predict(prices_n)
    sent_factor = sent_n
    fft_factor = get_fft_factor(fft_pred, window)

    decision = None
    if len(results) > min_history:
        features = pd.DataFrame({'fft': fft_fac, 'sent': sent_fac}).tail(window)
        labels = pd.Series(results).tail(window)
        # SVC.fit raises ValueError when the labels contain a single class
        # (e.g. the price moved the same way on every day of the window), so
        # only train when both classes are present.
        if labels.nunique() > 1:
            clf = svm.SVC(kernel='linear')  # Linear Kernel
            clf.fit(features, labels)
            current = pd.DataFrame({'fft': fft_factor, 'sent': sent_factor}, index=[0])
            decision = clf.predict(current)[0]

    if decision is None:
        # NOTE(review): the weighted sum is compared against 1 although
        # fft_factor is a fractional change -- confirm the intended threshold.
        decision = 1 if a * fft_factor + b * sent_factor > 1 else 0

    return decision, a, b, fft_factor, sent_factor, fft_pred

Check whether the predictions were correct and what actually happened to the stock price:

In [None]:
def get_results(dec, actual_price, window):
    """Return the relative error of the forecast against the actual price.

    Parameters
    ----------
    dec : tuple
        The tuple returned by a previous call to 'analyze'; element 5 holds
        the array of previous and predicted stock values.
    actual_price : float
        The actual stock price to compare the forecast with.
    window : integer
        The time-frame of the stock analysis, in days before the current day.
        It is used as the index of the forecast value inside ``dec[5]``.

    Returns
    -------
    float
        ``(actual_price - predicted) / actual_price``, i.e. the forecast
        error as a fraction (percent / 100) of the actual price.
    """
    pred = dec[5]
    diff = (actual_price - pred[window]) / actual_price
    return diff

def get_outcome(last, next_day):
    """Check whether the price went up the next day.

    Parameters
    ----------
    last : float
        The stock price (close price) on the day of analysis.
    next_day : float
        The stock price of the next day.

    Returns
    -------
    int
        1 if ``next_day`` is strictly greater than ``last``, otherwise 0
        (an unchanged price counts as 0).
    """
    return 1 if (next_day - last) > 0 else 0

Function for performing Fourier analysis and extrapolation for the next day:

In [None]:
def fourierExtrapolation(x, n_predict, n_harm=None):
    """Perform Fourier analysis and extrapolation of future stock values.

    A linear trend is fitted and removed, the remainder is decomposed with
    the FFT, the selected harmonics are re-synthesised over the original
    range plus ``n_predict`` extra steps, and the linear trend is added back.

    Parameters
    ----------
    x : ndarray
        An array (or array-like) that holds stock values.
    n_predict : integer
        The number of days required for extrapolation (prediction).
    n_harm : integer, optional
        Number of harmonics used in the model; the ``1 + 2 * n_harm``
        lowest-frequency components are kept. Defaults to ``x.size``, which
        keeps every component.

    Returns
    -------
    ndarray
        An array of length ``x.size + n_predict`` that holds the estimated
        and predicted stock values.
    """
    x = np.asarray(x, dtype=float)
    n = x.size
    if n_harm is None:
        n_harm = n  # number of harmonics in model
    t = np.arange(0, n)
    p = np.polyfit(t, x, 1)  # find linear trend in x
    # Remove the trend before the FFT; it is added back on return, so leaving
    # it in the transformed signal would count the slope twice.
    x_notrend = x - p[0] * t
    x_freqdom = fft(x_notrend)  # detrended x in frequency domain
    f = fftfreq(n)  # frequencies
    # sort indexes by frequency, lower -> higher
    indexes = sorted(range(n), key=lambda i: np.absolute(f[i]))

    t = np.arange(0, n + n_predict)
    restored_sig = np.zeros(t.size)
    for i in indexes[:1 + n_harm * 2]:
        ampli = np.absolute(x_freqdom[i]) / n  # amplitude
        phase = np.angle(x_freqdom[i])  # phase
        restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
    return restored_sig + p[0] * t

Perform Fourier analysis and extrapolation for the next day:

In [None]:
def predict(data, n_predict=3, smooth_window=15):
    """Predict future stock values.

    The data is linearly detrended, smoothed with a moving-average FIR
    filter and then extended with ``fourierExtrapolation``. The trend removed
    by ``signal.detrend`` is not added back, so the returned values are on
    the detrended, smoothed scale.

    Parameters
    ----------
    data : ndarray
        An array that holds stock values.
    n_predict : integer, optional
        The number of steps to extrapolate past the end of ``data``
        (default 3).
    smooth_window : integer, optional
        The number of taps of the moving-average filter (default 15).

    Returns
    -------
    ndarray
        An array of length ``len(data) + n_predict`` that holds the
        estimated and predicted values.
    """
    detrended = signal.detrend(data)
    # Moving average: smooth_window equal taps, no feedback term.
    b = [1.0 / smooth_window] * smooth_window
    a = 1
    filtered_signal = lfilter(b, a, detrended)
    return fourierExtrapolation(filtered_signal, n_predict)

Main loop - run analysis and evaluations, store data for further use:

In [1]:
# Main loop of the project: it goes through the data that comes from SPARK,
# analyzes the price, makes predictions and decides whether to buy or sell the
# stock.
# NOTE(review): SPY_STREAM, ensemble_model and portfolio_management_model are
# not defined in the cells shown in this notebook -- confirm they are provided
# before this cell runs.

# stock and sentiment data
spy_df = pd.read_csv(SPY_STREAM)
close_prices = spy_df['Close'].to_numpy()
sentiment_df = SPARK_SENTIMENTS_ARRAY
sentences = sentiment_df['Sentence']
# parameters:
start = 0
window = 50  # time frame
n_steps = 285  # number of analysis days to simulate
warmup = 100  # decisions before this index are excluded from the overall accuracy
acc_window = 100  # number of days covered by each rolling-accuracy point
# stored data:
results = []
fft_factors = []
sent_factors = []
decisions = []
outcomes = []
# loop
for _ in range(n_steps):
    window_end = start + window
    # The 'analyze' function shown in this notebook takes
    # (prices_n, sent_n, sent_fac, fft_fac, results, window): the sentiment
    # history goes before the fft history, otherwise the SVM is trained with
    # its two feature columns swapped.
    decision = ensemble_model.analyze(
        close_prices[start:window_end],
        sentiment_df[start:window_end],
        sent_factors,
        fft_factors,
        outcomes,
        window,
    )
    decisions.append(decision[0])
    fft_factors.append(decision[3])
    sent_factors.append(decision[4])
    # NOTE(review): the window ends at index window_end - 1, yet the forecast is
    # scored against window_end + 1 and the outcome compares window_end with
    # window_end + 1 -- confirm this one-day offset is intended.
    result = portfolio_management_model.get_results(decision, close_prices[window_end + 1], window)
    results.append(result)
    outcome = portfolio_management_model.get_outcome(close_prices[window_end], close_prices[window_end + 1])
    outcomes.append(outcome)
    print(f'{start} out of {len(close_prices) - 1}')
    start += 1

print("Accuracy:", metrics.accuracy_score(outcomes[warmup:], decisions[warmup:]))
# analysis of accuracy score of the last 100 days, from day 1 to day 185 of analysis
acc = [
    metrics.accuracy_score(outcomes[i:i + acc_window], decisions[i:i + acc_window])
    for i in range(n_steps - acc_window)
]
plt.plot(acc)
plt.xlabel('first day of the accuracy window')
plt.ylabel(f'accuracy over {acc_window} days')
plt.show()

NameError: name 'pd' is not defined