# **Project Title:** Option pricing based on the Tesla stock price.

#### **Class:** Option pricing in the Hull-White model, WMS 2021.

**Authors:** *Marcin Baranek, Kamil Bartocha* 

**Data description:** 
The dataset contains Tesla stock prices collected over the last couple of years.
The columns contain the following values:


1.   The date - "Date"
2.   The opening price of the stock - "Open"
3.   The high price of the day - "High"
4.   The low price of the day - "Low"
5.   The closing price of the day - "Close"
6.   The number of shares traded during the day - "Volume"
7.   The stock's closing price, amended to include any distributions/corporate actions that occur before the next day's open - "Adj[usted] Close"

#### **Goal:** Find the model that best describes the data. For each candidate model we will estimate its parameters, and then conduct a statistical test to choose the best one.

In [None]:
import numpy as np
import math
from scipy.stats import chisquare, norm

# Load data, select only close, min and high

In [None]:
def describe_data(train_data, labels_data):
    """Print summary statistics of the features, then of the labels.

    :param train_data: object exposing ``describe()`` (e.g. a DataFrame)
    :param labels_data: object exposing ``describe()`` (e.g. a DataFrame)
    :return: None, the summaries are only printed
    """
    for frame in (train_data, labels_data):
        summary = frame.describe()
        print(summary)

def describe_shape(CA_number, train_data, labels_data):
    """Print the shapes of one train/labels pair and a separator line.

    :param CA_number: identifier of the data set, used only in the messages
    :param train_data: object exposing ``.shape`` (DataFrame or ndarray)
    :param labels_data: object exposing ``.shape`` (DataFrame or ndarray)
    :return: None, the shapes are only printed
    """
    # Report the objects that were passed in instead of indexing module-level
    # lists, so the function does not depend on global state.
    print(f"shape for train data in set number {CA_number}: ", train_data.shape)
    print(f"shape for labels in set number  {CA_number}: ",   labels_data.shape)
    print(80*"-")

# This cell uses ``os`` and ``pandas``, which the notebook's import cell does
# not provide, so they are imported here to keep the cell runnable.
import os

import pandas as pd

# Feature and label arrays, one entry per CSV pair, in file order.
train = []
labels = []

# Build the paths with os.path.join so the separator matches the platform
# (the hard-coded backslashes only worked on Windows).
dataset_dir = os.path.join(os.getcwd(), "dataset")

for CA_number in range(1, 18):
    train_path = os.path.join(dataset_dir, f"x_train_CA{CA_number}.csv")
    labels_path = os.path.join(dataset_dir, f"y_train_CA{CA_number}.csv")

    # The files have no header row.
    train_data = pd.read_csv(train_path, header=None)
    labels_data = pd.read_csv(labels_path, header=None)

    describe_data(train_data, labels_data)
    train.append(np.array(train_data))
    labels.append(np.array(labels_data))
    # The lists are 0-based while the files are numbered from 1.
    describe_shape(CA_number - 1, train_data, labels_data)

# Statistical Test


In [None]:
class EulerScheme:
    """Euler scheme for simulating stochastic differential equations.

    Approximates trajectories of the process
        dX_t = a(t, X_t)dt + b(t, X_t)dW_t
    by stepping forward in time with Gaussian increments drawn from
    ``numpy.random``.
    """
    def __init__(self, a_func=None, b_func=None, init_val=None):
        """Constructor for stochastic EulerScheme.
         It is used to solve equations of the form:
         dX_t = a(t, X_t)dt + b(t, X_t)dW_t
        =========================================================
        :param a_func: drift, a function of two arguments (time, state);
            defaults to the constant 0
        :param b_func: diffusion, a function of two arguments (time, state);
            defaults to the constant 1
        :param init_val: initial condition of the equation, defaults to 0.0
        """
        self.a_func = a_func if a_func is not None else lambda t, x: 0
        self.b_func = b_func if b_func is not None else lambda t, x: 1
        self.init_val = 0.0 if init_val is None else init_val

    def step(self, point=0.0, time=1.0, dt=1.e-4):
        """Compute a single step of the Euler scheme.

        The noise is drawn with the same shape as ``point``, so a scalar
        state yields a scalar and an array state yields an array of the same
        shape (provided ``a_func``/``b_func`` return broadcastable values).

        :param point: numpy.array or float, current state
        :param time: float, current time
        :param dt: float, time increment
        :return: numpy.array or float, state after one step
        """
        drift = self.a_func(time, point)
        diffusion = self.b_func(time, point)
        # An empty shape means a scalar state: ``size=None`` then requests a
        # single scalar draw instead of a one-element array.
        size = np.shape(point) or None
        noise = np.random.normal(size=size)
        return point + drift * dt + diffusion * np.sqrt(dt) * noise

    def generate(self, point=None, t_0=0.0, dt=1.e-4, end=None, grid=False):
        """Return a generator of successive points from a trajectory of the
         X_t process.

        Time is advanced by repeatedly adding ``dt``, so because of floating
        point rounding the point at ``end`` itself may or may not be yielded.

        :param point: starting state, defaults to ``init_val``
        :param t_0: float, beginning of time
        :param dt: float, time increment
        :param end: float or None, the trajectory is produced while the
            current time is <= ``end``; ``None`` gives an endless generator
        :param grid: bool, if True yield ``(point, time)`` tuples instead of
            the bare points
        :return: generator generating successive elements from
            the trajectories of the process
        """
        point, time = self.init_val if point is None else point, t_0
        while end is None or time <= end:
            yield (point, time) if grid else point
            point = self.step(point, time, dt)
            time += dt

In [None]:
class ItoProcess(EulerScheme):
    """Ito process dX_t = a(t, X_t)dt + b(t, X_t)dW_t.

    Extends :class:`EulerScheme` with comparison, a string form, addition
    of two processes and a simulation-based goodness-of-fit test.
    """

    # Defining __eq__ removes the inherited hash; stated explicitly so the
    # unhashability of processes is visible.
    __hash__ = None

    def __init__(self, a_func=None, b_func=None, init_val=None):
        """Create the process, see :class:`EulerScheme` for the defaults.

        :param a_func: drift, a function of (time, state)
        :param b_func: diffusion, a function of (time, state)
        :param init_val: initial condition of the equation
        """
        super().__init__(a_func, b_func, init_val)

    @staticmethod
    def _same_function(first, second):
        """Heuristically decide whether two coefficient functions agree.

        Functions are treated as equal when they are the same object or when
        their bytecode, constants and referenced names all match. Values
        captured in closures or read from globals are not inspected.
        """
        if first is second:
            return True
        code_1 = getattr(first, "__code__", None)
        code_2 = getattr(second, "__code__", None)
        if code_1 is None or code_2 is None:
            # Callables without bytecode can only be compared by identity.
            return False
        # co_code alone cannot tell `lambda t, x: 0` from `lambda t, x: 1`,
        # because constants and names live outside the bytecode.
        return (code_1.co_code == code_2.co_code
                and code_1.co_consts == code_2.co_consts
                and code_1.co_names == code_2.co_names)

    def __eq__(self, other):
        """Compare the drift, the diffusion and the initial value.

        :param other: object to compare with
        :return: bool, or NotImplemented when ``other`` is not a scheme
        """
        if not isinstance(other, EulerScheme):
            return NotImplemented
        # array_equal gives one bool for scalar and for array initial values.
        return (self._same_function(self.a_func, other.a_func)
                and self._same_function(self.b_func, other.b_func)
                and bool(np.array_equal(self.init_val, other.init_val)))

    def __str__(self):
        return f"Base Ito Process with initial value {self.init_val}\n" \
               f"{self.__repr__()}"

    def __add__(self, other):
        """Sum of two processes.

        The drifts, the diffusions and the initial values are added.

        :param other: process exposing ``a_func``, ``b_func``, ``init_val``
        :return: ItoProcess, the sum
        """
        return ItoProcess(lambda t, x: self.a_func(t, x) + other.a_func(t, x),
                          lambda t, x: self.b_func(t, x) + other.b_func(t, x),
                          self.init_val + other.init_val)

    def fit_test(self, data, t_arr=None, df=None):
        """The method checks how well the process describes the data.
        =========================================================
        Statistical test:
        H_0: the data comes from the distribution described by the process
        H_1: The data is not from the distribution described by the process

        if p value < significance level then we reject the null hypothesis

        For every observation ``df`` one-step simulations are started from
        the previous observation. The rank of the observation is 1 plus the
        number of simulations not exceeding it, so it takes the values
        1, ..., df + 1. The rank counts are compared with equal expected
        counts by a chi-square test on those df + 1 categories.

        References:
        [1] Bak, J. (1998), Nonparametric methods in finance, Master’s thesis,
         Department of Mathematical Modelling, Technical University of Denmark,
         Lyngby. IMM-EKS-1998-34.
        =========================================================
        :param data: data array
        :param t_arr: time array, default is
            (1.e-4) * [0, 1, 2, ..., len_data - 1]
        :param df: int, degrees of freedom, otherwise the number of simulations
            default is equal to int((len_data - 6) / 5)
        :return: float, p value
        :raises ValueError: if there are fewer than two observations or the
            number of simulations is smaller than 1
        """
        # data preparation
        len_data = len(data)
        if len_data < 2:
            raise ValueError("fit_test needs at least two observations")
        df = int((len_data - 6) / 5) if df is None else df
        if df < 1:
            raise ValueError("df must be at least 1; pass df explicitly "
                             "for short data series")
        n_ranks = df + 1
        expected = (len_data - 1) / n_ranks
        t_arr = np.arange(len_data) * 1.e-4 if t_arr is None else t_arr
        ranks = np.ones(len_data - 1, dtype=int)

        # computing simulations
        for i in range(len_data - 1):
            dt = t_arr[i + 1] - t_arr[i]
            for _ in range(df):
                # ToDO implement schema dependent of K
                simulated = self.step(data[i], t_arr[i], dt)
                # np.all accepts both a scalar and a one-element array.
                ranks[i] += int(np.all(simulated <= data[i + 1]))

        # preparation for the test: count every rank 1..df + 1, so that the
        # observed counts add up to the number of transitions.
        omega_arr = np.bincount(ranks, minlength=n_ranks + 1)[1:]
        return chisquare(omega_arr, f_exp=expected)[1]

# Estimation of $\sigma$ and $\mu$.

In [None]:
# Hard-coded placeholder values; no estimation from data is done in this cell.
sigma = 0.5
mu = 1

# Predict

In [None]:
# Keyword arguments for ``predict``. The drift and volatility are linear in
# the state and read the module-level ``mu`` and ``sigma`` when called.
parameters = dict(
    T=1.0,
    dt=0.1,
    S_0=1.0,
    drift=lambda t, s: mu * s,
    vol=lambda t, s: sigma * s,
    t_0=0.0,
    n_traj=100,
    alpha=0.05,
)

In [None]:
def predict(T, dt, n_traj, drift, vol, t_0, S_0, alpha):
    """Monte Carlo estimate of the mean trajectory with a confidence margin.

    Simulates ``n_traj`` trajectories of dS_t = drift(t, S_t)dt +
    vol(t, S_t)dW_t on the grid t_0, t_0 + dt, ... up to ``T`` and averages
    them point-wise.

    :param T: float, end of the simulated time interval
    :param dt: float, time increment
    :param n_traj: int, number of simulated trajectories
    :param drift: function of (time, state), drift coefficient
    :param vol: function of (time, state), diffusion coefficient
    :param t_0: float, beginning of time
    :param S_0: float or numpy.array, initial value
    :param alpha: float, significance level of the confidence interval
    :return: tuple ``(mean, margin)``: ``mean`` is an array with the
        point-wise average, ``margin`` a list with the half-width of the
        ``1 - alpha`` normal confidence interval of that average for every
        time step
    :raises ValueError: if ``n_traj`` is smaller than 1
    """
    if n_traj < 1:
        raise ValueError("n_traj must be a positive integer")
    predictor = ItoProcess(drift, vol)
    # Bring every point to the shape of S_0, so the initial value and the
    # simulated points stack into one regular float array.
    state_shape = np.shape(S_0)
    paths = np.array(
        [[np.reshape(point, state_shape)
          for point in predictor.generate(S_0, t_0, dt, T)]
         for _ in range(n_traj)],
        dtype=float)
    mean = paths.mean(axis=0)
    # The margin of error of a mean is z * s / sqrt(n) with s the sample
    # standard deviation (not the variance).
    std = paths.std(axis=0, ddof=1)
    z_score = norm.interval(1 - alpha)[1]
    margin = z_score * std / math.sqrt(n_traj)
    return mean, list(margin)

In [None]:
# Run the prediction with the settings collected in ``parameters``; the
# result is displayed as the cell output.
predict(**parameters)

  This is separate from the ipykernel package so we can avoid doing imports until


(array([1.0, array([1.11093764]), array([1.22299598]), array([1.3300864]),
        array([1.42122095]), array([1.5636095]), array([1.7154592]),
        array([1.85902572]), array([1.98897192]), array([2.20602753]),
        array([2.40071921])], dtype=object),
 [0.0,
  array([0.00590651]),
  array([0.01380959]),
  array([0.02446509]),
  array([0.03196496]),
  array([0.05065659]),
  array([0.06359268]),
  array([0.07696289]),
  array([0.08424821]),
  array([0.12149776]),
  array([0.14411794])])