In [13]:
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import statsmodels.api as sm
from sklearn import decomposition
from sklearn import preprocessing
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from numpy import linalg
import statsmodels

In [2]:
def load_object(file_name):
    """Read a single pickled object from ``file_name`` and return it.

    The file is opened in binary mode and is closed again before returning.

    Note: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(file_name, mode='rb') as handle:
        loaded = pickle.load(handle)
    return loaded


def view_data(data_path):
    """Load the pickled dataset at ``data_path``, print a short summary, return its parts.

    The loaded object is indexed as ``data['prices']``, ``data['features']['names']``
    and ``data['features']['values']``. The shape of the prices, the feature names
    and the shape of the feature values are printed, in that order.

    Returns:
        A ``(prices, features, names)`` tuple.
    """
    dataset = load_object(data_path)

    prices = dataset['prices']
    feature_block = dataset['features']
    names = feature_block['names']
    features = feature_block['values']

    # Quick sanity summary of what was loaded.
    print(prices.shape)
    print(names)
    print(features.shape)

    return prices, features, names

In [3]:
# Load the training pickle. Per the recorded output below, `prices` has shape
# (757, 680) and `features` has shape (756, 680, 10), with 10 feature names.
prices, features, names = view_data('C3_train.pkl')

(757, 680)
['labour cost', 'analyst projected total earnings', 'weighted average outstanding shares', 'R&D intensity index', 'relative strength index', 'total assets', 'net book value', 'analyst sentiment', 'market share', 'Aggregate Capital Cost']
(756, 680, 10)


In [4]:
# Rescale each row of `prices` to unit L2 norm (axis=1 normalises row by row).
# In the visible notebook `prices1` is only referenced by the disabled
# inversion experiment further down.
prices1 = preprocessing.normalize(prices, norm = 'l2', axis = 1)

# Forecast of S1 using only F0. (Noted as Eq. 2 in the LaTeX doc)

In [11]:
# Disabled experiment, kept inside a bare string so it does not execute.
# It tests whether inverting a matrix a very large number of times accumulates
# floating-point error: the inverse of the covariance of `prices1.T` is
# re-inverted twice per loop pass, then multiplied by the original covariance.
# In exact arithmetic that product would be the identity matrix.
'''inv = np.cov(prices1.T)
inv2 = linalg.inv(np.cov(prices1.T))

for x in range(0, 700):
    inv2 = linalg.inv(inv2)
    inv2 = linalg.inv(inv2)
    
x = np.matmul(inv, inv2)
x
'''

In [23]:
# Response: column 1 of `prices` with the first row dropped, so it has as many
# rows as `features` (presumably this pairs each feature row with the *next*
# price row -- confirm against the LaTeX doc).
s1 = prices[1:, 1]

# Predictors: the slice at index 1 on the second axis of `features`, widened
# with the element-wise squares of those same columns.
f1 = features[:, 1]
f1squared = np.square(f1)
f1 = np.hstack((f1, f1squared))

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    f1,
    s1,
    test_size=0.2,
    random_state=0,
)

# Standardise using statistics learned from the training rows only.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# PCA down to 8 components (fitted on the training rows only), then append the
# element-wise squares of the component scores as extra regressors.
pca = decomposition.PCA(n_components=8)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
X_trainsquare = np.square(X_train)
X_testsquare = np.square(X_test)
X_train = np.hstack((X_train, X_trainsquare))
X_test = np.hstack((X_test, X_testsquare))

# Unfitted estimator; it is fitted in the following cell.
model = LinearRegression()

# Share of variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print(explained_variance)

[0.45377468 0.23047017 0.1186297  0.0925202  0.05994477 0.03027678
 0.00989239 0.00426836]


In [58]:
# Fit the regression on whatever X_train / y_train the preceding cell left in
# the kernel. The recorded output has 16 coefficients, which matches 8 PCA
# scores plus their 8 squares.
reg1 = model.fit(X_train, y_train)
reg1.coef_

array([-1.13246328, -1.71406779,  0.70154737,  0.61320896, -2.18444642,
       -0.5525321 , -1.20823111, -0.90292849,  0.06999308, -0.1729876 ,
       -0.19142105, -0.08111851, -0.52223091, -0.35715616,  0.29594176,
       -0.8906377 ])

# Time to create our L(ij), relating our prices at S1 to the features at 1 (Equation 1 in the LaTeX doc)


In [61]:
# Response: the first 756 rows of column 1 of `prices`; predictors: the slice at
# index 1 on the second axis of `features`. Both are taken at the same row
# index here (no one-row shift).
s1 = prices[:756, 1]
f1 = features[:, 1]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    f1,
    s1,
    test_size=0.2,
    random_state=0,
)

# Standardise using statistics learned from the training rows only.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# PCA down to 8 components, fitted on the training rows only.
pca = decomposition.PCA(n_components=8)
X_train, X_test = pca.fit_transform(X_train), pca.transform(X_test)

# Unfitted estimator; it is fitted in the following cell.
model = LinearRegression()

# Share of variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print(explained_variance)

[0.45377468 0.23047017 0.1186297  0.0925202  0.05994477 0.03027678
 0.00989239 0.00426836]


In [62]:
# Fit the regression on the X_train / y_train left in the kernel by the
# preceding cell, then display the fitted coefficients. The recorded output of
# this cell is a coefficient array, so the cell must end with an expression
# that shows it (same pattern as the `reg1` cell).
reg = model.fit(X_train, y_train)
reg.coef_


array([-1.19921414, -2.66588303,  0.65011621, -0.91317583,  1.75203833,
       -0.96224694, -1.48356678, -0.75577469])

# Time to Update L(ij) based on real values of F2 (Equation 2)

In [63]:
# Stand-in for the 'real value' that will only be observed later, prepared now:
# the last row of `prices` plus one scalar draw from np.random.normal()
# (standard normal by default), i.e. price plus a WN(0,1) shock.
# The draw is not seeded in this cell, so the result differs between runs.
realval = prices[-1] + np.random.normal()
# Algebraically this is (realval - intercept) / x.
# FIXME(review): `x` is not assigned by any live code in the visible notebook;
# its only assignments sit inside the disabled string block earlier on, so this
# line presumably raises NameError on a fresh kernel. Confirm which predictor
# value ("the given F value"?) is meant to be the divisor.
lij= -(reg.intercept_ - realval) / x 

# Time to Update the L(ij) based on the real values of S1 (Equation 1)

In [64]:
'''
We will also use an AR(1) model based on the previous value
'''

# Price Forecasting

In [None]:
#We will use Equation 1 to estimate the features from the price at 1.
#We will use Equation 2 to estimate the prices at 2 from the features at 1.
#This will give us a price vector that we can estimate on.