In [138]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [139]:
data1 = pd.read_csv('../prices/prices_round_2_day_-1.csv', header = 0, sep=";")
data2 = pd.read_csv('../prices/prices_round_2_day_0.csv', header = 0, sep=";")
data3 = pd.read_csv('../prices/prices_round_2_day_1.csv', header = 0, sep=";")

#Join rows of data1, data2 and data3
#data = pd.concat([data1, data2, data3], ignore_index=True)

#data = pd.concat([data1,data2], ignore_index=True)
data = data1
data.head()

Unnamed: 0,timestamp,ORCHIDS,TRANSPORT_FEES,EXPORT_TARIFF,IMPORT_TARIFF,SUNLIGHT,HUMIDITY,DAY
0,0,1200.0,1.5,10.5,-2.0,2500.0,79.0,-1
1,100,1201.75,1.5,9.5,-2.0,2499.4197,79.0041,-1
2,200,1201.75,1.5,9.5,-2.0,2498.8457,79.00821,-1
3,300,1201.75,1.5,9.5,-2.0,2498.278,79.01234,-1
4,400,1201.75,1.5,9.5,-2.0,2497.7166,79.01649,-1


In [140]:
#calculating covariance matrix Q
price = data.loc[:,"ORCHIDS"]
sunlight = data.loc[:, "SUNLIGHT"]
humidity = data.loc[:,"HUMIDITY"]

pricediff = np.diff(price)
sunlightdiff = np.diff(sunlight)
humiditydiff = np.diff(humidity)

differences = np.vstack((pricediff,sunlightdiff,humiditydiff))
Q = np.cov(differences)

print(Q)

[[9.56912851e-01 1.56375213e-02 1.22659416e-04]
 [1.56375213e-02 7.82154498e-01 5.40530006e-03]
 [1.22659416e-04 5.40530006e-03 5.15517945e-05]]


In [141]:
#finding transition matrix A
from sklearn.linear_model import LinearRegression

X = np.vstack([price[:-1], sunlight[:-1], humidity[:-1]])
y = np.vstack([price[1:], sunlight[1:], humidity[1:]])
X = np.transpose(X)
y = np.transpose(y)
print(X.shape, y.shape)

model = LinearRegression().fit(X,y)

#transition matrix
A = model.coef_
print(A)



(10000, 3) (10000, 3)
[[ 9.97675010e-01  4.23560243e-05  4.78619978e-03]
 [-1.70494280e-02  9.99688565e-01  8.94087969e-02]
 [-7.75608778e-05 -5.51482238e-06  1.00056837e+00]]


In [142]:
from pykalman import KalmanFilter

initial_price = data.loc[0, "ORCHIDS"]
initial_sunlight = data.loc[0, "SUNLIGHT"]
initial_humidity = data.loc[0, "HUMIDITY"]

price = data.loc[:,"ORCHIDS"]
sunlight = data.loc[:, "SUNLIGHT"]
humidity = data.loc[:,"HUMIDITY"]

price = price.to_numpy()
sunlight = sunlight.to_numpy()
humidity = humidity.to_numpy()

price = price.reshape(-1,1)
sunlight = sunlight.reshape(-1,1)
humidity = humidity.reshape(-1,1)

print(price.shape, sunlight.shape, humidity.shape)
print(np.hstack([price, sunlight, humidity]).shape)

initial_state = np.array([initial_price, initial_sunlight, initial_humidity])

initial_covariance = Q
transition_matrix = A
observation_matrix = np.eye(3)
process_noise = np.diag([0.1,0.1,0.1])
observation_noise = np.diag([0.001,0.001,0.001])

kf = KalmanFilter(initial_state_mean = initial_state,
                  initial_state_covariance = initial_covariance,
                  transition_matrices = transition_matrix,
                  observation_matrices = observation_matrix,
                  observation_covariance = observation_noise,
                  transition_covariance = process_noise)

state_means, state_covariances = kf.filter(np.hstack([price, sunlight, humidity])) #this shape is wrong

#next price
last_state_mean = state_means[-1]
last_state_covariance = state_covariances[-1]

next_state_mean, next_state_covariance = kf.filter_update(
last_state_mean, last_state_covariance, observation= np.array([data.loc[data.index[-1],"ORCHIDS"], data.loc[data.index[-1], "SUNLIGHT"], data.loc[data.index[-1],"HUMIDITY"]])
)
predicted_price = next_state_mean[0]

print("predicted price from data1: ", predicted_price)
print("actual price: ", data2.loc[data.index[-1], "ORCHIDS"])


(10001, 1) (10001, 1) (10001, 1)
(10001, 3)
predicted price from data1:  1227.9772739096704
actual price:  985.75
