In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from tqdm import tqdm

In [2]:
# Load the raw CSV. header=None: the file has no header row, so pandas labels
# the columns 0, 1, 2.
# NOTE(review): columns look like (unix timestamp, price, amount) — confirm.
df = pd.read_csv('1coinUSD.csv', header=None)
# Preview the first rows.
df.head()

Unnamed: 0,0,1,2
0,1394351059,621.0,0.01
1,1394351073,620.0,0.01
2,1394427477,620.0,0.01
3,1394427596,620.5,0.01
4,1394427614,621.0,0.01


In [3]:
# Take column 1 of the frame (presumably the trade price — confirm) as a NumPy array.
coin_val = df.values[:,1]
# Keep only the first 100000 rows.
coin_val = coin_val[:100000]

In [4]:
def normalize(arr):
    """Shift ``arr`` so that its minimum becomes 0, then divide by its maximum.

    The divisor is the original maximum itself, not the range
    ``max - min``, so the largest output value is ``(max - min) / max``
    rather than 1.

    Parameters
    ----------
    arr : array-like
        Values to rescale.

    Returns
    -------
    The element-wise result of ``(arr - min(arr)) / max(arr)``.
    """
    lowest, highest = np.min(arr), np.max(arr)
    shifted = arr - lowest
    return shifted / highest

In [5]:
# Rescale the truncated series with normalize().
coin_val_normed = normalize(coin_val)

In [6]:
import math
# T: window length (in samples) used by the sliding-window functions in this notebook.
T = 10000
# n: number of histogram bins, floor(T ** (2 / 3)).
n = math.floor(math.pow(T, 2 / 3))
print(n, T)

464 10000


In [7]:
def distr_density(var, N):
    """Count how many samples of ``var`` fall into each of ``N`` equal bins on [0, 1).

    Bin ``j`` is the half-open interval ``[j / N, (j + 1) / N)``. Samples
    outside ``[0, 1)`` (including a value of exactly 1.0) are not counted
    in any bin.

    Parameters
    ----------
    var : array-like
        Sample values.
    N : int
        Number of bins.

    Returns
    -------
    counts : scipy.sparse.csc_matrix
        Integer matrix of shape ``(1, N)`` with the per-bin sample counts.
    gist : numpy.ndarray
        Array of shape ``(N, 2)``; row ``j`` holds the ``(lower, upper)``
        edges of bin ``j``.
    """
    var = np.asarray(var)
    # Use the N argument rather than a notebook-level global, so the number
    # of bins always matches what the caller asked for.
    gist = [(j / N, (j + 1) / N) for j in range(N)]
    f_T = np.zeros((N, ))
    for j, (a, b) in enumerate(gist):
        f_T[j] = np.count_nonzero(np.logical_and(var >= a, var < b))
    # The builtin ``int`` replaces the ``np.int`` alias, which has been
    # removed from NumPy; the resulting dtype is the same.
    return sparse.csc_matrix(f_T, dtype=int), np.array(gist)

In [8]:
# Bin counts of the first T normalised samples; the returned bin edges are discarded.
dens, _ = distr_density(coin_val_normed[:T], n)
print(dens.shape)

(1, 464)


In [8]:
def get_gist_bin(gist, element):
    """Return the index of the first interval in ``gist`` that contains ``element``.

    Parameters
    ----------
    gist : iterable of (lower, upper) pairs
        Half-open intervals ``[lower, upper)``, scanned in order.
    element : number
        Value to locate.

    Returns
    -------
    int or None
        Position of the first matching interval, or ``None`` when no
        interval contains ``element``. Callers that use the result as a
        NumPy index should check for ``None`` first, since ``arr[None]``
        selects the whole array rather than failing.
    """
    hits = (idx for idx, (lo, hi) in enumerate(gist) if lo <= element < hi)
    return next(hits, None)

In [24]:
def window_distr_density(var, N):
    """Build per-bin sample counts for a window of ``T`` samples slid along ``var``.

    Row 0 is the histogram of ``var[:T]`` as returned by ``distr_density``.
    Each later row ``i`` is derived from row ``i - 1`` by removing
    ``var[i - 1]`` from its bin and adding ``var[i + T]`` to its bin.

    Relies on the notebook-level names ``T``, ``distr_density``,
    ``get_gist_bin`` and ``tqdm``.

    Parameters
    ----------
    var : numpy.ndarray
        1-D series; values are expected to lie inside the bins produced by
        ``distr_density``.
    N : int
        Bin count forwarded to ``distr_density``.

    Returns
    -------
    scipy.sparse.csc_matrix
        One row of integer bin counts per window position,
        ``max(var.shape[0] - T, 1)`` rows in total.

    Raises
    ------
    ValueError
        If a sample that has to be removed or added lies in no bin.
    """
    first_row, gist = distr_density(var[:T], N)
    rows = [first_row]
    # Keep a single dense running histogram and snapshot it after each step,
    # instead of converting the previous sparse row back to dense every time.
    running = first_row.toarray()

    def bin_of(value):
        # A None index would select the whole row and silently change every
        # bin, so turn a failed lookup into an explicit error.
        idx = get_gist_bin(gist, value)
        if idx is None:
            raise ValueError("value %r does not fall into any histogram bin" % (value,))
        return idx

    for i in tqdm(range(1, var.shape[0] - T)):
        running[0, bin_of(var[i - 1])] -= 1
        # NOTE(review): a window of exactly T samples starting at i would add
        # var[i + T - 1]; with i + T the sample var[T] is never counted.
        # get_u_matrix updates its joint histogram with the same index, so
        # the two must be changed together — confirm intent before touching.
        running[0, bin_of(var[i + T])] += 1
        # ``int`` replaces the removed ``np.int`` alias. Building the sparse
        # matrix copies the data, so later in-place updates do not leak
        # into rows already stored.
        rows.append(sparse.csc_matrix(running, dtype=int))
    return sparse.vstack(rows, format='csc')

In [11]:
# Sliding-window bin counts over the whole normalised series (one row per window position).
densities = window_distr_density(coin_val_normed, n)

100%|██████████| 89999/89999 [01:27<00:00, 1026.38it/s]


In [12]:
# Sanity check: dimensions and container type of the window-count matrix.
print(densities.shape)
print(type(densities))

(90000, 464)
<class 'scipy.sparse.csc.csc_matrix'>


In [13]:
# Cache the window-count matrix on disk in SciPy's sparse .npz format.
sparse.save_npz("densities_csc.npz", densities)

In [9]:
# Reload the cached window-count matrix instead of recomputing it.
densities = sparse.load_npz("densities_csc.npz")

In [10]:
def get_increases(var):
    """Return the one-step differences of ``var``.

    Parameters
    ----------
    var : numpy.ndarray
        Input series.

    Returns
    -------
    numpy.ndarray
        ``var[1:] - var[:-1]``; one element shorter than ``var`` along the
        first axis.
    """
    later, earlier = var[1:], var[:-1]
    return later - earlier

In [11]:
# One-step differences of the normalised series.
inc = get_increases(coin_val_normed)

In [12]:
# Length check: the difference series is one element shorter than its input.
print(inc.shape)

(99999,)


In [13]:
# Number of bins for the increments; passed to get_u_matrix as N_v.
n_v = 100

In [14]:
def get_integrated(matrix, v_mean):
    """Weight the rows of ``matrix`` by ``v_mean`` and sum them column by column.

    Entry ``j`` of the result is ``sum_k matrix[k, j] * v_mean[k]``.

    Computed as one vector-matrix product instead of a Python loop over the
    columns; this function is called once per window position, so the loop
    was a major cost. The result can differ from the loop at floating-point
    rounding level because the summation order is different.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D array of shape ``(len(v_mean), n_columns)``.
    v_mean : numpy.ndarray
        1-D weight vector, one weight per row of ``matrix``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``matrix.shape[1]``.
    """
    return np.asarray(v_mean) @ np.asarray(matrix)

In [15]:
def get_rev_dens(dens, i):
    """Return the element-wise reciprocal of row ``i`` of the sparse matrix ``dens``.

    Zero entries are replaced by ``1e-7`` before inverting, so they map to
    ``1e7`` instead of producing a division by zero.

    Parameters
    ----------
    dens : scipy.sparse matrix
        Matrix supporting row indexing and ``toarray()``.
    i : int
        Row to invert.

    Returns
    -------
    numpy.ndarray
        1-D float array with one reciprocal per column of ``dens``.
    """
    row = dens[i].toarray()[0].astype(dtype=float)
    floored = np.where(row == 0, 0.0000001, row)
    return 1 / floored

In [16]:
def get_u_matrix(var, v, N_v, N, dens):
    """Build the ``u`` matrix: one row per window position, one column per value bin.

    A joint histogram ``f_c`` of (increment bin, value bin) counts is built
    for the first ``T`` samples and then updated incrementally as the window
    slides. For every window position ``i`` the row is::

        sum_k(v_mean[k] * f_c[k, :]) * get_rev_dens(dens, i)

    where ``v_mean[k]`` is the midpoint of increment bin ``k``.

    Relies on the notebook-level names ``T``, ``distr_density``,
    ``get_gist_bin``, ``get_integrated``, ``get_rev_dens`` and ``tqdm``.

    Parameters
    ----------
    var : numpy.ndarray
        1-D series; values are binned into ``N`` equal bins on [0, 1).
    v : numpy.ndarray
        Increments paired index-by-index with ``var``; binned into ``N_v``
        equal bins on [-1, 1).
    N_v : int
        Number of increment bins.
    N : int
        Number of value bins.
    dens : scipy.sparse matrix
        Per-window bin counts; row ``i`` is used for window position ``i``.

    Returns
    -------
    scipy.sparse.csc_matrix
        Matrix with ``max(var.shape[0] - T - 1, 1)`` rows.

    Raises
    ------
    ValueError
        If a sample or increment that has to be moved lies in no bin.
    """
    v_gist = [(j * 2 / N_v - 1, (j + 1) * 2 / N_v - 1) for j in range(N_v)]
    v_mean = np.array([(a + b) / 2 for a, b in v_gist])
    f_gist = [(j / N, (j + 1) / N) for j in range(N)]

    def bin_of(gist, value):
        # A None index would address whole rows/columns of f_c and silently
        # corrupt the counts, so fail loudly instead.
        idx = get_gist_bin(gist, value)
        if idx is None:
            raise ValueError("value %r does not fall into any histogram bin" % (value,))
        return idx

    def u_row(i):
        return sparse.csc_matrix(np.multiply(get_integrated(f_c, v_mean), get_rev_dens(dens, i)))

    v_c = v[:T]
    var_c = var[:T]
    f_c = []
    for a, b in v_gist:
        v_ind = np.logical_and(v_c >= a, v_c < b)
        # Pass the N argument (previously the global n) so the histogram
        # width always agrees with f_gist built from N above.
        f, _ = distr_density(var_c[v_ind], N)
        f_c.append(f.toarray()[0])
    f_c = np.array(f_c)

    u = [u_row(0)]
    for i in tqdm(range(1, var.shape[0] - T - 1)):
        f_c[bin_of(v_gist, v[i - 1]), bin_of(f_gist, var[i - 1])] -= 1
        # NOTE(review): a window of exactly T samples starting at i would add
        # index i + T - 1; with i + T the pair at index T is never counted.
        # window_distr_density uses the same index for `dens`, so the two
        # must be changed together — confirm intent before touching.
        f_c[bin_of(v_gist, v[i + T]), bin_of(f_gist, var[i + T])] += 1
        u.append(u_row(i))
    return sparse.vstack(u, format='csc')

In [17]:
# Build the u matrix from the normalised series, its increments and the window counts.
u_matrix = get_u_matrix(coin_val_normed, inc, n_v, n, densities)

100%|██████████| 89998/89998 [33:05<00:00, 45.32it/s]


In [21]:
def get_lambda_vec(var):
    """Compute one lambda value per window of ``T`` steps ending at index ``t``.

    For each ``t`` in ``range(T, len(var) - 1)`` the value stored at
    position ``t - T`` is::

        (1 / T) * sum((var[k] - var[k + 1]) ** 2 for k in [t - T, t))
        + (1 / T ** 2) * (var[t] - var[t - T])

    Relies on the notebook-level names ``T`` and ``tqdm``.

    Parameters
    ----------
    var : numpy.ndarray
        1-D series.

    Returns
    -------
    numpy.ndarray
        Float array of length ``var.shape[0] - T``.
        NOTE(review): the loop stops at ``len(var) - 2``, so the last
        element is never written and stays 0 — confirm this is intended.
    """
    total = var.shape[0]
    lam = np.zeros(total - T)
    inv_T = 1 / T
    inv_T_sq = 1 / (T ** 2)
    for t in tqdm(range(T, total - 1)):
        start = t - T
        # Differences between consecutive samples inside the window.
        step_diff = var[start:t] - var[start + 1: t + 1]
        lam[start] = inv_T * (step_diff ** 2).sum() + inv_T_sq * (var[t] - var[start])
    return lam

In [22]:
# Per-window lambda values for the normalised series.
lambda_vec = get_lambda_vec(coin_val_normed)

100%|██████████| 89999/89999 [00:04<00:00, 19925.35it/s]


In [25]:
# NOTE(review): h is not used anywhere in the visible cells; the call below
# passes n, so it repeats the computation that produced `densities`.
# Confirm whether h was meant to be the bin count here.
h = n * 100
small_step_dens = window_distr_density(coin_val_normed, n)

100%|██████████| 89999/89999 [01:26<00:00, 1036.66it/s]


In [20]:
# Persist the u matrix on disk in SciPy's sparse .npz format.
sparse.save_npz("u_matrix_csc.npz", u_matrix)