In [87]:
import pandas as pd
import numpy as np
import mysql.connector as mdb
import json as js
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm import tqdm_notebook as tqdm, tnrange as trange
from scipy.stats import poisson, norm, gaussian_kde as kde

# Register pandas' date/time converters with matplotlib so datetime-indexed
# data can be passed straight to matplotlib plotting calls.
pd.plotting.register_matplotlib_converters()
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

In [88]:
# Open the database connection using keyword arguments stored in a JSON
# settings file, so no credentials appear in the notebook itself.
with open('./../settings/maria_login.json') as file:
    conn = mdb.connect(**js.loads(file.read()))

In [89]:
# Pull the raw market history rows (date, volume, average price) for one
# region/type pair from 2019-01-01 onward. The raw result keeps its own name so
# this cell can be re-run without `dataset` changing meaning half-way through.
market_raw = pd.read_sql("""\
SELECT Market.record_date,
	Market.volume,
	Market.avg_price
FROM MarketHistory AS Market
WHERE Market.record_date >= '2019-01-01'
	AND Market.region_id = 10000043
	AND Market.type_id = 34
;""", conn)

# Left-join the rows onto a gap-free daily calendar spanning the observed
# dates, so days without a record show up as NaN rows instead of being absent.
full_range = pd.date_range(market_raw['record_date'].min(), market_raw['record_date'].max())
dataset = pd.DataFrame(index=full_range).join(market_raw.set_index('record_date'))

# Missing prices: carry the last known price forward, then back-fill any
# leading gap. `.ffill()` / `.bfill()` replace the deprecated
# `fillna(method=...)` spelling and produce the same result.
dataset['avg_price'] = dataset['avg_price'].ffill().bfill()
# Missing volume is recorded as zero for that day.
dataset['volume'] = dataset['volume'].fillna(0)

In [90]:
class Anneal:
    """Annealing-style random search for a piecewise-normal change-point model.

    Each iteration draws a completely fresh candidate (split positions ``tau``
    plus one ``mu``/``sigma`` pair per segment), scores it by the mean normal
    log-density of the data under that candidate, and accepts or rejects it
    with a temperature-dependent probability.

    Parameters
    ----------
    data_set : 1-D data exposing ``min()``, ``max()``, ``std()`` and ``len()``
        (for example a pandas Series or a NumPy array).
    temp : starting temperature.
    temp_target : temperature the schedule moves towards (never reached).
    ascend : if True a larger metric is better, otherwise a smaller one.
    settings : dict of options; ``'splits'`` is the number of change points
        (defaults to 1 when absent).
    verbose : show a tqdm progress bar with the current state.

    Attributes
    ----------
    metric : score of the currently accepted candidate (``None`` before any run).
    coefs : coefficients of the currently accepted candidate.
    """

    def __init__(self, data_set, temp, temp_target=0, ascend=True, settings=None, verbose=False):
        self.data_set = data_set
        self.temp = temp
        self.temp_target = temp_target
        self.ascend = ascend
        self.verbose = verbose
        # A `{}` default argument would be one dict shared by every instance;
        # build a fresh one per instance instead.
        self.settings = {} if settings is None else settings

        self.metric = None
        self.coefs = {}

    def run_process(self, iters):
        """Run ``iters`` propose/accept steps along the temperature schedule."""
        temps = self.build_temps(self.temp, self.temp_target, iters)
        t = tqdm(temps) if self.verbose else temps
        for temp in t:
            coefs, metric = self.generate()
            accept = self.calc_accept(metric, self.metric, temp, self.ascend)
            self.update(coefs, metric, accept)
            if self.verbose:
                t.set_postfix(dict(coefs=self.coefs, temp=temp, metric=self.metric))

    def build_temps(self, temp, target, iters):
        """Return exactly ``iters`` evenly spaced temperatures from ``temp`` towards ``target``.

        The target itself is excluded. ``np.linspace`` is used rather than a
        float-step ``np.arange`` because the latter's length is subject to
        rounding and may include the end point.
        """
        return np.linspace(temp, target, int(iters), endpoint=False)

    def generate(self):
        """Draw a random candidate and score it.

        Returns
        -------
        (coefs, metric) : ``coefs`` is a dict with sorted split indices
        ``'tau'`` and per-segment ``'mu'`` / ``'sigma'`` arrays; ``metric`` is
        the mean normal log-density of all observations under the candidate.
        """
        splits = self.settings.get('splits', 1)

        coefs = {}
        coefs['tau'] = np.random.randint(1, len(self.data_set), size=splits)
        coefs['tau'].sort()
        coefs['mu'] = np.random.uniform(self.data_set.min(), self.data_set.max(), size=splits + 1)
        coefs['sigma'] = np.random.uniform(0, self.data_set.std(), size=splits + 1)

        # Split a plain NumPy view of the data so np.split does not depend on
        # how a pandas object reacts to NumPy's internal calls.
        values = np.asarray(self.data_set)
        segments = np.split(values, coefs['tau'])
        # Calling norm.logpdf directly avoids building a frozen distribution
        # object per segment on every iteration.
        log_likes = [
            norm.logpdf(segment, loc=mu, scale=sigma)
            for segment, mu, sigma in zip(segments, coefs['mu'], coefs['sigma'])
        ]
        metric = np.concatenate(log_likes, axis=0).mean()
        return (coefs, metric)

    # NOTE: a Poisson-likelihood variant of generate() (segment means only,
    # scored with poisson(mu=mu).logpmf) used to sit here as a disabled string
    # block; the normal model above is the one in use.

    def calc_accept(self, current_metric, old_metric, temp, ascend):
        """Return the acceptance probability for a candidate.

        * No previous state -> ``1`` (always accept).
        * Candidate at least as good as the current state -> ``1.0``; the
          exponential is skipped so it cannot overflow at low temperatures.
        * Worse candidate -> ``exp(delta / temp)`` with ``delta < 0``.
        * Non-positive temperature -> ``0.0`` for worse candidates, which
          avoids dividing by zero.
        """
        if old_metric is None:
            return 1
        delta = current_metric - old_metric if ascend else old_metric - current_metric
        if delta >= 0:
            return 1.0
        if temp <= 0:
            return 0.0
        return np.exp(delta / temp)

    def update(self, coefs, metric, accept):
        """Adopt the candidate when ``accept`` exceeds a uniform draw from [0, 1)."""
        if accept > np.random.uniform():
            self.metric = metric
            self.coefs = coefs

In [91]:
# Anneal draws every candidate from NumPy's global RNG; seed it so that a
# fresh-kernel re-run of this cell reproduces the same coefficients.
np.random.seed(0)

# Fit a single change point to the daily average price. The starting
# temperature is set to the mean price and cools towards the default target of 0.
anneal_data = dataset['avg_price']
anneal = Anneal(anneal_data, temp=anneal_data.mean(), settings={'splits': 1}, verbose=True)
anneal.run_process(25000)
anneal.coefs

HBox(children=(IntProgress(value=0, max=25000), HTML(value='')))




{'tau': array([161]),
 'mu': array([4.58361968, 5.78347221]),
 'sigma': array([0.18968532, 0.75617417])}

In [93]:
# Display the currently accepted coefficients again (the same attribute the
# annealing cell ends with).
anneal.coefs

{'tau': array([161]),
 'mu': array([4.58361968, 5.78347221]),
 'sigma': array([0.18968532, 0.75617417])}