# Main notebook for testing Mocha

## 0. Load packages



In [1]:
import os
import sys
import scipy
import numpy as np
sys.path.append(os.getcwd()+'/assistive_functions')

from household import Household
from utils_households import get_lags, connect_to_households

## 1. Mocha algorithm

In [2]:
def run_mocha(Xtrain, Ytrain, Xtest, Ytest, lambda_, opts):
    '''
    Run the MOCHA multi-task solver and record metrics while it trains.

    Each outer iteration runs `mocha_inner_iters` rounds of local SDCA
    updates on the per-task weights W, then re-estimates the task
    relationship matrix Sigma (and its inverse Omega) from W.

    Parameters
    ----------
    Xtrain : list of 2-D arrays
        One (n_t, d) feature matrix per task; all share the same d.
    Ytrain : list of sequences
        One length-n_t target sequence per task.
        NOTE(review): the clipped dual update below looks like the
        hinge-loss SDCA step, which presumes labels in {-1, +1} -- confirm.
    Xtest, Ytest : list
        Test data, only forwarded to `compute_rmse`.
    lambda_ : float
        Regularization parameter.
    opts : dict
        Reads 'mocha_outer_iters', 'mocha_inner_iters', 'w_update',
        'sys_het', 'mocha_sdca_frac' and, when 'sys_het' is truthy,
        'top' and 'bottom'. The dict is also passed to `compute_rmse`.

    Returns
    -------
    rmse, primal_objs, dual_objs : np.ndarray
        Values returned by `compute_rmse`, `compute_primal` and
        `compute_dual`. When opts['w_update'] is truthy they have one entry
        per inner iteration (overwritten on every outer iteration, so only
        the last outer iteration survives); otherwise one entry per outer
        iteration.
    '''
    # Local import: guarantees the `scipy.linalg` submodule is loaded even
    # when only a bare `import scipy` has been executed.
    from scipy.linalg import sqrtm

    # initialize variables
    print('Running MOCHA\n')
    m = len(Xtrain)                     # num of tasks = num of households
    d = Xtrain[0].shape[1]              # num of features
    W = np.zeros((d, m))
    Sigma = np.eye(m) / m
    Omega = np.linalg.inv(Sigma)
    n = np.array([len(Ytrain[t]) for t in range(m)], dtype=int)  # samples per task
    alpha = [np.zeros(n[t]) for t in range(m)]                   # dual variables per task
    rho = 1                             # scaling of the local sub-problems

    # initialize metric buffers
    mocha_outer_iters = opts['mocha_outer_iters']
    mocha_inner_iters = opts['mocha_inner_iters']
    num_records = mocha_inner_iters if opts['w_update'] else mocha_outer_iters
    rmse        = np.zeros(num_records)
    dual_objs   = np.zeros(num_records)
    primal_objs = np.zeros(num_records)

    for h in range(mocha_outer_iters):
        print('Outer itter: ', h)

        if not opts['w_update']:
            rmse[h] = compute_rmse(Xtest, Ytest, W, opts)
            primal_objs[h] = compute_primal(Xtrain, Ytrain, W, Omega, lambda_)
            dual_objs[h] = compute_dual(alpha, Ytrain, W, Omega, lambda_)

        # update W
        for hh in range(mocha_inner_iters):
            # Re-seeding per inner iteration means every outer iteration
            # replays the same random draws.
            np.random.seed(hh * 1000)
            if opts['sys_het']:
                # per-task multiplier on the number of local steps, drawn
                # uniformly from [bottom, top); kept 1-D so that
                # sys_iters[t] is a scalar
                sys_iters = (opts['top'] - opts['bottom']) * np.random.rand(m) + opts['bottom']

            if opts['w_update']:
                rmse[hh] = compute_rmse(Xtest, Ytest, W, opts)
                primal_objs[hh] = compute_primal(Xtrain, Ytrain, W, Omega, lambda_)
                dual_objs[hh]   = compute_dual(alpha, Ytrain, W, Omega, lambda_)

            # loop over tasks (conceptually in parallel)
            deltaW = np.zeros((d, m))
            deltaB = np.zeros((d, m))
            for t in range(m):
                # visiting coordinates through a permutation avoids hitting
                # the same one twice before the others were visited
                tperm = np.random.permutation(n[t])
                alpha_t = alpha[t]      # alias: writes below update alpha[t] in place
                curr_sig = Sigma[t, t]
                if opts['sys_het']:
                    local_iters = n[t] * sys_iters[t]
                else:
                    local_iters = n[t] * opts['mocha_sdca_frac']
                # A fractional budget is rounded up, which is what iterating
                # over np.arange(local_iters) did.
                num_steps = int(np.ceil(local_iters))

                # run Stochastic Dual Coordinate Ascent (SDCA) locally;
                # each step updates a single dual coordinate
                for s in range(num_steps):
                    idx = tperm[s % n[t]]
                    alpha_old = alpha_t[idx]
                    # .item() accepts both a scalar and a size-1 array target
                    curr_y = np.asarray(Ytrain[t][idx]).item()
                    curr_x = np.ravel(Xtrain[t][idx, :])            # shape: (d,)

                    # compute update
                    margin = curr_y * np.dot(curr_x, W[:, t] + rho * deltaW[:, t])
                    grad_num = lambda_ * n[t] * (1 - margin)
                    grad_den = curr_sig * rho * np.dot(curr_x, curr_x)
                    grad = grad_num / grad_den + alpha_old * curr_y
                    alpha_new = curr_y * max(0, min(1, grad))       # clip to [0, 1]
                    alpha_t[idx] = alpha_new

                    step = alpha_new - alpha_old
                    deltaW[:, t] = deltaW[:, t] + curr_sig * step / (lambda_ * n[t]) * curr_x
                    deltaB[:, t] = deltaB[:, t] + step * curr_x / n[t]

            # combine updates globally
            for t in range(m):
                for tt in range(m):
                    W[:, t] = W[:, t] + deltaB[:, tt] * Sigma[t, tt] / lambda_

        # make sure eigenvalues are positive
        A = np.matmul(np.transpose(W), W)
        # A is symmetric, so eigh applies; NumPy returns (eigenvalues,
        # eigenvectors) in that order.
        eig_val, eig_vec = np.linalg.eigh(A)
        if np.any(eig_val < 0):
            eig_val = np.where(eig_val <= 1e-7, 1e-7, eig_val)
            # rebuild A = V diag(eig_val) V^T with real matrix products
            A = np.matmul(eig_vec * eig_val, np.transpose(eig_vec))

        # update Omega, Sigma
        # sqrtm may return a complex array with negligible imaginary parts;
        # keep the real part so the scalar comparisons above stay valid
        sqm = np.real(sqrtm(A))
        Sigma = sqm / np.trace(sqm)
        Omega = np.linalg.inv(Sigma)
        rho = np.max(np.sum(np.abs(Sigma), 1) / np.diag(Sigma))

    return rmse, primal_objs, dual_objs

## 2. Load households data

In [3]:
# how many households to load, and which group / tariff they come from
num_households = 2
group = "ACORN-L"
stdorToU = "ToU"
household_options = dict(num_households=num_households, group=group, stdorToU=stdorToU)

households = connect_to_households(household_options)

# regression options handed to Household.construct_dataset
options = dict(
    dayparts=[],
    resolution=60,
    remove_holiday=True,
    filt_days=['Tuesday'],
    replacement_method='week_before',
    feat_cols=['hourofd_x', 'hourofd_y', 'dayofy_x', 'dayofy_y', 'temperature_hourly'],
)
step_ahead = 1

# first pass: build every dataset without date limits and note the first
# and last date each household has data for
date_st, date_en = [], []
for household in households:
    household.construct_dataset(lags=get_lags(step_ahead), step_ahead=step_ahead, options=options)
    date_st += [household.cons_data.date.iloc[0]]
    date_en += [household.cons_data.date.iloc[-1]]

# window shared by all households: latest start to earliest end
date_st_com, date_en_com = max(date_st), min(date_en)

# second pass: rebuild on the shared window, then split into train/test
for household in households:
    household.construct_dataset(
        lags=get_lags(step_ahead),
        step_ahead=step_ahead,
        options=options,
        date_st=date_st_com,
        date_en=date_en_com,
    )
    household.train_test_split(test_frac=0.25)


[INFO] Connected to 2 households


## 3. Form train and test matrices

In [4]:
# form train and test lists: one entry per household, each feature matrix
# gets a leading column of ones prepended
Xtrain = []
Ytrain = []
Xtest  = []
Ytest  = []
for household in households:
    Xtrain.append(np.hstack((np.ones((household.X_train.shape[0], 1)), household.X_train)))
    Ytrain.append(household.y_train)
    Xtest.append(np.hstack((np.ones((household.X_test.shape[0], 1)), household.X_test)))
    Ytest.append(household.y_test)

lambda_ = 0.1
# Option values are placeholders picked only to exercise run_mocha.
# Keys must match what run_mocha reads; in particular it looks up
# 'bottom' (together with 'top') when 'sys_het' is enabled.
opts = {'mocha_outer_iters': 10,
        'mocha_inner_iters': 10,
        'w_update': True,
        'sys_het': False,
        'top': 2,
        'bottom': 1,
        'mocha_sdca_frac': 0.1}

## 4. Run

In [5]:
from util import compute_rmse, compute_dual, compute_primal

# Run MOCHA with the real 'compute_...' helpers from util.
# NOTE(review): the saved output of this cell ends with
# "AttributeError: 'list' object has no attribute 'shape'" during the first
# outer iteration. The following cell runs to completion once compute_dual
# is replaced by a stub, so the failure presumably comes from
# util.compute_dual -- confirm.
opts['avg']=False
rmse, primal_objs, dual_objs = run_mocha(Xtrain, Ytrain, Xtest, Ytest, lambda_, opts)

Running MOCHA

Outer itter:  0


AttributeError: 'list' object has no attribute 'shape'

In [6]:
from util import compute_rmse, compute_primal
def compute_dual(alpha, Ytrain, W, Omega, lambda_):
    """Placeholder for util.compute_dual: ignores its arguments and returns 0."""
    return 0

# Run MOCHA with the real compute_rmse / compute_primal but the stubbed
# compute_dual defined above; the recorded dual objectives are therefore
# all zero and carry no information.
opts['avg']=False
rmse, primal_objs, dual_objs = run_mocha(Xtrain, Ytrain, Xtest, Ytest, lambda_, opts)

Running MOCHA

Outer itter:  0
Outer itter:  1
Outer itter:  2
Outer itter:  3
Outer itter:  4
Outer itter:  5
Outer itter:  6
Outer itter:  7
Outer itter:  8
Outer itter:  9
