In [None]:
import numpy as np
import gurobipy as grb
import csv
import time
import statsmodels.api as sm
from joblib import Parallel,delayed

def five_variate(n,SNR,rho):
    """Simulate a five-covariate regression with one coefficient switch and fit it as a MIQP.

    The first ``n // 2`` observations are generated with coefficients
    ``[1, 1, 1, 0, 1]`` and the remaining ones with ``[-1, -1, -1, 0, -1]``.
    Covariates are drawn from a zero-mean normal whose covariance is
    ``rho ** |i - j|``. Each segment's Gaussian noise has variance equal to
    that segment's empirical signal variance divided by ``SNR``.

    The fitted model has one coefficient vector per observation and one
    binary ``z[t]`` per consecutive pair; ``z[t] == 0`` forces
    ``beta[t+1] == beta[t]`` via indicator constraints (no big-M constant).
    The objective is the residual sum of squares plus ``0.1 * n`` per
    change point.

    Parameters
    ----------
    n : int
        Number of observations (odd values are supported; the second
        segment then gets the extra observation).
    SNR : float
        Signal-to-noise ratio used to scale the noise variance.
    rho : float
        Base of the covariance decay between covariates.

    Returns
    -------
    z_hat : list
        Solver values of the ``n - 1`` binary change indicators. These are
        floating-point solver output, so they may deviate slightly from
        exactly 0 or 1.
    opt_gap : float
        ``MIPGap`` attribute of the model after ``optimize()`` (the solve
        is limited to 400 seconds).
    beta_hat : list
        One coefficient vector per observation.
    """
    beta1 = np.array([1,1,1,0,1])
    beta2 = np.array([-1,-1,-1,0,-1])
    p = len(beta1)

    n = int(n)
    n_first = n // 2
    n_second = n - n_first  # differs from n_first when n is odd

    Sigma = np.array([[rho**abs(i-j) for j in range(p)] for i in range(p)], dtype=float)
    X = np.random.multivariate_normal(mean=np.zeros(p), cov=Sigma, size=n)

    # Noise for each segment is scaled by that segment's own signal variance.
    signal1 = X[:n_first]@beta1
    signal2 = X[n_first:]@beta2
    noise1 = np.random.normal(0, np.sqrt(np.var(signal1)/SNR), size=n_first)
    noise2 = np.random.normal(0, np.sqrt(np.var(signal2)/SNR), size=n_second)
    Y = np.r_[signal1 + noise1, signal2 + noise2]
    lam = 0.1*n

    M = grb.Model()
    M.Params.TimeLimit = 400
    beta = M.addMVar((n,p), lb = -grb.GRB.INFINITY)
    z = M.addMVar(n - 1, vtype = grb.GRB.BINARY)

    def residual(t):
        # Residual of observation t under its own coefficient vector.
        return Y[t] - sum(X[t,j]*beta[t,j] for j in range(p))

    # Build each residual expression once and square it.
    sum_of_sq = sum(r*r for r in (residual(t) for t in range(n)))

    for t in range(n - 1):
        for j in range(p):
            # Indicator constraint instead of a big-M formulation.
            M.addConstr((z[t] == 0) >> (beta[t+1, j] == beta[t, j]))

    # Optional constraints to add more cuts:
    # at most 5 change points overall ...
    M.addConstr(sum(z) <= 5)
    # ... and at most one change point in any three consecutive positions.
    for t in range(1, n - 2):
        M.addConstr(z[t-1] + z[t] + z[t+1] <= 1)

    M.setObjective(sum_of_sq + lam*sum(z))
    M.optimize()

    beta_hat = [beta[t].x for t in range(n)]
    z_hat = [z[t].x for t in range(n - 1)]
    opt_gap = M.MIPGap
    return z_hat, opt_gap,beta_hat

# Simulation grid: every combination of sample size, SNR and rho is run once per repetition.
SNR_list = np.array([6, 3.52, 2.07, 1.22, 0.71])
rho_list = np.array([0, 0.3, 0.7])
n_list = np.array([1000])

# Start a fresh results file containing only the header row.
# newline='' is what the csv module expects, and the context manager
# closes the file even if the write fails.
header = ['Repitition','Time','rho', 'SNR', 'n', 'Optimality Gap', 'Change Points']#, 'beta_hat', 'nonzero count']
with open('simulation_five_variate_no_bigM.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

def to_repeat(rep):
    """Run one repetition of the full simulation grid and append the results to the CSV.

    For every combination in ``n_list`` x ``SNR_list`` x ``rho_list`` this
    calls ``five_variate``, times the call, and appends one row
    ``[rep, duration, rho, SNR, n, opt_gap, change_points]`` to
    ``simulation_five_variate_no_bigM.csv``.

    Parameters
    ----------
    rep : int
        Repetition index, written as the first column of every row.
    """
    for n in n_list:
        for SNR in SNR_list:
            for rho in rho_list:
                tik = time.time()
                z_hat, opt_gap, beta_hat = five_variate(n,SNR,rho)
                duration = time.time() - tik
                # Binary values come back from the solver as floats that are only
                # integral up to a tolerance, so threshold instead of testing == 1.
                cps = [idx for idx, item in enumerate(z_hat) if item > 0.5]
                resultrow = [rep,duration,rho,SNR, n, opt_gap, cps]
                # Append one row; the context manager closes the file even on error.
                with open('simulation_five_variate_no_bigM.csv', 'a', newline='') as f:
                    csv.writer(f).writerow(resultrow)
                
# Dispatch 500 repetitions across 8 parallel workers (Tomo's machine has 8 cores).
repetition_tasks = (delayed(to_repeat)(rep_index) for rep_index in range(500))
Parallel(n_jobs = 8)(repetition_tasks)

In [1]:
# Smoke test: small sample size (n = 100) and 2 repetitions

import numpy as np
import gurobipy as grb
import csv
import time
import statsmodels.api as sm
from joblib import Parallel,delayed

def five_variate(n,SNR,rho):
    """Simulate a five-covariate regression with one coefficient switch and fit it as a MIQP.

    The first ``n // 2`` observations are generated with coefficients
    ``[1, 1, 1, 0, 1]`` and the remaining ones with ``[-1, -1, -1, 0, -1]``.
    Covariates are drawn from a zero-mean normal whose covariance is
    ``rho ** |i - j|``. Each segment's Gaussian noise has variance equal to
    that segment's empirical signal variance divided by ``SNR``.

    The fitted model has one coefficient vector per observation and one
    binary ``z[t]`` per consecutive pair; ``z[t] == 0`` forces
    ``beta[t+1] == beta[t]`` via indicator constraints (no big-M constant).
    The objective is the residual sum of squares plus ``0.1 * n`` per
    change point.

    Parameters
    ----------
    n : int
        Number of observations (odd values are supported; the second
        segment then gets the extra observation).
    SNR : float
        Signal-to-noise ratio used to scale the noise variance.
    rho : float
        Base of the covariance decay between covariates.

    Returns
    -------
    z_hat : list
        Solver values of the ``n - 1`` binary change indicators. These are
        floating-point solver output, so they may deviate slightly from
        exactly 0 or 1.
    opt_gap : float
        ``MIPGap`` attribute of the model after ``optimize()`` (the solve
        is limited to 400 seconds).
    beta_hat : list
        One coefficient vector per observation.
    """
    beta1 = np.array([1,1,1,0,1])
    beta2 = np.array([-1,-1,-1,0,-1])
    p = len(beta1)

    n = int(n)
    n_first = n // 2
    n_second = n - n_first  # differs from n_first when n is odd

    Sigma = np.array([[rho**abs(i-j) for j in range(p)] for i in range(p)], dtype=float)
    X = np.random.multivariate_normal(mean=np.zeros(p), cov=Sigma, size=n)

    # Noise for each segment is scaled by that segment's own signal variance.
    signal1 = X[:n_first]@beta1
    signal2 = X[n_first:]@beta2
    noise1 = np.random.normal(0, np.sqrt(np.var(signal1)/SNR), size=n_first)
    noise2 = np.random.normal(0, np.sqrt(np.var(signal2)/SNR), size=n_second)
    Y = np.r_[signal1 + noise1, signal2 + noise2]
    lam = 0.1*n

    M = grb.Model()
    M.Params.TimeLimit = 400
    beta = M.addMVar((n,p), lb = -grb.GRB.INFINITY)
    z = M.addMVar(n - 1, vtype = grb.GRB.BINARY)

    def residual(t):
        # Residual of observation t under its own coefficient vector.
        return Y[t] - sum(X[t,j]*beta[t,j] for j in range(p))

    # Build each residual expression once and square it.
    sum_of_sq = sum(r*r for r in (residual(t) for t in range(n)))

    for t in range(n - 1):
        for j in range(p):
            # Indicator constraint instead of a big-M formulation.
            M.addConstr((z[t] == 0) >> (beta[t+1, j] == beta[t, j]))

    # Optional constraints to add more cuts:
    # at most 5 change points overall ...
    M.addConstr(sum(z) <= 5)
    # ... and at most one change point in any three consecutive positions.
    for t in range(1, n - 2):
        M.addConstr(z[t-1] + z[t] + z[t+1] <= 1)

    M.setObjective(sum_of_sq + lam*sum(z))
    M.optimize()

    beta_hat = [beta[t].x for t in range(n)]
    z_hat = [z[t].x for t in range(n - 1)]
    opt_gap = M.MIPGap
    return z_hat, opt_gap,beta_hat

# Reduced grid for a quick trial run: a single (n, SNR, rho) combination.
SNR_list = np.array([6])
rho_list = np.array([0])
n_list = np.array([100])

# Start a fresh results file containing only the header row.
# newline='' is what the csv module expects, and the context manager
# closes the file even if the write fails.
header = ['Repitition','Time','rho', 'SNR', 'n', 'Optimality Gap', 'Change Points']#, 'beta_hat', 'nonzero count']
with open('simulation_five_variate_no_bigM.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

def to_repeat(rep):
    """Run one repetition of the full simulation grid and append the results to the CSV.

    For every combination in ``n_list`` x ``SNR_list`` x ``rho_list`` this
    calls ``five_variate``, times the call, and appends one row
    ``[rep, duration, rho, SNR, n, opt_gap, change_points]`` to
    ``simulation_five_variate_no_bigM.csv``.

    Parameters
    ----------
    rep : int
        Repetition index, written as the first column of every row.
    """
    for n in n_list:
        for SNR in SNR_list:
            for rho in rho_list:
                tik = time.time()
                z_hat, opt_gap, beta_hat = five_variate(n,SNR,rho)
                duration = time.time() - tik
                # Binary values come back from the solver as floats that are only
                # integral up to a tolerance, so threshold instead of testing == 1.
                cps = [idx for idx, item in enumerate(z_hat) if item > 0.5]
                resultrow = [rep,duration,rho,SNR, n, opt_gap, cps]
                # Append one row; the context manager closes the file even on error.
                with open('simulation_five_variate_no_bigM.csv', 'a', newline='') as f:
                    csv.writer(f).writerow(resultrow)
                
# Dispatch 2 repetitions across 8 parallel workers (Tomo's machine has 8 cores).
repetition_tasks = (delayed(to_repeat)(rep_index) for rep_index in range(2))
Parallel(n_jobs = 8)(repetition_tasks)

Set parameter Username
Academic license - for non-commercial use only - expires 2024-02-18
Set parameter TimeLimit to value 400
Set parameter Username
Academic license - for non-commercial use only - expires 2024-02-18
Set parameter TimeLimit to value 400
Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (mac64[x86])

CPU model: Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 98 rows, 599 columns and 390 nonzeros
Model fingerprint: 0x6bfe6d03
Model has 1500 quadratic objective terms
Model has 495 general constraints
Variable types: 500 continuous, 99 integer (99 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [3e-03, 2e+01]
  QObjective range [4e-05, 2e+01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 5e+00]
  GenCon coe range [1e+00, 1e+00]
Found heuristic solution: objective 427.7648471
Presolve added 495 rows and 495 columns
Presolve time: 0

 27355  3622   30.44694   32  147   46.52213   20.00000  57.0%   212  116s
 28448  3686   32.62379   67  124   46.52213   20.00000  57.0%   215  120s
 44156 10932   40.07417   29  140   57.27815   20.00000  65.1%   121  121s
 46137 11179   46.39171   46  125   57.27815   20.00000  65.1%   121  126s
 30802  3904     cutoff   42        46.52213   20.00000  57.0%   219  128s
 47858 11448     cutoff   50        57.27815   20.00000  65.1%   122  131s
 32118  3910   30.00000   39  146   46.52213   20.00000  57.0%   220  132s
 49712 11480     cutoff   58        57.27815   20.00000  65.1%   123  135s
 33234  3936   32.45555   45  131   46.52213   20.00000  57.0%   222  136s
 34372  3994   30.00000   39  148   46.52213   20.00000  57.0%   224  140s
 52910 13008     cutoff   39        57.27815   20.00000  65.1%   123  141s
 55045 14047   35.11757   41  139   57.27815   20.00000  65.1%   122  145s
 36894  4455   30.00000   39  147   46.52213   20.00000  57.0%   226  149s
 57783 14238   38.74079  

 169865 39393   38.52260   57  114   57.27815   25.54654  55.4%   123  390s
 115080  9448   30.70448   46  147   46.52213   20.70448  55.5%   225  391s
 173144 40255     cutoff   52        57.27815   25.54654  55.4%   122  395s
 116519  9544   31.54367   51  133   46.52213   20.70448  55.5%   225  397s
 117280  9590   31.96272   65  126   46.52213   20.70448  55.5%   224  400s

Explored 117978 nodes (26434233 simplex iterations) in 400.14 seconds (50.81 work units)
Thread count was 8 (of 8 available processors)

Solution count 10: 46.5221 47.2558 47.8969 ... 300.129

Time limit reached
Best objective 4.652212944745e+01, best bound 2.070448305480e+01, gap 55.4954%
 175937 40555     cutoff   57        57.27815   25.56997  55.4%   121  400s

Explored 176245 nodes (21396175 simplex iterations) in 400.10 seconds (53.60 work units)
Thread count was 8 (of 8 available processors)

Solution count 10: 57.2781 64.943 65.1025 ... 132.47

Time limit reached
Best objective 5.727814642212e+01, best b

[None, None]