In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ruptures as rpt
import statsmodels.api as sm

# Load the panel and order it by country then year, so that a within-country
# shift(1) below picks up the preceding row of the same country.
df = (
    pd.read_csv("https://raw.githubusercontent.com/TomoyaOzawa-DA/research-project/main/data/CO2DriversEU_dataset_2021.csv")
    .sort_values(["country", "year"])
)

level_cols = ["transport.emissions", "gdp", "pop"]
log_cols = [f"log_{name}" for name in level_cols]

# natural-log versions of the level variables
for level_name, log_name in zip(level_cols, log_cols):
    df[log_name] = np.log(df[level_name])

# one-row lags computed separately inside each country (levels first, then logs)
for name in level_cols + log_cols:
    df[f"{name}_lag"] = df.groupby("country")[name].shift(1)

# squared variables (levels first, then logs)
for name in ["gdp", "pop", "log_gdp", "log_pop"]:
    df[f"{name}_sq"] = df[name] ** 2

# constant column (all ones)
df["const"] = 1

df.head()

Unnamed: 0,country,year,transport.emissions,gdp,pop,log_transport.emissions,log_gdp,log_pop,transport.emissions_lag,gdp_lag,pop_lag,log_transport.emissions_lag,log_gdp_lag,log_pop_lag,gdp_sq,pop_sq,log_gdp_sq,log_pop_sq,const
0,Austria,1970,7679.742167,135169100000.0,7467086.0,8.946341,25.629793,15.826015,,,,,,,1.827069e+22,55757370000000.0,656.886271,250.462763,1
1,Austria,1971,8464.743388,142083000000.0,7500482.0,9.043665,25.679677,15.830478,7679.742167,135169100000.0,7467086.0,8.946341,25.629793,15.826015,2.018758e+22,56257230000000.0,659.445819,250.604029,1
2,Austria,1972,9479.252474,150903300000.0,7544201.0,9.156861,25.739905,15.83629,8464.743388,142083000000.0,7500482.0,9.043665,25.679677,15.830478,2.277181e+22,56914970000000.0,662.542714,250.788073,1
3,Austria,1973,10294.92017,158283100000.0,7586115.0,9.239406,25.787651,15.84183,9479.252474,150903300000.0,7544201.0,9.156861,25.739905,15.83629,2.505355e+22,57549140000000.0,665.002957,250.963583,1
4,Austria,1974,9561.188741,164522900000.0,7599038.0,9.165467,25.826315,15.843532,10294.92017,158283100000.0,7586115.0,9.239406,25.787651,15.84183,2.706778e+22,57745380000000.0,666.998569,251.017513,1


In [46]:
import numpy as np
import gurobipy as grb
import csv
import time
import statsmodels.api as sm
from joblib import Parallel,delayed

def function_proposed(Y, X, lam=None, time_limit=400, max_breaks=5):
    """Estimate a regression with time-varying coefficients and penalised breaks.

    A mixed-integer quadratic program is built with one coefficient vector
    ``beta[t]`` per observation and one binary ``z[t]`` per pair of consecutive
    observations. ``beta[t+1]`` may differ from ``beta[t]`` only when
    ``z[t] == 1`` (big-M constraints). The objective is the residual sum of
    squares plus ``lam`` times the number of active ``z``.

    Parameters
    ----------
    Y : array-like, shape (n,)
        Dependent variable, in time order.
    X : array-like, shape (n, p)
        Regressors, in the same order as ``Y``. No constant is added here.
    lam : float, optional
        Penalty per break. Defaults to ``0.1 * n``.
    time_limit : float, optional
        Value assigned to Gurobi's ``TimeLimit`` parameter. Defaults to 400.
    max_breaks : int or None, optional
        Upper bound on ``sum(z)``. ``None`` drops the bound. Defaults to 5.

    Returns
    -------
    z_hat : list
        Solver value of each ``z[t]`` (length ``n - 1``).
    opt_gap : float
        ``MIPGap`` reported by Gurobi for the returned solution.
    beta_hat : list
        Solver value of each ``beta[t]`` (length ``n``).

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    RuntimeError
        If Gurobi finishes without any feasible solution.

    Notes
    -----
    The big-M bound is ``10 * 1.96 * sum(OLS standard errors)`` from a pooled
    OLS fit of ``Y`` on ``X``; any single coefficient jump is capped at that
    value. NOTE(review): whether this bound is wide enough for the data at
    hand should be confirmed, since a too-small big-M rules out real breaks.
    """
    # Plain ndarrays so that Y[t] / X[t, j] are positional scalar lookups.
    Y = np.asarray(Y, dtype=float)
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be two-dimensional with shape (n, p)")

    n = len(Y)
    p = X.shape[1]
    if lam is None:
        lam = 0.1 * n

    # Pooled OLS is used only to scale the big-M bound on coefficient jumps.
    ols_result = sm.OLS(Y, X).fit()
    bigM = 10 * float(1.96 * (sum(ols_result.bse)))  # 1000

    M = grb.Model()
    M.Params.TimeLimit = time_limit
    beta = M.addMVar((n, p), lb=-grb.GRB.INFINITY)
    z = M.addMVar(n - 1, vtype=grb.GRB.BINARY)

    # Build each residual once, then square it.
    residuals = [
        Y[t] - sum(X[t, j] * beta[t, j] for j in range(p))
        for t in range(n)
    ]
    sum_of_sq = sum(r * r for r in residuals)

    # Coefficients may move between t and t+1 only when z[t] is switched on.
    for t in range(n - 1):
        for j in range(p):
            M.addConstr(beta[t + 1, j] - beta[t, j] <= bigM * z[t])
            M.addConstr(beta[t + 1, j] - beta[t, j] >= -bigM * z[t])

    n_breaks = sum(z)

    # Optional tightening constraints. These restrict the feasible set: they
    # cap the total number of breaks and allow at most one break among any
    # three consecutive z's.
    if max_breaks is not None:
        M.addConstr(n_breaks <= max_breaks)
    for t in range(1, n - 2):
        M.addConstr(z[t - 1] + z[t] + z[t + 1] <= 1)

    M.setObjective(sum_of_sq + lam * n_breaks)
    M.optimize()

    # Without an incumbent, .x and MIPGap are unavailable; fail with a clear message.
    if M.SolCount == 0:
        raise RuntimeError(
            "Gurobi returned no feasible solution (status code %s)" % M.Status
        )

    beta_hat = [beta[t].x for t in range(n)]
    z_hat = [z[t].x for t in range(n - 1)]
    opt_gap = M.MIPGap
    return z_hat, opt_gap, beta_hat

In [49]:
# Germany-only fit: log transport emissions on log GDP and log population.
# a = z values, b = MIP gap, c = per-observation coefficients.
germany = df.loc[df["country"] == "Germany"]
a, b, c = function_proposed(
    germany["log_transport.emissions"].values,
    germany[["log_gdp", "log_pop"]].values,
)

Set parameter TimeLimit to value 400
Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (mac64[x86])

CPU model: Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 254 rows, 155 columns and 810 nonzeros
Model fingerprint: 0x2373cc8a
Model has 156 quadratic objective terms
Variable types: 104 continuous, 51 integer (51 binary)
Coefficient statistics:
  Matrix range     [1e-05, 1e+00]
  Objective range  [5e-03, 7e+02]
  QObjective range [7e+02, 2e+03]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 5e+00]
Found heuristic solution: objective 7252.0982072
Presolve time: 0.00s
Presolved: 254 rows, 155 columns, 810 nonzeros
Presolved model has 156 quadratic objective terms
Variable types: 104 continuous, 51 integer (51 binary)

Root relaxation: objective 5.955304e-01, 355 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl 

In [50]:
# Show the first value returned by function_proposed for Germany: the solver's
# z values, one entry per pair of consecutive observations.
a

[array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.),
 array(-0.)]