In [None]:
!pip install cvxpy

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [2]:
# Read the Excel workbook (path relative to the working directory) into `df`.
df = pd.read_excel("teste_controle.xlsx")

In [4]:
# Preview the first 100 rows of the loaded frame.
df.head(100)

Unnamed: 0,ANOMES,ID,T,TD,Y
0,2023-01-01,1,1,0,121
1,2023-02-01,1,1,0,16
2,2023-03-01,1,1,0,48
3,2023-04-01,1,1,0,32
4,2023-05-01,1,1,0,132
...,...,...,...,...,...
95,2024-09-01,4,0,1,29
96,2024-10-01,4,0,1,143
97,2024-11-01,4,0,1,55
98,2024-12-01,4,0,1,27


In [7]:
# Distinct IDs among the rows where the T flag equals 1.
treated = list(df.query("T==1")["ID"].unique())
print(treated)
# Smallest ANOMES value among the rows where the TD flag equals 1; passed
# below as the treatment start.
tr_period = df.query("TD==1")["ANOMES"].min()
print(tr_period)

[1, 2, 3]
2024-05-01 00:00:00


In [8]:
def reshape_sc_data(df: pd.DataFrame, geo_col: str,  time_col: str, y_col: str, tr_geos: list, tr_start: str):
    """Pivot a long panel to wide form and split it by unit group and period.

    The frame is pivoted so that rows are `time_col` values, columns are
    `geo_col` values and cells hold `y_col`. Columns listed in `tr_geos` form
    the treated block; every other column forms the control block. Rows whose
    index is strictly before `tr_start` are the pre-period, rows at or after
    it are the post-period.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format data with one row per (`time_col`, `geo_col`) pair.
    geo_col : str
        Column identifying the unit; becomes the wide frame's columns.
    time_col : str
        Column identifying the period; becomes the wide frame's index.
    y_col : str
        Column holding the outcome values.
    tr_geos : list
        Labels (values of `geo_col`) of the treated units.
    tr_start : str
        First period of the post window; compared against the pivoted index.

    Returns
    -------
    tuple
        `(y_pre_co, y_pre_tr, y_post_co, y_post_tr)`.

    Raises
    ------
    ValueError
        Propagated from `DataFrame.pivot` when a (`time_col`, `geo_col`) pair
        occurs more than once.
    """
    # Keyword arguments are required here: positional use is deprecated in
    # pandas 1.x and rejected in pandas 2.x.
    df_pivot = df.pivot(index=time_col, columns=geo_col, values=y_col)

    y_co = df_pivot.drop(columns=tr_geos)
    y_tr = df_pivot[tr_geos]

    # Compute each period mask once and reuse it for both groups.
    is_pre = df_pivot.index < tr_start
    is_post = df_pivot.index >= tr_start

    y_pre_co = y_co[is_pre]
    y_pre_tr = y_tr[is_pre]
    y_post_co = y_co[is_post]
    y_post_tr = y_tr[is_post]
    return y_pre_co, y_pre_tr, y_post_co, y_post_tr

In [9]:
# Split the panel into pre/post windows for the control and treated units.
y_pre_co, y_pre_tr, y_post_co, y_post_tr = reshape_sc_data(
    df,
    geo_col="ID",
    time_col="ANOMES",
    y_col="Y",
    tr_geos=treated,
    tr_start=str(tr_period),
)
y_pre_tr.head()

  df_pivot = df.pivot(time_col, geo_col, y_col)


ID,1,2,3
ANOMES,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,121,130,146
2023-02-01,16,65,128
2023-03-01,48,62,112
2023-04-01,32,129,130
2023-05-01,132,143,140


In [10]:
# Preview the pre-period outcomes of the control units (one column per ID).
y_pre_co.head()

ID,4,5,6,7,8,9,10
ANOMES,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-01,78,130,26,81,73,114,33
2023-02-01,128,46,139,123,22,81,80
2023-03-01,137,60,134,65,105,47,106
2023-04-01,111,64,44,58,14,123,79
2023-05-01,104,87,31,39,144,129,88


In [11]:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import (check_X_y, check_array, check_is_fitted)
import cvxpy as cp

class SyntheticControl(BaseEstimator, RegressorMixin):
    """Least-squares synthetic control with weights on the simplex.

    `fit` looks for a weight vector, one entry per control column, that is
    non-negative, sums to one, and minimises the sum of squared differences
    between the weighted control outcomes and the treated outcome.

    Attributes set by `fit`:
        loss_: value returned by the cvxpy solve call.
        w_: the solved weight vector.
        is_fitted_: flag set to True once fitting has run.
    """

    def __init__(self,):
        pass

    def fit(self, y_pre_co, y_pre_tr):
        """Solve for the control weights.

        Args:
            y_pre_co: 2-D array-like of control outcomes (rows are periods,
                columns are control units).
            y_pre_tr: 1-D array-like with the treated outcome per period.

        Returns:
            The estimator itself.
        """
        y_pre_co, y_pre_tr = check_X_y(y_pre_co, y_pre_tr)

        n_controls = y_pre_co.shape[1]
        weights = cp.Variable(n_controls)

        # Gap between the weighted controls and the treated series.
        residual = y_pre_co @ weights - y_pre_tr
        simplex = [cp.sum(weights) == 1, weights >= 0]

        qp = cp.Problem(cp.Minimize(cp.sum_squares(residual)), simplex)
        self.loss_ = qp.solve(verbose=False)
        self.w_ = weights.value

        self.is_fitted_ = True
        return self

    def predict(self, y_co):
        """Return the weighted combination of the control outcomes in `y_co`."""
        check_is_fitted(self)
        controls = check_array(y_co)
        return controls @ self.w_

In [12]:
model = SyntheticControl()
# Fit against the row-wise mean of the treated units' pre-period outcomes.
model.fit(y_pre_co, y_pre_tr.mean(axis=1))
# extract the weights
model.w_.round(3)

array([-0.   ,  0.498, -0.   , -0.   ,  0.077,  0.425, -0.   ])

In [30]:
# Weighted combination of the control columns using the fitted weights,
# computed for the post-period and the pre-period frames respectively.
y0_tr_post = y_post_co.dot(model.w_)
y0_tr_pre = y_pre_co.dot(model.w_)

In [31]:
# synthetic value, pre-period
y0_tr_pre.head()

ANOMES
2023-01-01    118.796963
2023-02-01     59.004370
2023-03-01     57.961349
2023-04-01     85.183430
2023-05-01    109.242622
dtype: float64

In [32]:
# synthetic value, post-period
y0_tr_post.head()

ANOMES
2024-05-01     69.484905
2024-06-01     73.497012
2024-07-01     17.368147
2024-08-01    139.665321
2024-09-01     58.206204
dtype: float64

In [40]:
# Stack the pre- and post-period synthetic series into one Series (pre first).
sintetico = pd.concat([y0_tr_pre,y0_tr_post])

In [42]:
# Row-wise mean across the treated units, pre-period followed by post-period.
real = pd.concat([y_pre_tr.mean(axis=1),y_post_tr.mean(axis=1)])