## Iterative Proportional fitting

In [2]:
import pandas as pd
import numpy as np


In [3]:
data_tremi = pd.read_csv('insulation.csv', index_col=[0, 1, 2, 3]).squeeze()
data_tremi

Wall   Floor  Roof   Windows
True   True   True   False       35145.246407
False  True   True   False        3356.056566
True   True   False  False        1559.094204
       False  True   False       88720.495408
              False  False       82471.917883
False  False  True   False      782532.284906
       True   False  False        1685.144509
True   True   True   True        32147.673467
False  True   True   True        42110.446978
True   True   False  True        11086.666583
       False  True   True         5717.938288
              False  True         2876.939109
False  False  True   True         8821.088307
       True   False  True        60009.792006
       False  False  True        43791.891007
Name: 0, dtype: float64

In [4]:
market_share = pd.read_csv('market_share_insulation_tremi.csv', index_col=[0, 1, 2, 3]).squeeze().rename(None)
market_share

Wall   Floor  Roof   Windows
True   True   True   False      0.029238
False  True   True   False      0.002792
True   True   False  False      0.001297
       False  True   False      0.073809
              False  False      0.068610
False  False  True   False      0.651007
       True   False  False      0.001402
True   True   True   True       0.026744
False  True   True   True       0.035033
True   True   False  True       0.009223
       False  True   True       0.004757
              False  True       0.002393
False  False  True   True       0.007338
       True   False  True       0.049924
       False  False  True       0.036432
dtype: float64

In [5]:
ms_cee = dict()
ms_cee['Wall'] = pd.Series([0.1021, 1 - 0.1021], index=pd.Index([True, False], name='Wall')) * 1_000
ms_cee['Floor'] = pd.Series([0.3038, 1 - 0.3038], index=pd.Index([True, False], name='Floor')) * 1_000
ms_cee['Roof'] = pd.Series([0.4972, 1 - 0.4972], index=pd.Index([True, False], name='Roof')) * 1_000
ms_cee['Windows'] = pd.Series([0.0969, 1 - 0.0969], index=pd.Index([True, False], name='Windows')) * 1_000
ms_cee

{'Wall': Wall
 True     102.1
 False    897.9
 dtype: float64,
 'Floor': Floor
 True     303.8
 False    696.2
 dtype: float64,
 'Roof': Roof
 True     497.2
 False    502.8
 dtype: float64,
 'Windows': Windows
 True      96.9
 False    903.1
 dtype: float64}

In [6]:
def ipf_update(M, ms_cee):
    for i in M.index.names:
        M = ms_cee[i] * M / M.groupby(i).sum()

    d = {i: np.linalg.norm(ms_cee[i] - M.groupby(i).sum(), 2) for i in M.index.names}
    return M, d

In [7]:
M = data_tremi.copy()
for _ in range(10):
    M, d = ipf_update(M, ms_cee)
    print(d)

{'Wall': 138.3583167135311, 'Floor': 222.02490953143857, 'Roof': 398.6193437468762, 'Windows': 1.4210854715202004e-14}
{'Wall': 133.50707626953027, 'Floor': 42.526154578684405, 'Roof': 232.52221224127044, 'Windows': 1.4210854715202004e-14}
{'Wall': 68.0931835419668, 'Floor': 112.31053023224167, 'Roof': 68.24021444208763, 'Windows': 1.4210854715202004e-14}
{'Wall': 55.0568247603812, 'Floor': 125.97726623787538, 'Roof': 10.932318582700045, 'Windows': 1.1368683772161603e-13}
{'Wall': 53.388777529953416, 'Floor': 101.54528222728653, 'Roof': 3.203913571650475, 'Windows': 1.4210854715202004e-14}
{'Wall': 48.1657593941225, 'Floor': 83.86525781711907, 'Roof': 4.139774883749001, 'Windows': 1.1368683772161603e-13}
{'Wall': 42.92803527067386, 'Floor': 72.0101587231646, 'Roof': 5.234198959607468, 'Windows': 0.0}
{'Wall': 38.327999469533054, 'Floor': 63.043781536529735, 'Roof': 5.9477319769905606, 'Windows': 1.1368683772161603e-13}
{'Wall': 34.25851971699842, 'Floor': 55.779327221625415, 'Roof': 6.

In [8]:
weight_m = M / M.sum()
weight_m

Wall   Floor  Roof   Windows
True   True   True   False      1.960728e-04
False  True   True   False      2.126667e-03
True   True   False  False      2.267890e-03
       False  True   False      4.979769e-04
              False  False      1.206952e-01
False  False  True   False      4.988925e-01
       True   False  False      2.784237e-01
True   True   True   True       1.007638e-06
False  True   True   True       1.499216e-04
True   True   False  True       9.060551e-05
       False  True   True       1.803137e-07
              False  True       2.365477e-05
False  False  True   True       3.159591e-05
       True   False  True       5.570512e-02
       False  False  True       4.089791e-02
dtype: float64

In [9]:
pd.set_option('display.float_format', lambda x: '%.6f' % x)
df = pd.concat((weight_m, market_share), axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1
Wall,Floor,Roof,Windows,Unnamed: 4_level_1,Unnamed: 5_level_1
True,True,True,False,0.000196,0.029238
False,True,True,False,0.002127,0.002792
True,True,False,False,0.002268,0.001297
True,False,True,False,0.000498,0.073809
True,False,False,False,0.120695,0.06861
False,False,True,False,0.498892,0.651007
False,True,False,False,0.278424,0.001402
True,True,True,True,1e-06,0.026744
False,True,True,True,0.00015,0.035033
True,True,False,True,9.1e-05,0.009223


In [10]:
weight_m.to_csv('market_share_insulation_cee.csv')

## Ad hoc method

We calibrate only number of work with the technology

In [46]:
from scipy.optimize import fsolve
from project.utils import reindex_mi

In [51]:
def func(coeff, ms, target):
    names = ['Wall', 'Floor', 'Roof', 'Windows']
    coeff = pd.Series(coeff, index=pd.MultiIndex.from_tuples(((True, False, False, False),
                                                              (False, True, False, False),
                                                              (False, False, True, False),
                                                              (False, False, False, True),
                                                                 ), names=names))
    ms = reindex_mi(coeff, ms.index).fillna(1) * ms

    return np.array([ms.xs(True, level='Wall').sum() / ms.sum() - target[i] for i in names])





In [53]:
names = ['Wall', 'Floor', 'Roof', 'Windows']
coeff = pd.Series(coeff, index=pd.MultiIndex.from_tuples(((True, False, False, False),
                                                          (False, True, False, False),
                                                          (False, False, True, False),
                                                          (False, False, False, True),
                                                             ), names=names))
ms = reindex_mi(coeff, ms.index).fillna(1) * ms

In [55]:
ms

Wall   Floor  Roof   Windows
True   True   True   False     0.029238
False  True   True   False     0.002792
True   True   False  False     0.001297
       False  True   False     0.073809
              False  False     0.068610
False  False  True   False     0.651007
       True   False  False     0.001402
True   True   True   True      0.026744
False  True   True   True      0.035033
True   True   False  True      0.009223
       False  True   True      0.004757
              False  True      0.002393
False  False  True   True      0.007338
       True   False  True      0.049924
       False  False  True      0.036432
dtype: float64

In [52]:
func(x0, ms, target)

array([ 0.11397231, -0.08772769, -0.28112769,  0.11917231])

In [50]:
x0 = np.array([1, 1, 1, 1])
ms = market_share.copy()
target = {'Wall': 0.1021, 'Floor': 0.3038, 'Roof': 0.4972, 'Windows':0.0969 }


  improvement from the last ten iterations.


array([  227.738232  , 28199.78788353,    49.34108093,  -670.63097305])