In [26]:
import os 
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm

sectors = ['agriculture', 'apparel', 'chemicals', 'machinery', 'materials', 
           'metals', 'miscellaneous', 'transport', 'minerals']

new_sectors = ['Agriculture', 'Apparel', 'Chemicals', 'Machinery', 'Materials', 
           'Metals', 'Miscellaneous', 'Transport', 'Minerals']
rename_columns = {s: ns for s, ns in zip(sectors, new_sectors)}
sectors = new_sectors

region2agg_dict = {
    'east africa': 'Africa', 
    'central africa': 'Africa',
    'north africa': 'Africa',
    'southern africa': 'Africa',
    'west africa': 'Africa',
    'caribbean': 'America',
    'central america': 'America',
    'south america': 'America',
    'north america': 'America',
    'central asia': 'Asia',
    'east asia': 'Asia',
    'south asia': 'Asia',
    'southeast asia': 'Asia',
    'west asia': 'Asia',
    'eastern europe': 'Europe', 
    'nordic': 'Europe',
    'southern europe': 'Europe',
    'western europe': 'Europe',
    'australia and new zealand': 'Oceania',
    'melanesia': 'Oceania',
    'micronesia': 'Oceania',
    'polynesia': 'Oceania',
}


project_dir = Path('C:\shares_TARIFF_project\comtrade_new\DID_country')


volume_df = pd.read_csv(project_dir / '2015_2021.csv', header=0)
volume_df = volume_df.rename(columns=rename_columns)
volume_df.head()

Unnamed: 0,source,year,month,Agriculture,Apparel,Chemicals,Machinery,Materials,Metals,Minerals,Miscellaneous,Transport
0,Afghanistan,2015,1,807693.0,508764.0,,45445.0,,4956.0,,269303.0,20858.0
1,Afghanistan,2015,2,564296.0,170889.0,,47867.0,,4838991.0,,403538.0,
2,Afghanistan,2015,3,153039.0,731135.0,,5582.0,,146989.0,,279085.0,
3,Afghanistan,2015,4,384263.0,281741.0,,26328.0,2856.0,474628.0,,1070675.0,4451.0
4,Afghanistan,2015,5,57945.0,240599.0,,10655.0,24119.0,1664127.0,,676760.0,3607.0


In [27]:
# find the top 20 countries
volume_df['sum'] = volume_df.apply(lambda x: sum([x[i] for i in sectors]), axis=1)
top_countries = volume_df.groupby(by=['source'])[['sum']] \
                .sum().sort_values(by = ['sum'], ascending = False) \
                .head(22).index.tolist()

#print(volume_df)
volume_df = volume_df.query('source in @top_countries')


# 更新下国家或地区的名称，去掉空格（空格在did运算中会出路径问题）
volume_df['source'] = volume_df['source'].apply(lambda x: str(x).replace(' ', '_'))
top_countries = volume_df['source'].unique()
print(top_countries)

['Brazil' 'Canada' 'China' 'France' 'Germany' 'India' 'Indonesia'
 'Ireland' 'Israel' 'Italy' 'Japan' 'Malaysia' 'Mexico' 'Netherlands'
 'Other_Asia,_nes' 'Rep._of_Korea' 'Russian_Federation' 'Singapore'
 'Switzerland' 'Thailand' 'United_Kingdom' 'Viet_Nam']


In [28]:
region_df = pd.read_csv(project_dir / 'countries2regions_new.csv', header=0)
country2region_dict = {c:r for c, r in zip(region_df['source_country'], region_df['source_region'])}

region_df['agg'] = region_df['source_region'].map(region2agg_dict)

region2countries_dict = {}
for region, group_df in region_df.groupby(by=['agg']):
    region2countries_dict[region] = group_df['source_country'].tolist()

# 区分 USA和 Rest of America(ROA)

China_list = ['China']
region2countries_dict['China'] = China_list
roa_list = []
for c in region2countries_dict.get('Asia'):
    if c not in China_list:
        roa_list.append(c)
region2countries_dict['ROA'] = roa_list
del region2countries_dict['Asia']


  for region, group_df in region_df.groupby(by=['agg']):


# get panel data - trade war

In [29]:
country2region_dict = {}
for rg, ct_list in region2countries_dict.items():
    for ct in ct_list:
        country2region_dict[ct] = rg



volume_sum_df = volume_df.groupby(by=['source', 'year', 'month']).sum()
volume_sum_df['region'], volume_sum_df['year'], volume_sum_df['month'] = zip(*volume_sum_df.index)
volume_sum_df.reset_index(drop=True, inplace=True)

# volume_mean_df = volume_sum_df.groupby(by=['year', 'month']).mean()
# volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
# volume_mean_df.reset_index(drop=True, inplace=True)



# volume => percentage 转化为占比
years = volume_sum_df['year'].unique()
months = volume_sum_df['month'].unique()
for y in years:
    for m in months:
        query_df = volume_sum_df.query('year == @y and month == @m')[sectors]
        #print(query_df)
        _sum = query_df.sum()
        secter2sum_dict = {s:v for s, v in zip(_sum.index, _sum.tolist())}
        for idx in query_df.index:
            for s, v in secter2sum_dict.items():
                _v = volume_sum_df.loc[idx, s]
                p = _v / v * 100
                volume_sum_df.loc[idx, s] = p

print(volume_sum_df)

      Agriculture    Apparel  Chemicals  Machinery  Materials    Metals  \
0        4.260698   0.449057   0.975222   0.402846   2.959273  4.610218   
1        4.212676   0.396849   1.083890   0.349177   2.800043  3.163597   
2        4.211868   0.353062   0.720292   0.330025   2.819042  2.520451   
3        3.811707   0.542173   0.834325   0.378019   3.039443  2.548202   
4        3.735771   0.434122   0.792456   0.355908   3.262317  2.435849   
...           ...        ...        ...        ...        ...       ...   
1830     4.629573  21.344427   0.388025   5.234711   3.430279  1.663261   
1831     3.857840  16.901892   0.363505   5.145161   3.246773  1.759954   
1832     3.330712  16.311611   0.426333   6.408609   3.102815  1.842076   
1833     2.767803  14.883711   0.381127   6.995031   3.014355  2.217961   
1834     3.957236  17.472758   0.378028   6.319674   3.502277  2.408479   

      Minerals  Miscellaneous  Transport           sum    region  year  month  
0     3.451515     

In [30]:
# empty dir
panel_dir = project_dir / 'panels-trade-war'
for fp in panel_dir.rglob('*.csv'):
    os.remove(fp)

In [31]:
exp_years = [2018,2019]
com_years = [2015,2016,2017]



def get_did_panel(volume_sum_df, exp_years, com_years, sector, region):
    # volume_sum_df 中除去region外的求平均
    volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
    volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
    volume_mean_df.reset_index(drop=True, inplace=True)
    com_df = volume_mean_df.query('year < 2020')[['year', 'month', sector]]
    com_df['name'] = 'mean'
    com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
    com_df.loc[com_df.query('year in @exp_years').index, 'time'] = 1
    com_df['time'] = com_df['time'].astype('int')
    com_df.loc[:, 'group'] = 0
    print(volume_mean_df)


    exp_df = volume_sum_df.query('year < 2020 and region == @region')[['region', 'year', 'month', sector]]
    exp_df = exp_df.rename(columns={'region': 'name'})
    exp_df.loc[exp_df.query('year in @com_years').index, 'time'] = 0
    exp_df.loc[exp_df.query('year in @exp_years').index, 'time'] = 1
    exp_df['time'] = exp_df['time'].astype('int')
    exp_df.loc[:, 'group'] = 1


    concat_df = pd.concat([exp_df, com_df])
    concat_df['did'] = concat_df.apply(lambda x: x['time'] * x['group'], axis=1)
    concat_df['date'] = concat_df.apply(lambda x: '{}-{}'.format(x['year'], x['month']), axis=1)
    del concat_df['year'], concat_df['month']
    concat_df = concat_df.rename(columns={sector: 'value'})
    # concat_df['value'] = np.log(concat_df['value'] + 1)
    return concat_df

for region in top_countries:
    for sector in sectors:
        panel_df = get_did_panel(volume_sum_df, exp_years, com_years, sector, region)
        panel_df.to_csv(panel_dir / f'{region}-{sector}.csv', header=True, index=False)

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.559014  4.740521   4.715466   4.742722   4.620987  4.542371   
1      4.561301  4.743007   4.710291   4.745277   4.628569  4.611257   
2      4.561340  4.745092   4.727605   4.746189   4.627665  4.641883   
3      5.062542  5.234622   5.219246   5.243262   5.103187  5.129042   
4      4.584011  4.741232   4.724169   4.744957   4.606556  4.645912   
..          ...       ...        ...        ...        ...       ...   
79     4.600226  4.743070   4.729583   4.743963   4.596065  4.614879   
80     4.581353  4.745898   4.726317   4.740987   4.592440  4.598820   
81     4.557084  4.746480   4.723012   4.740128   4.600889  4.656413   
82     4.557989  4.741563   4.718304   4.744750   4.590495  4.673229   
83     4.549179  4.741892   4.712522   4.741257   4.591383  4.650291   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.597547       4.667270   4.721926  7.362274e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      3.660803  4.680379   4.148595   4.546881   3.488935  3.946910   
1      3.585739  4.662675   4.180786   4.517309   3.399899  3.875454   
2      3.636015  4.672263   4.168223   4.543787   3.541681  3.928924   
3      3.936417  5.143235   4.711509   5.023590   3.833489  4.220738   
4      3.663419  4.662248   4.193140   4.556486   3.574887  3.952095   
..          ...       ...        ...        ...        ...       ...   
79     3.509258  4.701255   4.373152   4.577587   3.631457  3.815235   
80     3.527123  4.696685   4.378062   4.580126   3.637227  3.811812   
81     3.573620  4.698622   4.326327   4.581423   3.597362  3.751924   
82     3.640962  4.688724   4.336359   4.584107   3.594375  3.862138   
83     3.770605  4.698981   4.332081   4.586344   3.675591  3.878917   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   1.716544       4.203283   3.696468  6.248113e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.333527  2.021010   4.317343   2.978264   3.390206  3.869607   
1      4.368900  2.096792   4.358952   3.179664   3.493363  3.830288   
2      4.396994  2.176607   4.264151   3.073718   3.432199  3.755720   
3      4.916801  2.484570   4.806518   3.431453   3.911826  4.167547   
4      4.417250  2.150993   4.293114   3.042922   3.382297  3.782357   
..          ...       ...        ...        ...        ...       ...   
79     4.550544  2.521320   4.401956   3.444174   3.577201  3.958920   
80     4.539316  2.339812   4.380910   3.274305   3.537336  3.939036   
81     4.548757  2.458319   4.325878   3.307069   3.529942  3.959266   
82     4.555550  2.500488   4.317039   3.256844   3.605926  4.005111   
83     4.498311  2.602813   4.121825   3.225187   3.541225  3.909769   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.705296       3.023109   4.491390  5.588358e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.569237  4.720823   4.565732   4.656738   4.678309  4.685879   
1      4.550680  4.715951   4.529303   4.653045   4.680855  4.673205   
2      4.519308  4.720724   4.524832   4.666742   4.683885  4.676788   
3      4.989685  5.207710   5.036676   5.155682   5.174927  5.168732   
4      4.512099  4.723988   4.564067   4.668565   4.682313  4.678055   
..          ...       ...        ...        ...        ...       ...   
79     4.492398  4.710962   4.615527   4.698874   4.694085  4.707583   
80     4.485617  4.706061   4.587252   4.703003   4.692749  4.665842   
81     4.470452  4.697228   4.587561   4.703809   4.708758  4.698790   
82     4.494717  4.688101   4.583533   4.702501   4.690671  4.694823   
83     4.527962  4.684043   4.589567   4.693723   4.696140  4.702466   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.730320       4.571063   4.667598  7.313686e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.569237  4.720823   4.565732   4.656738   4.678309  4.685879   
1      4.550680  4.715951   4.529303   4.653045   4.680855  4.673205   
2      4.519308  4.720724   4.524832   4.666742   4.683885  4.676788   
3      4.989685  5.207710   5.036676   5.155682   5.174927  5.168732   
4      4.512099  4.723988   4.564067   4.668565   4.682313  4.678055   
..          ...       ...        ...        ...        ...       ...   
79     4.492398  4.710962   4.615527   4.698874   4.694085  4.707583   
80     4.485617  4.706061   4.587252   4.703003   4.692749  4.665842   
81     4.470452  4.697228   4.587561   4.703809   4.708758  4.698790   
82     4.494717  4.688101   4.583533   4.702501   4.690671  4.694823   
83     4.527962  4.684043   4.589567   4.693723   4.696140  4.702466   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.730320       4.571063   4.667598  7.313686e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.665557  4.728385   4.094454   4.471800   4.554706  4.545566   
1      4.677846  4.728407   4.098578   4.441039   4.511194  4.516265   
2      4.673153  4.728352   4.168351   4.472059   4.529909  4.531047   
3      5.170996  5.217105   4.538981   4.951637   5.006857  4.999539   
4      4.674163  4.727702   4.189853   4.479113   4.529985  4.543670   
..          ...       ...        ...        ...        ...       ...   
79     4.664392  4.719793   4.062792   4.487052   4.547186  4.499940   
80     4.669545  4.720582   4.158031   4.504024   4.553552  4.519877   
81     4.667566  4.717378   4.189009   4.517262   4.561553  4.538968   
82     4.666869  4.702290   4.218192   4.517367   4.544323  4.555267   
83     4.673718  4.711301   4.249044   4.516461   4.537807  4.552970   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.748567       4.567519   4.279806  7.044802e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.517327  4.437484   4.511186   4.729092   4.697035  4.403252   
1      4.568363  4.405489   4.519356   4.729544   4.702790  4.369331   
2      4.559114  4.388262   4.513396   4.731844   4.700249  4.354397   
3      5.032614  4.743244   4.961915   5.225099   5.182986  4.796963   
4      4.574879  4.420380   4.472169   4.732585   4.694372  4.303453   
..          ...       ...        ...        ...        ...       ...   
79     4.508913  4.372537   4.531583   4.709977   4.650350  4.230161   
80     4.533193  4.354326   4.518858   4.712400   4.644610  4.280185   
81     4.518842  4.282779   4.449014   4.716688   4.638917  4.189252   
82     4.558361  4.307242   4.415740   4.715016   4.636505  4.369452   
83     4.539836  4.283147   4.347215   4.712840   4.627162  4.341305   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.684063       4.720614   4.736550  7.303561e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.601187  4.483523   4.741316   4.743869   4.615498  4.741829   
1      4.617709  4.493136   4.744651   4.743656   4.651706  4.744105   
2      4.614893  4.468451   4.738156   4.741560   4.630825  4.740152   
3      5.000000  5.000000   5.000000   5.000000   5.000000  5.000000   
4      4.621584  4.490407   4.747832   4.742043   4.619810  4.743108   
..          ...       ...        ...        ...        ...       ...   
79     4.635722  4.520811   4.731115   4.739500   4.626352  4.725793   
80     4.642986  4.485074   4.714984   4.744010   4.592576  4.718988   
81     4.612769  4.447022   4.746580   4.741729   4.582478  4.729940   
82     4.627490  4.454656   4.732475   4.741220   4.592005  4.732038   
83     4.587738  4.467590   4.736867   4.738279   4.567397  4.715508   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.718265       4.727876   4.759877  7.400864e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.705682  4.760694   4.021274   4.722099   4.753872  4.758151   
1      4.699228  4.759923   4.036351   4.705923   4.746892  4.757613   
2      4.714339  4.760429   4.011210   4.714829   4.753254  4.758608   
3      5.209636  5.260674   4.358426   5.210411   5.254989  5.257831   
4      4.711295  4.759502   3.973638   4.715243   4.754430  4.758563   
..          ...       ...        ...        ...        ...       ...   
79     4.712239  4.759807   3.784287   4.707390   4.755596  4.757899   
80     4.705873  4.760248   3.898721   4.708980   4.754212  4.757937   
81     4.702441  4.760584   3.755892   4.710212   4.753580  4.759131   
82     4.705693  4.760511   3.875861   4.709885   4.753895  4.759085   
83     4.715576  4.760705   4.088895   4.705262   4.753042  4.759009   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.754496       4.626496   4.761644  7.341490e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.735141  4.743870   4.557219   4.727672   4.729197  4.451770   
1      4.740271  4.746300   4.499949   4.722847   4.730917  4.456125   
2      4.743061  4.746060   4.483814   4.729167   4.730452  4.441925   
3      5.000000  5.000000   5.000000   5.000000   5.000000  5.000000   
4      4.747557  4.747485   4.660533   4.728759   4.729426  4.311393   
..          ...       ...        ...        ...        ...       ...   
79     4.746225  4.752137   4.709323   4.728512   4.729611  4.617285   
80     4.749560  4.753233   4.720050   4.732741   4.734024  4.672165   
81     4.753020  4.752079   4.729174   4.732838   4.739928  4.588983   
82     4.747658  4.745416   4.704422   4.731190   4.730126  4.579956   
83     4.743727  4.749380   4.723191   4.727125   4.731381  4.622802   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.760445       4.725722   4.751492  7.381733e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.592184  4.585268   4.614745   4.672956   4.640042  4.631326   
1      4.573160  4.561126   4.599947   4.672348   4.647780  4.637130   
2      4.547020  4.598441   4.603836   4.672497   4.643112  4.637885   
3      5.043830  5.060171   5.121513   5.164985   5.118929  5.107995   
4      4.549155  4.562419   4.628323   4.669624   4.630370  4.611426   
..          ...       ...        ...        ...        ...       ...   
79     4.521356  4.523872   4.607950   4.655214   4.647232  4.644734   
80     4.544935  4.556732   4.596426   4.677857   4.658307  4.649957   
81     4.541626  4.529819   4.572699   4.675555   4.670961  4.642151   
82     4.544374  4.464803   4.596991   4.668710   4.653921  4.651786   
83     4.561468  4.453807   4.627592   4.679373   4.652291  4.637968   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.754128       4.653356   4.697393  7.325017e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.726227  4.734004   4.488667   4.375119   4.546008  4.552315   
1      4.727834  4.738088   4.563043   4.388405   4.551567  4.568589   
2      4.716086  4.730065   4.553562   4.301400   4.497345  4.548575   
3      5.207395  5.212850   5.017439   4.761632   4.925165  4.969883   
4      4.714632  4.730165   4.543370   4.379774   4.541188  4.562601   
..          ...       ...        ...        ...        ...       ...   
79     4.708535  4.735070   4.428293   4.423839   4.605106  4.649114   
80     4.712557  4.731569   4.540882   4.451245   4.610234  4.636005   
81     4.716836  4.739511   4.548369   4.437630   4.620791  4.644886   
82     4.709992  4.729415   4.546604   4.466462   4.612759  4.627050   
83     4.715080  4.732127   4.520935   4.466913   4.608338  4.647269   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.753804       4.591411   3.820369  6.951534e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_

    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.739082  4.740453   4.735929   4.580655   4.653419  4.739892   
1      4.743092  4.740915   4.740985   4.580804   4.663869  4.747233   
2      4.738882  4.737212   4.735638   4.585565   4.650228  4.745692   
3      5.232501  5.234439   5.237193   5.044002   5.117388  5.238906   
4      4.740159  4.738461   4.738319   4.564617   4.657477  4.747796   
..          ...       ...        ...        ...        ...       ...   
79     4.743703  4.754174   4.699307   4.524860   4.527488  4.741022   
80     4.749676  4.752636   4.688516   4.551966   4.624360  4.742188   
81     4.745104  4.750750   4.729726   4.566669   4.601923  4.744794   
82     4.746536  4.749422   4.747251   4.560495   4.605128  4.728443   
83     4.743680  4.750282   4.745937   4.525471   4.624565  4.731113   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.761186       4.715440   4.759125  7.357887e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      3.482199  4.552327   4.643274   3.980889   4.346737  4.278311   
1      3.350172  4.516379   4.632953   3.869052   4.306225  4.208311   
2      3.423117  4.523291   4.647252   3.980916   4.367440  4.320278   
3      3.737308  4.939687   5.147568   4.345629   4.793959  4.701427   
4      3.435159  4.521068   4.645170   3.940694   4.365763  4.303329   
..          ...       ...        ...        ...        ...       ...   
79     3.593321  4.537221   4.659081   3.886742   4.341185  4.204797   
80     3.541501  4.539167   4.656629   3.929115   4.324177  4.197609   
81     3.497375  4.536312   4.643546   3.911487   4.323396  4.178298   
82     3.413327  4.520990   4.655569   3.951481   4.325233  4.207088   
83     3.358019  4.538278   4.642983   4.016410   4.344590  4.137925   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.163849       4.192635   3.653592  6.422156e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.641364  4.750479   4.623956   4.722458   4.731892  4.737201   
1      4.647687  4.749457   4.635049   4.726871   4.728064  4.731456   
2      4.651583  4.751475   4.649880   4.721203   4.731142  4.738586   
3      5.127784  5.251922   5.135719   5.231559   5.227980  5.233257   
4      4.631360  4.750360   4.640524   4.730125   4.728814  4.729223   
..          ...       ...        ...        ...        ...       ...   
79     4.648209  4.751971   4.656661   4.719098   4.735982  4.736968   
80     4.641260  4.752170   4.598265   4.730354   4.734498  4.726846   
81     4.644930  4.751720   4.617786   4.733657   4.730734  4.730718   
82     4.643730  4.748638   4.587704   4.729404   4.728031  4.718839   
83     4.669190  4.751734   4.560152   4.732775   4.733568  4.731395   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.669971       4.702794   4.750698  7.405605e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.738426  4.717083   4.730592   4.611853   4.652912  4.589734   
1      4.738560  4.717032   4.726126   4.605931   4.648585  4.592112   
2      4.732844  4.709577   4.729737   4.584207   4.622424  4.523512   
3      5.228393  5.200517   5.225934   5.064547   5.099926  5.008236   
4      4.731598  4.713886   4.728864   4.610614   4.631525  4.583233   
..          ...       ...        ...        ...        ...       ...   
79     4.727587  4.721505   4.722303   4.476152   4.645594  4.565542   
80     4.726745  4.713558   4.728636   4.481319   4.632442  4.547486   
81     4.718698  4.712841   4.719675   4.468494   4.608644  4.536997   
82     4.732531  4.719962   4.722121   4.488463   4.641414  4.582939   
83     4.729035  4.719999   4.720679   4.479723   4.633761  4.539144   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.737662       4.659522   4.711972  7.330168e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.724254  4.705030   4.643175   4.561183   4.584868  4.391254   
1      4.735426  4.717919   4.694612   4.573863   4.608397  4.518373   
2      4.724326  4.699148   4.696718   4.583051   4.564122  4.500350   
3      5.208809  5.176323   5.176728   5.046270   5.011536  4.937386   
4      4.721991  4.700871   4.674943   4.551813   4.572004  4.573359   
..          ...       ...        ...        ...        ...       ...   
79     4.711722  4.694201   4.659901   4.513855   4.566546  4.585563   
80     4.705046  4.700955   4.632338   4.512571   4.567721  4.558233   
81     4.726684  4.704524   4.628680   4.534818   4.596950  4.592886   
82     4.713309  4.698816   4.654082   4.539969   4.581373  4.598106   
83     4.710817  4.698883   4.587579   4.528783   4.572932  4.568231   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.675080       4.697262   4.338856  7.168928e+09  2015      1  

    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.749688  4.761398   4.712979   4.760381   4.746637  4.618929   
1      4.751363  4.761598   4.704031   4.760072   4.744901  4.621293   
2      4.748688  4.761343   4.702690   4.759936   4.745579  4.642150   
3      5.237934  5.262758   5.201794   5.261637   5.242826  5.105255   
4      4.737828  4.761305   4.709117   4.760323   4.742997  4.664647   
..          ...       ...        ...        ...        ...       ...   
79     4.701668  4.761528   4.719454   4.760228   4.743876  4.589679   
80     4.712225  4.761422   4.729706   4.760307   4.736351  4.590368   
81     4.703062  4.761328   4.718989   4.760302   4.735000  4.633093   
82     4.696629  4.761581   4.701628   4.760421   4.731185  4.592093   
83     4.700038  4.761356   4.737649   4.760415   4.738264  4.608479   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.407983       4.743406   4.757820  7.405686e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_

    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.756117  4.761237   4.620061   4.707435   4.749432  4.743217   
1      4.755868  4.761240   4.621097   4.711555   4.745082  4.744924   
2      4.755997  4.761000   4.643221   4.707541   4.746450  4.748230   
3      5.256722  5.262498   4.978117   5.187058   5.242210  5.252543   
4      4.756620  4.760186   4.644642   4.699726   4.747392  4.737248   
..          ...       ...        ...        ...        ...       ...   
79     4.714991  4.761686   4.646247   4.704476   4.749037  4.743841   
80     4.680783  4.761461   4.475169   4.703816   4.747105  4.743690   
81     4.712484  4.761046   4.551769   4.708408   4.749507  4.743234   
82     4.655429  4.761302   4.597630   4.708765   4.746947  4.746381   
83     4.669848  4.761274   4.661991   4.708843   4.745582  4.745316   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.761784       4.696486   4.759580  7.422696e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.688654  4.754769   4.455265   4.695889   4.744907  4.713861   
1      4.698663  4.755831   4.472882   4.685248   4.739166  4.705889   
2      4.696492  4.756935   4.434575   4.691639   4.741678  4.678062   
3      5.179443  5.256728   4.958042   5.190012   5.239246  5.162139   
4      4.695023  4.753586   4.473138   4.693234   4.741885  4.682307   
..          ...       ...        ...        ...        ...       ...   
79     4.690495  4.758618   4.204097   4.693576   4.746664  4.597097   
80     4.701801  4.758510   4.219610   4.690410   4.747642  4.641572   
81     4.702162  4.756340   4.334281   4.688853   4.748623  4.646132   
82     4.679939  4.756982   4.248699   4.686807   4.744960  4.350593   
83     4.688125  4.757304   4.255277   4.688984   4.745815  4.538493   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.761847       4.666796   4.751599  7.379023e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.581122  4.700719   4.753109   4.655098   4.654578  4.678904   
1      4.617207  4.709814   4.751937   4.648370   4.670942  4.691201   
2      4.580032  4.695716   4.752651   4.647270   4.658520  4.673370   
3      5.025127  5.173857   5.249956   5.134133   5.124239  5.157432   
4      4.585863  4.705976   4.752936   4.652146   4.659593  4.668939   
..          ...       ...        ...        ...        ...       ...   
79     4.594535  4.705467   4.747829   4.609773   4.581439  4.658835   
80     4.608569  4.709146   4.747847   4.611124   4.570722  4.652531   
81     4.611241  4.704907   4.745391   4.624228   4.580028  4.656523   
82     4.612443  4.705601   4.744772   4.621634   4.575368  4.656181   
83     4.589788  4.703506   4.744065   4.615069   4.584482  4.658531   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.761867       4.730842   4.746729  7.374752e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.630619  4.745769   4.549285   4.658780   4.689287  4.643656   
1      4.651440  4.740380   4.421769   4.641129   4.669188  4.663122   
2      4.647843  4.743687   4.495871   4.657744   4.678365  4.674573   
3      5.116284  5.239183   4.659666   5.145449   5.170355  5.159653   
4      4.622951  4.741728   4.246823   4.655718   4.684121  4.681029   
..          ...       ...        ...        ...        ...       ...   
79     4.682513  4.747492   4.566131   4.682524   4.703390  4.672411   
80     4.662137  4.743701   4.558198   4.682443   4.700419  4.670755   
81     4.671946  4.745250   4.615040   4.682007   4.705886  4.703434   
82     4.666368  4.740340   4.551268   4.685702   4.696996  4.653113   
83     4.684797  4.741729   4.550050   4.689797   4.699553  4.675884   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.613825       4.489088   4.628736  7.281284e+09  2015      1  

    Agriculture   Apparel  Chemicals  Machinery  Materials    Metals  \
0      4.602587  4.174772   4.756380   4.698168   4.730539  4.736067   
1      4.639490  4.178541   4.757352   4.698049   4.730047  4.736725   
2      4.618873  4.125869   4.754631   4.687136   4.723476  4.739293   
3      5.079778  4.437907   5.257060   5.181953   5.217979  5.225495   
4      4.571403  4.136053   4.755316   4.680918   4.722794  4.736255   
..          ...       ...        ...        ...        ...       ...   
79     4.541449  3.745503   4.743427   4.512633   4.598558  4.682702   
80     4.578198  3.957053   4.744595   4.516897   4.607297  4.678097   
81     4.603299  3.985161   4.741603   4.456733   4.614152  4.674187   
82     4.630105  4.053157   4.743756   4.428808   4.618364  4.656288   
83     4.573465  3.929869   4.743903   4.460968   4.595130  4.647215   

    Minerals  Miscellaneous  Transport           sum  year  month  
0   4.761472       4.628010   4.756778  7.349464e+09  2015      1  

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()


# DID-trade-war

In [32]:
import re
import ipystata

region_list = list(top_countries)
sector_list = sectors

# empty dir
panel_dir = project_dir / 'temp'
for fp in panel_dir.rglob('*.txt'):
    os.remove(fp)

In [33]:
%%stata -i sector_list -i region_list
foreach region in `region_list' {
    display "------------------------`region'---------------------------------"
    clear
    cd "C:\shares_TARIFF_project\comtrade_new\DID_country"
    foreach sector in `sector_list' {
        import delimited "./panels-trade-war/`region'-`sector'.csv"
        egen time1 = group(date)
        xtset time1 group
        reghdfe value did, absorb(time1 group)
        outreg2 using "./temp/tradewar-`region'-`sector'.txt"
        clear
    }
}


------------------------Brazil---------------------------------
C:\shares_TARIFF_project\comtrade_new\DID_country
(6 vars, 120 obs)
       panel variable:  time1 (strongly balanced)
        time variable:  group, 0 to 1
                delta:  1 unit
(MWFE estimator converged in 2 iterations)

HDFE Linear regression                            Number of obs   =        120
Absorbing 2 HDFE groups                           F(   1,     58) =       5.06
                                                  Prob > F        =     0.0283
                                                  R-squared       =     0.7174
                                                  Adj R-squared   =     0.4203
                                                  Within R-sq.    =     0.0802
                                                  Root MSE        =     0.4603

------------------------------------------------------------------------------
       value |      Coef.   Std. Err.      t    P>|t|     [95% Conf. In

In [34]:
def get_result(result_fp):
    with open(result_fp, 'r') as fp:
        result_str = fp.read()
        match_str = re.findall(r'did\t(.*?)\n', result_str)[0]
        star_count = match_str.count('*')
        match_str = match_str.replace(',', '')
        if match_str == '-':# 若匹配到空，判定不显著
            significant = -1
            coff = -999
        else:
            if star_count == 0:
                significant = -1
                coff = float(match_str)
            else:
                coff = float(match_str.split('*')[0])
                if star_count == 1:
                    significant = 0.1
                elif star_count == 2:
                    significant = 0.05
                else:
                    significant = 0.01
    return coff, significant

result_list = []
for region in region_list:
    for sector in sector_list:
        result_fp = project_dir / 'temp' / f'tradewar-{region}-{sector}.txt'
        coff, significant = get_result(result_fp)
        result_list.append((region, sector, coff, significant))
result_df = pd.DataFrame(result_list, columns=['region', 'sector', 'coff', 'significance'])



# # 将coff的值区间缩放至 -1,1
# data = result_df['coff']
# data_t = (data-data.mean())/(data.max()-data.min())
# result_df['coff'] = data_t
result_df.to_csv('C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-trade-war-meso.csv', header=True, index=False)

# get panel data - pandemic

In [35]:
country2region_dict = {}
for rg, ct_list in region2countries_dict.items():
    for ct in ct_list:
        country2region_dict[ct] = rg



volume_sum_df = volume_df.groupby(by=['source', 'year', 'month']).sum()
volume_sum_df['region'], volume_sum_df['year'], volume_sum_df['month'] = zip(*volume_sum_df.index)
volume_sum_df.reset_index(drop=True, inplace=True)

# volume_mean_df = volume_sum_df.groupby(by=['year', 'month']).mean()
# volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
# volume_mean_df.reset_index(drop=True, inplace=True)



# volume => percentage 转化为占比
years = volume_sum_df['year'].unique()
months = volume_sum_df['month'].unique()
for y in years:
    for m in months:
        query_df = volume_sum_df.query('year == @y and month == @m')[sectors]
        _sum = query_df.sum()
        secter2sum_dict = {s:v for s, v in zip(_sum.index, _sum.tolist())}
        for idx in query_df.index:
            for s, v in secter2sum_dict.items():
                _v = volume_sum_df.loc[idx, s]
                p = _v / v * 100
                volume_sum_df.loc[idx, s] = p


In [36]:
# empty dir
panel_dir = project_dir / 'panels-pandemic'
for fp in panel_dir.rglob('*.csv'):
    os.remove(fp)

In [37]:
exp_years = [2020,2021]
com_years = [2018,2019]


def get_did_panel(volume_sum_df, exp_years, com_years, sector, region):
    
    volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
    volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
    volume_mean_df.reset_index(drop=True, inplace=True)
    
    com_df = volume_mean_df[['year', 'month', sector]]
    com_df['name'] = 'mean'
    com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
    com_df.loc[com_df.query('year in @exp_years').index, 'time'] = 1
    com_df = com_df.dropna()
    com_df['time'] = com_df['time'].astype('int')
    com_df.loc[:, 'group'] = 0


    exp_df = volume_sum_df.query('region == @region')[['region', 'year', 'month', sector]]
    exp_df = exp_df.rename(columns={'region': 'name'})
    exp_df.loc[exp_df.query('year in @com_years').index, 'time'] = 0
    exp_df.loc[exp_df.query('year in @exp_years').index, 'time'] = 1
    exp_df = exp_df.dropna()
    exp_df['time'] = exp_df['time'].astype('int')
    exp_df.loc[:, 'group'] = 1


    concat_df = pd.concat([exp_df, com_df])
    concat_df['did'] = concat_df.apply(lambda x: x['time'] * x['group'], axis=1)
    concat_df['date'] = concat_df.apply(lambda x: '{}-{}'.format(x['year'], x['month']), axis=1)
    del concat_df['year'], concat_df['month']
    concat_df = concat_df.rename(columns={sector: 'value'})
    # concat_df['value'] = np.log(concat_df['value'] + 1)
    return concat_df

for region in top_countries:
    for sector in sectors:
        panel_df = get_did_panel(volume_sum_df, exp_years, com_years, sector, region)
        panel_df.to_csv(panel_dir / f'{region}-{sector}.csv', header=True, index=False)

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = com_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[:, 'group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = com_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[:, 'group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.


  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = com_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[:, 'group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = com_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[:, 'group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

# did-pandemic

In [38]:
import re
import ipystata

region_list = list(top_countries)
sector_list = sectors


# empty dir
panel_dir = project_dir / 'temp-1'
for fp in panel_dir.rglob('*.txt'):
    os.remove(fp)

In [39]:
%%stata -i sector_list -i region_list
foreach region in `region_list' {
    display "------------------------`region'---------------------------------"
    clear
    cd "C:\shares_TARIFF_project\comtrade_new\DID_country"
    foreach sector in `sector_list' {
        import delimited "./panels-pandemic/`region'-`sector'.csv"
        egen time1 = group(date)
        xtset time1 group
        reghdfe value did, absorb(time1 group)
        outreg2 using "./temp-1/pandemic-`region'-`sector'.txt"
        clear
    }
}


------------------------Brazil---------------------------------
C:\shares_TARIFF_project\comtrade_new\DID_country
(6 vars, 96 obs)
       panel variable:  time1 (strongly balanced)
        time variable:  group, 0 to 1
                delta:  1 unit
(MWFE estimator converged in 2 iterations)

HDFE Linear regression                            Number of obs   =         96
Absorbing 2 HDFE groups                           F(   1,     46) =       0.30
                                                  Prob > F        =     0.5871
                                                  R-squared       =     0.7722
                                                  Adj R-squared   =     0.5296
                                                  Within R-sq.    =     0.0065
                                                  Root MSE        =     0.4831

------------------------------------------------------------------------------
       value |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Int

In [40]:
def get_result(result_fp):
    with open(result_fp, 'r') as fp:
        result_str = fp.read()
        match_str = re.findall(r'did\t(.*?)\n', result_str)[0]
        star_count = match_str.count('*')
        match_str = match_str.replace(',', '')
        if match_str == '-':# 若匹配到空，判定不显著
            significant = -1
            coff = -999
        else:
            if star_count == 0:
                significant = -1
                coff = float(match_str)
            else:
                coff = float(match_str.split('*')[0])
                if star_count == 1:
                    significant = 0.1
                elif star_count == 2:
                    significant = 0.05
                else:
                    significant = 0.01
    return coff, significant

result_list = []
for region in region_list:
    for sector in sector_list:
        result_fp = project_dir / 'temp-1' / f'pandemic-{region}-{sector}.txt'
        coff, significant = get_result(result_fp)
        result_list.append((region, sector, coff, significant))
result_df = pd.DataFrame(result_list, columns=['region', 'sector', 'coff', 'significance'])


# # 将coff的值区间缩放至 -1,1
# data = result_df['coff']
# data_t = (data-data.mean())/(data.max()-data.min())
# result_df['coff'] = data_t
result_df.to_csv('C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-pandemic-meso-true.csv', header=True, index=False)
#result_df.to_csv('C:\\shares_TARIFF_project\\comtrade_new\\DID_country_cancel2018\\did-summary\\did-result-pandemic-meso-true.csv', header=True, index=False)
# result_df

# get panel data -two events

In [47]:
country2region_dict = {}
for rg, ct_list in region2countries_dict.items():
    for ct in ct_list:
        country2region_dict[ct] = rg



volume_sum_df = volume_df.groupby(by=['source', 'year', 'month']).sum()
volume_sum_df['region'], volume_sum_df['year'], volume_sum_df['month'] = zip(*volume_sum_df.index)
volume_sum_df.reset_index(drop=True, inplace=True)

# volume_mean_df = volume_sum_df.groupby(by=['year', 'month']).mean()
# volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
# volume_mean_df.reset_index(drop=True, inplace=True)



# volume => percentage 转化为占比
years = volume_sum_df['year'].unique()
months = volume_sum_df['month'].unique()
for y in years:
    for m in months:
        query_df = volume_sum_df.query('year == @y and month == @m')[sectors]
        _sum = query_df.sum()
        secter2sum_dict = {s:v for s, v in zip(_sum.index, _sum.tolist())}
        for idx in query_df.index:
            for s, v in secter2sum_dict.items():
                _v = volume_sum_df.loc[idx, s]
                p = _v / v * 100
                volume_sum_df.loc[idx, s] = p

  volume_sum_df = volume_df.groupby(by=['source', 'year', 'month']).sum()


In [48]:
# empty dir
panel_dir = project_dir / 'panels-two-events'
for fp in panel_dir.rglob('*.csv'):
    os.remove(fp)

In [49]:
exp_years = [2020,2021]
com_years = [2015,2016,2017]


def get_did_panel(volume_sum_df, exp_years, com_years, sector, region):
    
    volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
    volume_mean_df['year'], volume_mean_df['month'] = zip(*volume_mean_df.index)
    volume_mean_df.reset_index(drop=True, inplace=True)
    
    com_df = volume_mean_df[['year', 'month', sector]]
    com_df['name'] = 'mean'
    com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
    com_df.loc[com_df.query('year in @exp_years').index, 'time'] = 1
    com_df = com_df.dropna()
    com_df['time'] = com_df['time'].astype('int')
    com_df.loc[:, 'group'] = 0


    exp_df = volume_sum_df.query('region == @region')[['region', 'year', 'month', sector]]
    exp_df = exp_df.rename(columns={'region': 'name'})
    exp_df.loc[exp_df.query('year in @com_years').index, 'time'] = 0
    exp_df.loc[exp_df.query('year in @exp_years').index, 'time'] = 1
    exp_df = exp_df.dropna()
    exp_df['time'] = exp_df['time'].astype('int')
    exp_df.loc[:, 'group'] = 1


    concat_df = pd.concat([exp_df, com_df])
    concat_df['did'] = concat_df.apply(lambda x: x['time'] * x['group'], axis=1)
    concat_df['date'] = concat_df.apply(lambda x: '{}-{}'.format(x['year'], x['month']), axis=1)
    del concat_df['year'], concat_df['month']
    concat_df = concat_df.rename(columns={sector: 'value'})
    # concat_df['value'] = np.log(concat_df['value'] + 1)
    return concat_df

for region in top_countries:
    for sector in sectors:
        panel_df = get_did_panel(volume_sum_df, exp_years, com_years, sector, region)
        panel_df.to_csv(panel_dir / f'{region}-{sector}.csv', header=True, index=False)

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = com_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[:, 'group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df['time'] = exp_df['time'].astype('int')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exp_df.loc[:, 'group'] = 1
  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is 

  volume_mean_df = volume_sum_df.query('region != @region').groupby(by=['year', 'month']).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['name'] = 'mean'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df.loc[com_df.query('year in @com_years').index, 'time'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  com_df['time'] = co

# DID-two-events

In [51]:
import re
import ipystata

region_list = list(top_countries)
sector_list = sectors


# empty dir
panel_dir = project_dir / 'temp-2'
for fp in panel_dir.rglob('*.txt'):
    os.remove(fp)

In [52]:
%%stata -i sector_list -i region_list
foreach region in `region_list' {
    display "------------------------`region'---------------------------------"
    clear
    cd "C:\shares_TARIFF_project\comtrade_new\DID_country"
    foreach sector in `sector_list' {
        import delimited "./panels-two-events/`region'-`sector'.csv"
        egen time1 = group(date)
        xtset time1 group
        reghdfe value did, absorb(time1 group)
        outreg2 using "./temp-2/two-events-`region'-`sector'.txt"
        clear
    }
}


------------------------Brazil---------------------------------
C:\shares_TARIFF_project\comtrade_new\DID_country
(6 vars, 120 obs)
       panel variable:  time1 (strongly balanced)
        time variable:  group, 0 to 1
                delta:  1 unit
(MWFE estimator converged in 2 iterations)

HDFE Linear regression                            Number of obs   =        120
Absorbing 2 HDFE groups                           F(   1,     58) =       8.17
                                                  Prob > F        =     0.0059
                                                  R-squared       =     0.7378
                                                  Adj R-squared   =     0.4621
                                                  Within R-sq.    =     0.1235
                                                  Root MSE        =     0.4633

------------------------------------------------------------------------------
       value |      Coef.   Std. Err.      t    P>|t|     [95% Conf. In

In [54]:
def get_result(result_fp):
    with open(result_fp, 'r') as fp:
        result_str = fp.read()
        match_str = re.findall(r'did\t(.*?)\n', result_str)[0]
        star_count = match_str.count('*')
        match_str = match_str.replace(',', '')
        if match_str == '-':# 若匹配到空，判定不显著
            significant = -1
            coff = -999
        else:
            if star_count == 0:
                significant = -1
                coff = float(match_str)
            else:
                coff = float(match_str.split('*')[0])
                if star_count == 1:
                    significant = 0.1
                elif star_count == 2:
                    significant = 0.05
                else:
                    significant = 0.01
    return coff, significant

result_list = []
for region in region_list:
    for sector in sector_list:
        result_fp = project_dir / 'temp-2' / f'two-events-{region}-{sector}.txt'
        coff, significant = get_result(result_fp)
        result_list.append((region, sector, coff, significant))
result_df = pd.DataFrame(result_list, columns=['region', 'sector', 'coff', 'significance'])


# # 将coff的值区间缩放至 -1,1
# data = result_df['coff']
# data_t = (data-data.mean())/(data.max()-data.min())
# result_df['coff'] = data_t
result_df.to_csv('C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-two-events.csv', header=True, index=False)
#result_df.to_csv('C:\\shares_TARIFF_project\\comtrade_new\\DID_country_cancel2018\\did-summary\\did-result-pandemic-meso-true.csv', header=True, index=False)
# result_df

In [56]:
result_two_events_fp = 'C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-two-events.csv'
result_two_events_df = pd.read_csv(result_two_events_fp)
result_pandemic_fp = 'C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-pandemic-meso-true.csv'
result_pandemic_df = pd.read_csv(result_pandemic_fp)
result_trade_war_fp = 'C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-trade-war-meso.csv'
result_trade_war_df = pd.read_csv(result_trade_war_fp)

import numpy as np

# 如果sign=-1（不显著），设置coff为NaN
def func(x):
    if x['significance'] == -1:
        return np.nan
    else:
        return x['coff']
result_two_events_df['coff'] = result_two_events_df.apply(func, axis=1)
result_pandemic_df['coff'] = result_pandemic_df.apply(func, axis=1)
result_trade_war_df['coff'] = result_trade_war_df.apply(func, axis=1)

merge_df = pd.merge(result_two_events_df,result_pandemic_df,  
         left_on=['region', 'sector'], 
         right_on=['region', 'sector'], 
         how='inner') \
    .rename(columns={'coff_x': 'coff_two_events', 
                     'coff_y': 'coff_pandemic',
                     'significance_x': 'sign_two_events', 
                     'significance_y': 'sign_pandemic'})


merge_df = pd.merge(merge_df, result_trade_war_df, 
         left_on=['region', 'sector'], 
         right_on=['region', 'sector'], 
         how='inner') \
    .rename(columns={'coff': 'coff_trade_war', 
                     'significance': 'sign_trade_war'})





# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# data = merge_df['coff_tradewar'].tolist()
# data_t = scaler.fit_transform(np.array(data).reshape(-1,1)).reshape(1,-1).tolist()[0]
# merge_df['coff_tradewar'] = data_t


# data = merge_df['coff_pandemic'].tolist()
# data_t = scaler.fit_transform(np.array(data).reshape(-1,1)).reshape(1,-1).tolist()[0]
# merge_df['coff_pandemic'] = data_t

merge_df.to_csv('C:\shares_TARIFF_project\comtrade_new\DID_country\did-result-merge-meso.csv', header=True, index=False)
merge_df.head()

Unnamed: 0,region,sector,coff_two_events,sign_two_events,coff_pandemic,sign_pandemic,coff_trade_war,sign_trade_war
0,Brazil,Agriculture,-0.494,0.01,,-1.0,-0.386,0.05
1,Brazil,Apparel,-0.0529,0.1,,-1.0,-0.0664,0.05
2,Brazil,Chemicals,-0.216,0.01,-0.202,0.01,,-1.0
3,Brazil,Machinery,0.062,0.05,,-1.0,,-1.0
4,Brazil,Materials,,-1.0,,-1.0,,-1.0


In [58]:
for coff_str in ['coff_two_events', 'coff_pandemic','coff_trade_war']:
    pivot_df = merge_df[['region', 'sector', coff_str]].set_index('region') \
        .pivot(columns='sector', values=coff_str)
    pivot_df.to_csv(f'C:\shares_TARIFF_project\comtrade_new\DID_country\pivot-{coff_str}-meso.csv', header=True, index=True)