In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.metrics import mean_squared_error, log_loss
import sklearn
import os

from utils.double_ml import *
from utils.analysis import *

In [2]:
RANDOM_SEED = 42

### Import data

In [3]:
wf2020 = make_wf2020()

fixed = ['treat']
city_fixed = []
time_fixed = []
for col in wf2020.columns:
    if 'cities' in col:
        city_fixed.append(col)
    if 'days' in col:
        time_fixed.append(col)
fixed = fixed + city_fixed + time_fixed
        
weather = ['prec', 'snow', 'temp', 'temp2']
city_economic = ['pop_city', 'sec_city', 'gdp_city' , 'pgdp_city', 'firm_city']
city_environmental = ['gonglu', 'emit_ww', 'emit_so1', 'emi_dust1']
out = ["aqi", "l_aqi", "pm", "l_pm"]

In [4]:
day, count, num_cities = get_day_count(wf2020)
treat_day = day[count == max(count)][0]

### Single time period

In [5]:
tau_hat, std_hat = single_period_estimate(wf2020, treat_day=8426, outcome_var='aqi', 
                                          confounder_list=weather + city_fixed + time_fixed,
                                          Q_model_class=LinearRegression, Q_model_params={},
                                          g_model_class=LogisticRegression, g_model_params={'max_iter':1000})

print(f"The estimate is {tau_hat} pm {1.96*std_hat}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


The estimate is -35.04651188879948 pm 11.730398758415467


### Multiple time periods

In [6]:
wf2020 = make_wf2020()
tau_hat, std_hat = multi_period_estimate(wf2020, outcome_var='aqi', 
                                         confounder_list=weather + city_fixed + time_fixed,
                                         Q_model_class=RandomForestRegressor, Q_model_params={
                                             'random_state': RANDOM_SEED,
                                             'n_estimators': 100,
                                             'max_depth': 10
                                         },
                                         g_model_class=RandomForestClassifier, g_model_params={
                                             'random_state': RANDOM_SEED,
                                             'n_estimators': 100,
                                             'max_depth': 3
                                         }
                                         )
print('%0.3f pm %0.3f' % (tau_hat, std_hat))

-9.556 pm 2.947


In [42]:
dummy = wf2020['pastweek_mean'] = wf2020.groupby('city_code')['aqi'].transform(
        lambda x: pd.Series.rolling(window=7, min_periods=7).mean()
    )

In [46]:
wf2020['pastweek_mean'] = wf2020.groupby(['daynum', 'city_code']).rolling(window=7, min_periods=7)['aqi'].mean().reset_index()

ValueError: Columns must be same length as key

In [28]:
wf2020.groupby('daynum')['daynum'].apply(display)

365       8401
804       8401
1243      8401
1682      8401
2121      8401
          ... 
142162    8401
142601    8401
143040    8401
143479    8401
144357    8401
Name: 8401, Length: 324, dtype: int64

366       8402
805       8402
1244      8402
1683      8402
2122      8402
          ... 
142163    8402
142602    8402
143041    8402
143480    8402
144358    8402
Name: 8402, Length: 324, dtype: int64

367       8403
806       8403
1245      8403
1684      8403
2123      8403
          ... 
142164    8403
142603    8403
143042    8403
143481    8403
144359    8403
Name: 8403, Length: 324, dtype: int64

368       8404
807       8404
1246      8404
1685      8404
2124      8404
          ... 
142165    8404
142604    8404
143043    8404
143482    8404
144360    8404
Name: 8404, Length: 324, dtype: int64

369       8405
808       8405
1247      8405
1686      8405
2125      8405
          ... 
142166    8405
142605    8405
143044    8405
143483    8405
144361    8405
Name: 8405, Length: 324, dtype: int64

370       8406
809       8406
1248      8406
1687      8406
2126      8406
          ... 
142167    8406
142606    8406
143045    8406
143484    8406
144362    8406
Name: 8406, Length: 324, dtype: int64

371       8407
810       8407
1249      8407
1688      8407
2127      8407
          ... 
142168    8407
142607    8407
143046    8407
143485    8407
144363    8407
Name: 8407, Length: 324, dtype: int64

372       8408
811       8408
1250      8408
1689      8408
2128      8408
          ... 
142169    8408
142608    8408
143047    8408
143486    8408
144364    8408
Name: 8408, Length: 324, dtype: int64

373       8409
812       8409
1251      8409
1690      8409
2129      8409
          ... 
142170    8409
142609    8409
143048    8409
143487    8409
144365    8409
Name: 8409, Length: 324, dtype: int64

374       8410
813       8410
1252      8410
1691      8410
2130      8410
          ... 
142171    8410
142610    8410
143049    8410
143488    8410
144366    8410
Name: 8410, Length: 324, dtype: int64

375       8411
814       8411
1253      8411
1692      8411
2131      8411
          ... 
142172    8411
142611    8411
143050    8411
143489    8411
144367    8411
Name: 8411, Length: 324, dtype: int64

376       8412
815       8412
1254      8412
1693      8412
2132      8412
          ... 
142173    8412
142612    8412
143051    8412
143490    8412
144368    8412
Name: 8412, Length: 324, dtype: int64

377       8413
816       8413
1255      8413
1694      8413
2133      8413
          ... 
142174    8413
142613    8413
143052    8413
143491    8413
144369    8413
Name: 8413, Length: 324, dtype: int64

378       8414
817       8414
1256      8414
1695      8414
2134      8414
          ... 
142175    8414
142614    8414
143053    8414
143492    8414
144370    8414
Name: 8414, Length: 324, dtype: int64

379       8415
818       8415
1257      8415
1696      8415
2135      8415
          ... 
142176    8415
142615    8415
143054    8415
143493    8415
144371    8415
Name: 8415, Length: 324, dtype: int64

380       8416
819       8416
1258      8416
1697      8416
2136      8416
          ... 
142177    8416
142616    8416
143055    8416
143494    8416
144372    8416
Name: 8416, Length: 324, dtype: int64

381       8417
820       8417
1259      8417
1698      8417
2137      8417
          ... 
142178    8417
142617    8417
143056    8417
143495    8417
144373    8417
Name: 8417, Length: 324, dtype: int64

382       8418
821       8418
1260      8418
1699      8418
2138      8418
          ... 
142179    8418
142618    8418
143057    8418
143496    8418
144374    8418
Name: 8418, Length: 324, dtype: int64

383       8419
822       8419
1261      8419
1700      8419
2139      8419
          ... 
142180    8419
142619    8419
143058    8419
143497    8419
144375    8419
Name: 8419, Length: 324, dtype: int64

384       8420
823       8420
1262      8420
1701      8420
2140      8420
          ... 
142181    8420
142620    8420
143059    8420
143498    8420
144376    8420
Name: 8420, Length: 324, dtype: int64

385       8421
824       8421
1263      8421
1702      8421
2141      8421
          ... 
142182    8421
142621    8421
143060    8421
143499    8421
144377    8421
Name: 8421, Length: 324, dtype: int64

386       8422
825       8422
1264      8422
1703      8422
2142      8422
          ... 
142183    8422
142622    8422
143061    8422
143500    8422
144378    8422
Name: 8422, Length: 324, dtype: int64

387       8423
826       8423
1265      8423
1704      8423
2143      8423
          ... 
142184    8423
142623    8423
143062    8423
143501    8423
144379    8423
Name: 8423, Length: 324, dtype: int64

388       8424
827       8424
1266      8424
1705      8424
2144      8424
          ... 
142185    8424
142624    8424
143063    8424
143502    8424
144380    8424
Name: 8424, Length: 324, dtype: int64

389       8425
828       8425
1267      8425
1706      8425
2145      8425
          ... 
142186    8425
142625    8425
143064    8425
143503    8425
144381    8425
Name: 8425, Length: 324, dtype: int64

390       8426
829       8426
1268      8426
1707      8426
2146      8426
          ... 
142187    8426
142626    8426
143065    8426
143504    8426
144382    8426
Name: 8426, Length: 324, dtype: int64

391       8427
830       8427
1269      8427
1708      8427
2147      8427
          ... 
142188    8427
142627    8427
143066    8427
143505    8427
144383    8427
Name: 8427, Length: 324, dtype: int64

392       8428
831       8428
1270      8428
1709      8428
2148      8428
          ... 
142189    8428
142628    8428
143067    8428
143506    8428
144384    8428
Name: 8428, Length: 324, dtype: int64

393       8429
832       8429
1271      8429
1710      8429
2149      8429
          ... 
142190    8429
142629    8429
143068    8429
143507    8429
144385    8429
Name: 8429, Length: 324, dtype: int64

394       8430
833       8430
1272      8430
1711      8430
2150      8430
          ... 
142191    8430
142630    8430
143069    8430
143508    8430
144386    8430
Name: 8430, Length: 324, dtype: int64

395       8431
834       8431
1273      8431
1712      8431
2151      8431
          ... 
142192    8431
142631    8431
143070    8431
143509    8431
144387    8431
Name: 8431, Length: 324, dtype: int64

396       8432
835       8432
1274      8432
1713      8432
2152      8432
          ... 
142193    8432
142632    8432
143071    8432
143510    8432
144388    8432
Name: 8432, Length: 324, dtype: int64

397       8433
836       8433
1275      8433
1714      8433
2153      8433
          ... 
142194    8433
142633    8433
143072    8433
143511    8433
144389    8433
Name: 8433, Length: 324, dtype: int64

398       8434
837       8434
1276      8434
1715      8434
2154      8434
          ... 
142195    8434
142634    8434
143073    8434
143512    8434
144390    8434
Name: 8434, Length: 324, dtype: int64

399       8435
838       8435
1277      8435
1716      8435
2155      8435
          ... 
142196    8435
142635    8435
143074    8435
143513    8435
144391    8435
Name: 8435, Length: 324, dtype: int64

400       8436
839       8436
1278      8436
1717      8436
2156      8436
          ... 
142197    8436
142636    8436
143075    8436
143514    8436
144392    8436
Name: 8436, Length: 324, dtype: int64

401       8437
840       8437
1279      8437
1718      8437
2157      8437
          ... 
142198    8437
142637    8437
143076    8437
143515    8437
144393    8437
Name: 8437, Length: 324, dtype: int64

402       8438
841       8438
1280      8438
1719      8438
2158      8438
          ... 
142199    8438
142638    8438
143077    8438
143516    8438
144394    8438
Name: 8438, Length: 324, dtype: int64

403       8439
842       8439
1281      8439
1720      8439
2159      8439
          ... 
142200    8439
142639    8439
143078    8439
143517    8439
144395    8439
Name: 8439, Length: 324, dtype: int64

404       8440
843       8440
1282      8440
1721      8440
2160      8440
          ... 
142201    8440
142640    8440
143079    8440
143518    8440
144396    8440
Name: 8440, Length: 324, dtype: int64

405       8441
844       8441
1283      8441
1722      8441
2161      8441
          ... 
142202    8441
142641    8441
143080    8441
143519    8441
144397    8441
Name: 8441, Length: 324, dtype: int64

406       8442
845       8442
1284      8442
1723      8442
2162      8442
          ... 
142203    8442
142642    8442
143081    8442
143520    8442
144398    8442
Name: 8442, Length: 324, dtype: int64

407       8443
846       8443
1285      8443
1724      8443
2163      8443
          ... 
142204    8443
142643    8443
143082    8443
143521    8443
144399    8443
Name: 8443, Length: 324, dtype: int64

408       8444
847       8444
1286      8444
1725      8444
2164      8444
          ... 
142205    8444
142644    8444
143083    8444
143522    8444
144400    8444
Name: 8444, Length: 324, dtype: int64

409       8445
848       8445
1287      8445
1726      8445
2165      8445
          ... 
142206    8445
142645    8445
143084    8445
143523    8445
144401    8445
Name: 8445, Length: 324, dtype: int64

410       8446
849       8446
1288      8446
1727      8446
2166      8446
          ... 
142207    8446
142646    8446
143085    8446
143524    8446
144402    8446
Name: 8446, Length: 324, dtype: int64

411       8447
850       8447
1289      8447
1728      8447
2167      8447
          ... 
142208    8447
142647    8447
143086    8447
143525    8447
144403    8447
Name: 8447, Length: 324, dtype: int64

412       8448
851       8448
1290      8448
1729      8448
2168      8448
          ... 
142209    8448
142648    8448
143087    8448
143526    8448
144404    8448
Name: 8448, Length: 324, dtype: int64

413       8449
852       8449
1291      8449
1730      8449
2169      8449
          ... 
142210    8449
142649    8449
143088    8449
143527    8449
144405    8449
Name: 8449, Length: 324, dtype: int64

414       8450
853       8450
1292      8450
1731      8450
2170      8450
          ... 
142211    8450
142650    8450
143089    8450
143528    8450
144406    8450
Name: 8450, Length: 324, dtype: int64

415       8451
854       8451
1293      8451
1732      8451
2171      8451
          ... 
142212    8451
142651    8451
143090    8451
143529    8451
144407    8451
Name: 8451, Length: 324, dtype: int64

416       8452
855       8452
1294      8452
1733      8452
2172      8452
          ... 
142213    8452
142652    8452
143091    8452
143530    8452
144408    8452
Name: 8452, Length: 324, dtype: int64

417       8453
856       8453
1295      8453
1734      8453
2173      8453
          ... 
142214    8453
142653    8453
143092    8453
143531    8453
144409    8453
Name: 8453, Length: 324, dtype: int64

418       8454
857       8454
1296      8454
1735      8454
2174      8454
          ... 
142215    8454
142654    8454
143093    8454
143532    8454
144410    8454
Name: 8454, Length: 324, dtype: int64

419       8455
858       8455
1297      8455
1736      8455
2175      8455
          ... 
142216    8455
142655    8455
143094    8455
143533    8455
144411    8455
Name: 8455, Length: 324, dtype: int64

420       8456
859       8456
1298      8456
1737      8456
2176      8456
          ... 
142217    8456
142656    8456
143095    8456
143534    8456
144412    8456
Name: 8456, Length: 324, dtype: int64

421       8457
860       8457
1299      8457
1738      8457
2177      8457
          ... 
142218    8457
142657    8457
143096    8457
143535    8457
144413    8457
Name: 8457, Length: 324, dtype: int64

422       8458
861       8458
1300      8458
1739      8458
2178      8458
          ... 
142219    8458
142658    8458
143097    8458
143536    8458
144414    8458
Name: 8458, Length: 324, dtype: int64

423       8459
862       8459
1301      8459
1740      8459
2179      8459
          ... 
142220    8459
142659    8459
143098    8459
143537    8459
144415    8459
Name: 8459, Length: 324, dtype: int64

424       8460
863       8460
1302      8460
1741      8460
2180      8460
          ... 
142221    8460
142660    8460
143099    8460
143538    8460
144416    8460
Name: 8460, Length: 324, dtype: int64

425       8461
864       8461
1303      8461
1742      8461
2181      8461
          ... 
142222    8461
142661    8461
143100    8461
143539    8461
144417    8461
Name: 8461, Length: 324, dtype: int64

daynum
8401    None
8402    None
8403    None
8404    None
8405    None
        ... 
8457    None
8458    None
8459    None
8460    None
8461    None
Name: daynum, Length: 61, dtype: object