In [1]:
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)

In [2]:
dataset_name = "W16_comb"
df_list = [ "BES_Panel", ]

In [3]:
%matplotlib inline

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pickle, os, gc, re

sns.set();
sns.set_palette("colorblind")

from IPython.display import display, display_html, HTML
plt.rcParams["axes.grid"] = False

import Jupyter_module_loader
from utility import *
import gaussian_kde

import warnings
warnings.filterwarnings('ignore')

import holoviews as hv
from holoviews import opts

encoding = "ISO-8859-1"

In [4]:
# you should clone this git to a subdirectory called 'BES_analysis_code' (in some directory - I call it BES_analysis - doesn't matter though)
# %matplotlib inline
(BES_code_folder, BES_small_data_files, BES_data_folder,
 BES_output_folder, BES_file_manifest, BES_R_data_files) = setup_directories()

global BES_Panel, BES_numeric, BES_reduced, BES_reduced_with_na, BES_non_numeric
data_subfolder = BES_data_folder + dataset_name + os.sep

(manifest, dataset_filename, dataset_description, dataset_citation,
 dataset_start, dataset_stop, dataset_wave) = get_manifest(dataset_name, BES_file_manifest)

for df in df_list:
    if df=="BES_Panel":
        globals()[df]  = pd.read_msgpack(data_subfolder + dataset_filename.replace('.dta','.msgpack'))
    else:
        globals()[df]  = pd.read_msgpack(data_subfolder + df + '.msgpack' )
        globals()[df].replace(-1,np.nan,inplace=True)
  
# (var_type, cat_dictionary, new_old_col_names, old_new_col_names) = get_small_files(data_subfolder, encoding)

# get full set of inferred "cross wave" auth-lib/left-right values and ages
pan_dataset_allr_values = pd.read_csv(BES_small_data_files + "pan_dataset_allr_values"+".csv")
pan_dataset_ages = pd.read_csv( BES_small_data_files + "pan_dataset_ages"+".csv" )

In [5]:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show,  output_notebook
from bokeh.layouts import column, row
from bokeh.embed import components
output_notebook()

In [6]:
## Step One: Locate the period of the Major Shift In Sentiment
## - Find all the directly-immigration-related sentiment variables

# split into "about me" (immigSelf)
#            "about party" (immigCon)

In [7]:
immig_var_stub = ["immigEcon","immigCultural","immigSelf","immigrationLevel","immigContributeTake",
                  "immigrantsWelfareState","controlImmig","effectsEUImmigration","euPriorityBalance",
                  "changeImmig","changeImmigLab","govtHandleImmig","labHandleImmig",
                  "asylumMore","euMore","noneuMore","studentsMore","familiesMore"]

In [8]:
list(match(BES_Panel,"|".join(immig_var_stub)).sort_index().index)

In [9]:
[x for x in match(BES_Panel,"|".join(immig_var_stub)).sort_index().index if "W16" in x]

In [10]:
match(BES_Panel,"|".join(immig_var_stub)).sort_index().index

Index(['asylumMoreW13', 'asylumMoreW7', 'asylumMoreW8', 'changeImmigLabW1',
       'changeImmigLabW10', 'changeImmigLabW2', 'changeImmigLabW3',
       'changeImmigLabW4', 'changeImmigLabW7', 'changeImmigW1',
       ...
       'labHandleImmigW2', 'labHandleImmigW3', 'labHandleImmigW4',
       'labHandleImmigW7', 'noneuMoreW13', 'noneuMoreW7', 'noneuMoreW8',
       'studentsMoreW13', 'studentsMoreW7', 'studentsMoreW8'],
      dtype='object', length=113)

In [11]:
def cat_2_num_drop_dk(x):
    if x.dtype.name=='category':
        return x.replace("Don't know",np.nan).cat.remove_unused_categories().cat.codes.replace(-1,np.nan).astype('UInt16')
    else:
        return x

In [12]:
immigSelfW16 = cat_2_num_drop_dk(BES_Panel['immigSelfW16'])

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [None]:
%%time
df = BES_Panel.apply(lambda x: cat_2_num_drop_dk(x))

In [None]:
gc.collect()

In [None]:
df = BES_Panel[BES_Panel.columns[BES_Panel.dtypes=='category']].apply(lambda x: x.cat.codes).copy()
#.apply(lambda x: x.cat.codes)

In [None]:
%%time
mask = immigSelfW16.notnull()

corr_series = df[mask].select_dtypes(include=np.number).astype('float32').corrwith(immigSelfW16[mask].astype('float32'))

In [None]:
list( search(BES_Panel,"Priorities(2?)_immig").index )


In [None]:
search(BES_Panel,"responsibleImmig")

In [None]:
search(BES_Panel,"achieveReduceImmig")


In [None]:
# BES_Panel["conPriorities_immigW1"].value_counts()

In [None]:
corr_series.dropna().sort_values().tail(100)[:-50]

In [None]:
# dealPriorityMarketAccess
# effectsEUUnemployment
# dealPrioritgImmig


# W15 only possibleImmigNI,possibleMarketsImmig,possibleImmigTrade,noDealResults_6
# W4 tryReduceImmigtryReduceImmig

In [None]:
corr_series.dropna().sort_values().head(50)

In [None]:
# use these variables for now, edit Later

In [8]:
immig_var_stub = ["immigEcon","immigCultural","immigSelf","immigrationLevel","immigContributeTake",
                  "immigrantsWelfareState","controlImmig","effectsEUImmigration","euPriorityBalance",
                  "changeImmig","changeImmigLab","govtHandleImmig","labHandleImmig",
                  "asylumMore","euMore","noneuMore","studentsMore","familiesMore"]

In [9]:
df = BES_Panel[ match(BES_Panel,"|".join(immig_var_stub)).index ].copy()

# df["cat_strings"]=
cat_ser = df.apply(lambda x: "|".join(list(map(str,x.cat.categories))))
df.apply(lambda x: "|".join(list(map(str,x.cat.categories)))).value_counts()

Don't know|Getting a little higher|Getting a little lower|Getting a lot higher|Getting a lot lower|Staying about the same    16
1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more                                                                            15
Don't know|Fairly badly|Fairly well|Neither well nor badly|Very badly|Very well                                              12
2|3|4|5|6|Bad for economy|Don't know|Good for economy                                                                        11
2|3|4|5|6|Don't know|Enriches cultural life|Undermines cultural life                                                         11
1|2|3|4|5|6|7|8|9|Allow many fewer|Allow many more|Don't know                                                                 9
Agree|Disagree|Don't know|Neither agree nor disagree|Strongly agree|Strongly disagree                                         8
A little control|A lot of control|Complete control|Don't know|No control at all|Some control            

In [10]:
# after fixing
df = BES_Panel[ match(BES_Panel,"|".join(immig_var_stub)).index ].copy()

# df["cat_strings"]=
cat_ser = df.apply(lambda x: "|".join(list(map(str,x.cat.categories))))
df.apply(lambda x: "|".join(list(map(str,x.cat.categories)))).value_counts()

Don't know|Getting a little higher|Getting a little lower|Getting a lot higher|Getting a lot lower|Staying about the same    16
1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more                                                                            15
Don't know|Fairly badly|Fairly well|Neither well nor badly|Very badly|Very well                                              12
2|3|4|5|6|Bad for economy|Don't know|Good for economy                                                                        11
2|3|4|5|6|Don't know|Enriches cultural life|Undermines cultural life                                                         11
1|2|3|4|5|6|7|8|9|Allow many fewer|Allow many more|Don't know                                                                 9
Agree|Disagree|Don't know|Neither agree nor disagree|Strongly agree|Strongly disagree                                         8
A little control|A lot of control|Complete control|Don't know|No control at all|Some control            

In [11]:
cat_ser[cat_ser=="1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more"]

asylumMoreW7       1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
euMoreW7           1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
noneuMoreW7        1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
studentsMoreW7     1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
familiesMoreW7     1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
asylumMoreW8       1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
euMoreW8           1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
noneuMoreW8        1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
studentsMoreW8     1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
familiesMoreW8     1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
asylumMoreW13      1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
euMoreW13          1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
noneuMoreW13       1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
studentsMoreW13    1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
familiesMoreW13    1|2|3|4|5|6|7|8

In [12]:
# variables with broken order
# Don't know|Getting a little higher|Getting a little lower|Getting a lot higher|Getting a lot lower|Staying about the same
# 1|2|3|4|5|6|7|8|9|Don't know|Many fewer|Many more
# Don't know|Fairly badly|Fairly well|Neither well nor badly|Very badly|Very well

# 2|3|4|5|6|Bad for economy|Don't know|Good for economy
# 2|3|4|5|6|Don't know|Enriches cultural life|Undermines cultural life
# 1|2|3|4|5|6|7|8|9|Allow many fewer|Allow many more|Don't know

# A little control|A lot of control|Complete control|Don't know|No control at all|Some control

In [13]:
list(cat_ser.index)

In [14]:
pd.Series([re.match("(.*?)($|W\d+)",x).groups()[0] for x in cat_ser.index]).value_counts()

immigEcon                 12
immigCultural             12
changeImmig               11
immigSelf                 10
controlImmig               8
effectsEUImmigration       8
immigrantsWelfareState     8
euPriorityBalance          7
govtHandleImmig            6
changeImmigLab             6
labHandleImmig             6
studentsMore               3
euMore                     3
noneuMore                  3
familiesMore               3
asylumMore                 3
immigrationLevel           2
immigContributeTake        2
dtype: int64

In [15]:
# changeImmig_cats = ['Getting a lot lower', 'Getting a little lower','Staying about the same','Getting a little higher', 
#        'Getting a lot higher',  "Don't know" ]
# BES_Panel[ match(BES_Panel, "changeImmig" ).index ] = BES_Panel[ match(BES_Panel, "changeImmig" ).index ].apply(lambda x: x.cat.reorder_categories(changeImmig_cats))



In [16]:
# immigEcon_cats = ['Bad for economy','2', '3', '4', '5', '6', 'Good for economy',  "Don't know"]
# BES_Panel[ match(BES_Panel, "immigEcon" ).index ] = BES_Panel[ match(BES_Panel, "immigEcon" ).index ]\
#     .apply(lambda x: x.cat.reorder_categories(immigEcon_cats))

In [17]:
fix_cat_dict = {}
fix_cat_dict['changeImmig']=['Getting a lot lower', 'Getting a little lower','Staying about the same',
                             'Getting a little higher', 'Getting a lot higher',  "Don't know" ]
fix_cat_dict['immigEcon']=['Bad for economy','2', '3', '4', '5', '6', 'Good for economy',  "Don't know"]
fix_cat_dict['immigCultural']=['Undermines cultural life', '2', '3', '4', '5', '6', 'Enriches cultural life', "Don't know" ]

fix_cat_dict["immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)W"] = ['Allow many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Allow many more', "Don't know"]
fix_cat_dict['controlImmig']=['No control at all', 'A little control', 'Some control', 'A lot of control', 'Complete control', "Don't know"]

fix_cat_dict['immigrantsWelfareState'] = ['Strongly disagree', 'Disagree',  'Neither agree nor disagree','Agree', 'Strongly agree',"Don't know", ]
fix_cat_dict['effectsEUImmigration'] = ['Much lower', 'Lower', 'About the same', 'Higher', 'Much higher', "Don't know"]

fix_cat_dict['euPriorityBalance'] = ['Access to the single market', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Control immigration', "Don't know"]
fix_cat_dict['(lab|govt)HandleImmig'] = ['Very badly', 'Fairly badly', 'Neither well nor badly', 'Fairly well', 'Very well', "Don't know"]

fix_cat_dict['(students|eu|asylum|families|noneu)More'] = ['Many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Many more', "Don't know"]
fix_cat_dict['immigrationLevel'] = ['Decreased a lot', 'Decreased a little', 'Left the same as it is now',
                                    'Increased a little', 'Increased a lot',  "Don't know"]

fix_cat_dict['immigContributeTake'] = ['Get more than they pay', '2.0', '3.0', '4.0',
                                       '5.0', '6.0', 'Pay more than they get', "Don't know"]

In [18]:
def fix_cats(fix_cat_dict):
    for key in fix_cat_dict.keys():
        BES_Panel[ match(BES_Panel, key ).index ] = BES_Panel[ match(BES_Panel,key ).index ]\
            .apply( lambda x: x.cat.set_categories( fix_cat_dict[key], ordered =True ) )
    
fix_cats(fix_cat_dict)

In [19]:
BES_Panel['immigSNPW13'].cat.categories

Index(['Allow many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       'Allow many more', 'Don't know'],
      dtype='object')

In [20]:
BES_Panel[ match(BES_Panel, "immigContributeTake" ).index ].apply(lambda x: "|".join(list(map(str,x.cat.categories)))).value_counts()

Get more than they pay|2.0|3.0|4.0|5.0|6.0|Pay more than they get|Don't know    2
dtype: int64

In [21]:
"2.0|3.0|4.0|5.0|6.0|Don't know|Get more than they pay|Pay more than they get".split("|")

In [22]:
# we've (checked and fixed and) corrected the order
# 

In [23]:
BES_Panel[ match(BES_Panel,"|".join(immig_var_stub)).index ]

Unnamed: 0,changeImmigW1,changeImmigLabW1,immigEconW1,immigCulturalW1,immigrantsWelfareStateW1,govtHandleImmigW1,labHandleImmigW1,changeImmigW2,changeImmigLabW2,immigrantsWelfareStateW2,...,immigCulturalW15,immigSelfW15,changeImmigW15,controlImmigW16,euPriorityBalanceW16,effectsEUImmigrationW16,immigEconW16,immigCulturalW16,immigSelfW16,changeImmigW16
0,Getting a little higher,Staying about the same,3,Undermines cultural life,Strongly agree,,,Getting a little higher,Staying about the same,Strongly agree,...,,,,,,,,,,
1,Staying about the same,Getting a little higher,5,5,Disagree,,,,,,...,,,,,,,,,,
2,Staying about the same,Staying about the same,5,4,Disagree,,,Getting a little higher,Staying about the same,Disagree,...,5,5,Getting a little lower,A little control,3,Lower,5,4,7,Staying about the same
3,Getting a lot higher,Getting a lot higher,2,Undermines cultural life,Agree,,,Getting a little higher,Getting a lot higher,Agree,...,,,,,,,,,,
4,Getting a little lower,Getting a little higher,4,4,Agree,,,Getting a little higher,Getting a little higher,Neither agree nor disagree,...,,,,,,,,,,
5,Getting a little higher,Staying about the same,4,2,Agree,,,Getting a lot higher,Getting a little higher,Agree,...,,,,,,,,,,
6,Getting a little higher,Getting a little lower,4,Undermines cultural life,Strongly agree,,,Getting a lot higher,Getting a little lower,Strongly agree,...,2,Allow many fewer,Getting a little higher,Some control,5,About the same,3,3,3,Staying about the same
7,Getting a little higher,Getting a lot higher,4,4,Disagree,,,Getting a little higher,Getting a lot higher,Agree,...,5,5,Staying about the same,,,,,,,
8,Don't know,Getting a little higher,4,4,Don't know,,,Getting a little higher,Getting a little higher,Don't know,...,4,5,Getting a little lower,,,,,,,
9,Getting a little higher,Getting a little higher,5,4,Neither agree nor disagree,,,Staying about the same,Getting a little higher,Neither agree nor disagree,...,6,5,Staying about the same,,,,,,,


In [24]:
def weighted_mean(x, **kws):
    val, weight = map(np.asarray, zip(*x))
    return (val * weight).sum() / weight.sum()

max_wave = int(re.match("W(\d+)_",dataset_name).groups()[0])
num_to_wave = {x:"W"+str(x) for x in range(1,max_wave+1)}
wts_for_wave = { "W"+str(y):[x for x in BES_Panel.columns.sort_values(ascending=False) if re.match("wt_(new|full)_W"+str(y)+"$",x)][0] for y in range(1,max_wave+1) }
# wts_for_wave

In [25]:
# fix endtimeW3 bug!
BES_Panel.loc[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00',"endtimeW3"]=BES_Panel[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00']["starttimeW3"].values

# create correct midpoints (technically we should weight these!)
n = 1
for wave_no in range(1,max_wave+1):
    wave = "W"+str(wave_no)
#     print(wave)

    BES_Panel["midpoint"+wave] = pd.qcut(BES_Panel["endtime"+wave]+((BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2),n)
    date_cats_dict = {BES_Panel["midpoint"+wave].cat.categories[x]:(BES_Panel["midpoint"+wave].cat.categories[x].left+ (BES_Panel["midpoint"+wave].cat.categories[x].right - BES_Panel["midpoint"+wave].cat.categories[x].left)/2).strftime("%Y-%m-%d") for x in range(n)}
    BES_Panel["midpoint"+wave] = pd.to_datetime(BES_Panel["midpoint"+wave].replace(date_cats_dict))  

In [32]:
def time_series(var_name,title,subtract_var=False,retain_var=True,specific_dates=True, specific_suffix_set="([a-zA-Z]*)",
                use_midpoints=True,col_name="party",dk_str="Don't know",max_y_size=10.0,min_waves_included=2):

    df=pd.DataFrame()
    df2 =pd.DataFrame()
    df_wts =pd.DataFrame()
    df_dates = pd.DataFrame()
    
    if subtract_var:
        title=title+"\n(where respondents place parties relative to their own preference set at 0.5)"

    wave_list = []
    redist_vars = pd.Series([re.match(var_name+specific_suffix_set+"($|W\d+)",x).groups()[0] for x in BES_Panel.columns\
                             if re.match(var_name+specific_suffix_set+"($|W\d+)",x)]).value_counts()
    redist_vars = redist_vars[redist_vars>=min_waves_included].index

    for subj in redist_vars:
        for wave in ["W"+str(x) for x in range(1,17)]:
            if var_name+subj+wave not in BES_Panel.columns:
                continue
            else:
                wave_list.append(wave)
            df[var_name+"num_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave].replace(dk_str,np.nan).cat.remove_unused_categories().cat.codes/max_y_size
            df2[var_name+"dk_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave]==dk_str
            df2[var_name+"dk_"+subj+"_"+wave][BES_Panel[var_name+subj+wave].isnull()] = np.nan
    for wave in pd.unique(wave_list):        
        df_wts["wt_"+wave] = BES_Panel[wts_for_wave[wave]]
        if use_midpoints:
            df_dates["dt_"+wave] = BES_Panel["midpoint"+wave]
        else:
            df_dates["dt_"+wave] = (BES_Panel["starttime"+wave]+(BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2).apply(lambda x:x.date())

    df_wts = df_wts[df.notnull().any(axis=1)]
    df = df[df.notnull().any(axis=1)]

    df2.columns = df2.columns.str.split('_', expand=True)
    df2 = df2.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df_wts.columns = df_wts.columns.str.split('_', expand=True)
    df_wts = df_wts.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df_dates.columns = df_dates.columns.str.split('_', expand=True)
    df_dates = df_dates.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df.columns = df.columns.str.split('_', expand=True)
    df = df.stack(dropna=False)\
            .reset_index()\
            .rename(columns={'level_1':"wave"})\
            .rename(columns={'level_0':"id"}) 

    df["wt"] = df_wts["wt"]

    df = df.loc[ df[[x for x in df.columns if var_name+"num" in x]].notnull().any(axis=1) ]
    df.loc[:,"wt"] = df.loc[:,"wt"].fillna(1.0).values
    temp_ind_name = "temp_index"

    if specific_dates:
        df["wave"] = df[["id","wave"]].merge(right=df_dates,
                 how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values

    df[temp_ind_name] = list(zip(df["wave"],df["wt"]))
    df = df.set_index(temp_ind_name).drop(["id","wave","wt"],axis=1)

    df2["wt"] = df_wts["wt"]

    if subtract_var:
        if retain_var:
            focal_vars = [x for x in df.columns if (var_name+"num" in x) and (subtract_var not in x)]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num",  subtract_var)])+0.5
        else:
            focal_vars = [x for x in df.columns if var_name+"num" in x]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num",  subtract_var)])+0.5
            df.drop((var_name+"num",  subtract_var),axis=1,inplace=True)


    df2 = df2.loc[ df2[[x for x in df2.columns if var_name+"dk" in x]].notnull().any(axis=1) ]
    df2.loc[:,"wt"] = df2.loc[:,"wt"].fillna(1.0).values
    if specific_dates:
        df2["wave"] = df2[["id","wave"]].merge(right=df_dates,
                 how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values

    temp_ind_name = "temp_index"
    df2[temp_ind_name] = list(zip(df2["wave"],df2["wt"]))
    df2 = df2.set_index(temp_ind_name).drop(["id","wave","wt"],axis=1)


    flat_df_num = df.stack().reset_index().rename(columns={'level_1':col_name})

    if specific_dates:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x:x[0])    
    else:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x:int(x[0].split("W")[1]))


    flat_df_num["wt"] = flat_df_num[temp_ind_name].apply(lambda x:x[1])
    flat_df_num.drop(temp_ind_name,axis=1,inplace=True)
    flat_df_num[col_name] = flat_df_num[col_name].astype('category')

    flat_df_num[var_name+"_wts"] = list(zip(flat_df_num[var_name+"num"],flat_df_num["wt"]))

    flat_df_dk = df2.stack().reset_index().rename(columns={'level_1':col_name,0:"dk"})
    if specific_dates:
        flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:x[0])    
    else:
        flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:int(x[0].split("W")[1]))

    flat_df_dk["wt"] = flat_df_dk[temp_ind_name].apply(lambda x:x[1])
    flat_df_dk.drop(temp_ind_name,axis=1,inplace=True)
    flat_df_dk[var_name+"dk"] = flat_df_dk[var_name+"dk"].astype('int')
    flat_df_dk = flat_df_dk.groupby(["wave",col_name]).apply(lambda x: (x["wt"]*x[var_name+"dk"]).sum()/x["wt"].sum() ).reset_index().rename(columns={0:"dk"})

    flat_df_num = flat_df_num.merge(right=flat_df_dk[[col_name,"wave","dk"]],how='left',left_on=[col_name,"wave"],right_on=[col_name,"wave"])
    if not specific_dates:
        flat_df_num["wave"] = flat_df_num["wave"].apply(lambda x: wave_to_date[x])
        flat_df_num["wave"] = pd.to_datetime(flat_df_num["wave"] , format="%b-%y")

    return flat_df_num

In [34]:
var_name = "immig"

title= "\n".join(["Some people think that the UK should allow *many more* immigrants to come to the UK",
                  "to live andothers think that the UK should allow *many fewer* immigrants.",
                  "Where would you place yourself and the parties on this scale?",
                  "Many fewer (0) - Many more (10)"])
specific_suffix_set = "(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)"
col_name="party"

flat_df_num = time_series(var_name,title,specific_suffix_set=specific_suffix_set,col_name="party",
                          dk_str="Don't know",max_y_size=10.0,min_waves_included=2)


g = sns.FacetGrid(data=flat_df_num, col=col_name, col_wrap=4, ylim=(0.0, 1.0), legend_out=False);
g.map(sns.lineplot, "wave",var_name+"_wts", estimator=weighted_mean,ci=None, label="Mean answer",);
g.map(sns.lineplot, "wave","dk", color='r', ci=None, label="DK fraction",);
g.map(plt.axvline, x=pd.to_datetime("7 May 2015"), ls='--', c='red',linewidth=1, alpha=.5)# 2015GE
g.map(plt.axvline, x=pd.to_datetime("23 June 2016"), ls='--', c='blue',linewidth=1, alpha=.5)# 2016EUREF
g.map(plt.axvline, x=pd.to_datetime("8 June 2017"), ls='--', c='red',linewidth=1, alpha=.5)# 2017GE

g.add_legend().set_ylabels("").set_titles(col_template="{col_name}")
g.fig.suptitle(title, y=1.0+0.03*len(title.split("\n")));

treatment = "timeseries"
output_subfolder = create_subdir(BES_output_folder, treatment)
g.savefig(output_subfolder +var_name + ".png", bbox_inches='tight')


MemoryError: 

In [28]:
# %debug

In [31]:
%debug

> <ipython-input-29-c365578671f2>(14)<listcomp>()
     12 
     13     wave_list = []
---> 14     redist_vars = pd.Series([re.match(var_name+specific_suffix_set+"($|W\d+)",x).groups()[0] for x in BES_Panel.columns if re.match("("+var_name+"[a-zA-Z]*)($|W\d+)",x)]).value_counts()
     15     redist_vars = redist_vars[redist_vars>=min_waves_included].index
     16 

ipdb> redist_vars 
*** NameError: name 'redist_vars' is not defined
ipdb> re.match(var_name+specific_suffix_set+"($|W\d+)",x)
ipdb> var_name+specific_suffix_set+"($|W\d+)"
'immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)($|W\\d+)'
ipdb> x#
'immigEconW1'
ipdb> quit
