In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import gc
import re

from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA, NMF, TruncatedSVD, FastICA, FactorAnalysis, SparsePCA
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from itertools import cycle
from IPython.display import display
import pickle, os

import seaborn as sns

# Relative folders for the survey data files and the analysis code.
# BES_data_folder is prefixed to file names by plain string concatenation
# further down, so its trailing "/" is required.
BES_data_folder = "../BES_analysis_data/"
BES_code_folder = "../BES_analysis_code/"

# from fancyimpute import BiScaler, KNN, NuclearNormMinimization, SoftImpute, MICE
from gaussian_kde import gaussian_kde
from utility import display_components,display_pca_data

In [7]:
# Suffix appended to every cached file name (and to most HDF keys) below.
wave = "W10_only"


def _load_wave_hdf(stem, key=None, label=None):
    """Read ``BES_data_folder + stem + wave + ".hdf"`` and print its shape.

    stem  -- file-name stem that precedes the wave suffix.
    key   -- HDF key to read; defaults to ``stem + wave``.
    label -- text printed in front of the shape; defaults to ``stem``.
    """
    if key is None:
        key = stem + wave
    if label is None:
        label = stem
    table = pd.read_hdf(BES_data_folder + stem + wave + ".hdf", key)
    print(label, table.shape)
    return table


# The Stata file is read directly; everything else comes from cached files.
BES_Panel = pd.read_stata(BES_data_folder + "BES2015_W10_v0.3.dta")
print("BES_Panel", BES_Panel.shape)

# The file stem is "BESnumeric" but the printed label is "BES_numeric".
BES_numeric = _load_wave_hdf("BESnumeric", label="BES_numeric")
BESnon_numeric = _load_wave_hdf("BESnon_numeric")
# var_type is the one table stored under a key without the wave suffix.
var_type = _load_wave_hdf("var_type", key="var_type")
BES_reduced = _load_wave_hdf("BES_reduced")
BES_reduced_with_na = _load_wave_hdf("BES_reduced_with_na")

# Pickled lookup objects; only their lengths are reported.
# NOTE(review): pickle.load can execute code embedded in the file, so these
# files should only ever be ones produced by this project.
fname = BES_data_folder + "cat_dictionary" + wave + ".pkl"
with open(fname, "rb") as f:
    cat_dictionary = pickle.load(f)
print("cat_dictionary", len(cat_dictionary))

fname = BES_data_folder + "new_old_col_names" + wave + ".pkl"
with open(fname, "rb") as f:
    new_old_col_names = pickle.load(f)
print("new_old_col_names", len(new_old_col_names))


BES_Panel (30319, 393)
BES_numeric (30319, 374)
BESnon_numeric (30319, 18)
var_type (393, 1)
BES_reduced (30319, 422)
BES_reduced_with_na (30319, 422)
cat_dictionary 368
new_old_col_names 447


In [5]:
# Names of BES_numeric columns containing the substring "age". This is a plain
# substring test, so names such as "likeFarageW1" match as well.
[
    column
    for column in BES_numeric.columns
    if "age" in column
]

['ageW1',
 'likeFarageW1',
 'likeFarageW2',
 'likeFarageW3',
 'likeFarageW4',
 'likeFarageW5',
 'likeFarageW6',
 'likeFarageW7',
 'ageW7',
 'age_pdlW8',
 'likeFarageW8',
 'languageSkillsW8',
 'languageSkillsWelshW8',
 'ageW8',
 'ageW9',
 'likeFarageW9',
 'likeFarageW10',
 'integrityFarageW10',
 'competentFarageW10',
 'gayMarriageW10',
 'ageGroup',
 'profile_education_age']

In [7]:
# Display the generalElectionVoteW10 column of BES_numeric.
# NOTE(review): the saved output's index runs to 64688, while the load cell
# reports 30319 rows for BES_numeric -- it looks like this output came from a
# different load; confirm by re-running from a fresh kernel.
BES_numeric.loc[:, 'generalElectionVoteW10']

0        5.0
1        0.0
2        NaN
3        NaN
4        NaN
5        NaN
6        3.0
7        0.0
8        0.0
9        NaN
10       0.0
11       NaN
12       NaN
13       0.0
14       0.0
15       NaN
16       0.0
17       3.0
18       NaN
19       NaN
20       NaN
21       NaN
22       NaN
23       NaN
24       NaN
25       NaN
26       NaN
27       NaN
28       NaN
29       1.0
        ... 
64659    NaN
64660    NaN
64661    NaN
64662    NaN
64663    NaN
64664    NaN
64665    NaN
64666    NaN
64667    NaN
64668    NaN
64669    NaN
64670    NaN
64671    NaN
64672    NaN
64673    NaN
64674    NaN
64675    NaN
64676    NaN
64677    NaN
64678    NaN
64679    NaN
64680    NaN
64681    NaN
64682    NaN
64683    NaN
64684    NaN
64685    NaN
64686    NaN
64687    NaN
64688    NaN
Name: generalElectionVoteW10, dtype: float64

In [6]:
# Names of BES_numeric columns containing the substring "general".
[
    column
    for column in BES_numeric.columns
    if "general" in column
]

['generalElectionVoteW1',
 'generalElectionVoteW2',
 'generalElectionVoteW3',
 'generalElectionVoteW4',
 'generalElectionVoteUnsqueezeW4',
 'generalElectionVoteSqueezeW4',
 'generalElectionVoteW5',
 'generalElectionVotePostW5',
 'generalElectionVoteUnsqueezeW5',
 'generalElectionVoteSqueezeW5',
 'generalElectionVoteW6',
 'generalElectionVoteW7',
 'generalElectionVoteW8',
 'generalElectionVoteW9',
 'generalElectionVoteW10',
 'generalElectionCertaintyW1',
 'generalElectionCertaintyW2',
 'generalElectionCertaintyW3',
 'generalElectionCertaintyW4',
 'generalElectionCertaintyUnsqW5',
 'generalElectionCertaintySqW5',
 'generalElectionCertaintyW5',
 'generalElectionCertaintyW9',
 'generalElectionCertaintyW10']

In [4]:
# Load the "BES_reducedW10" table and report its shape.
# NOTE(review): this rebinds BES_reduced, which the load cell also assigns
# (from the "W10_only" file); which frame is live depends on execution order.
BES_reduced = pd.read_hdf(
    "{}BES_reducedW10.hdf".format(BES_data_folder),
    key="BES_reducedW10",
)
print("BES_reduced", BES_reduced.shape)

BES_reduced (64689, 2735)


In [9]:
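# Currently commented out; the listing below appears to be left over from an
# earlier run of this line (BES_reduced columns containing "generalElectionVote").
# [x for x in BES_reduced if "generalElectionVote" in x]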

['generalElectionVoteW1_British National Party (BNP)',
 'generalElectionVoteW1_Conservative',
 'generalElectionVoteW1_Green Party',
 'generalElectionVoteW1_Labour',
 'generalElectionVoteW1_Liberal Democrat',
 'generalElectionVoteW1_Plaid Cymru',
 'generalElectionVoteW1_Scottish National Party (SNP)',
 'generalElectionVoteW1_United Kingdom Independence Party (UKIP)',
 'generalElectionVoteW2_British National Party (BNP)',
 'generalElectionVoteW2_Conservative',
 'generalElectionVoteW2_Green Party',
 'generalElectionVoteW2_Labour',
 'generalElectionVoteW2_Liberal Democrat',
 'generalElectionVoteW2_Plaid Cymru',
 'generalElectionVoteW2_Scottish National Party (SNP)',
 'generalElectionVoteW2_United Kingdom Independence Party (UKIP)',
 'generalElectionVoteW3_British National Party (BNP)',
 'generalElectionVoteW3_Conservative',
 'generalElectionVoteW3_Green Party',
 'generalElectionVoteW3_Labour',
 'generalElectionVoteW3_Liberal Democrat',
 'generalElectionVoteW3_Plaid Cymru',
 'generalElectio

In [5]:
# Force a full garbage-collection pass (presumably to release memory after
# the large BES_reduced load -- confirm). The echoed value is gc.collect()'s
# return: the number of unreachable objects it found.
gc.collect()

0

In [6]:
# Boolean row mask over BES_reduced: True where either the Conservative or the
# Labour indicator column for generalElectionVoteW10 equals 1.0.
voted_con = BES_reduced['generalElectionVoteW10_Conservative'].eq(1.0)
voted_lab = BES_reduced['generalElectionVoteW10_Labour'].eq(1.0)
Lab_or_Con = voted_con | voted_lab

In [7]:
# Correlate every column with the Conservative indicator, using only the rows
# selected by Lab_or_Con. Despite its name, corr_df is the per-column result
# of DataFrame.corrwith rather than a DataFrame.
two_party_rows = BES_reduced.loc[Lab_or_Con]
corr_df = two_party_rows.corrwith(
    two_party_rows['generalElectionVoteW10_Conservative']
)
# Release the filtered copy so it does not linger in the kernel namespace.
del two_party_rows

In [9]:
# Show the correlations from most negative to most positive.
corr_df.sort_values(ascending=True)

generalElectionVoteW10_Labour                                                              -1.000000
partyIdW10_Labour                                                                          -0.696483
lr1W10__Strongly agree                                                                     -0.538023
xprofile_newspaper_readershipW10_Other local daily morning newspaper                       -0.511501
xprofile_marital_statW10_Living as married                                                 -0.499829
changeEducationLabW10__Getting a lot better                                                -0.462914
ptvLabW10__Very likely                                                                     -0.445353
xprofile_religionW10_No, I do not regard myself as belonging to any particular religion.   -0.442125
renationaliseRailW10__Strongly agree                                                       -0.432665
likeGrnW10__Strongly like                                                                  