In [1]:
import pandas as pd
import os
import time
import numpy as np

In [4]:
os.chdir(r"A:\UW-Madison\GIS SPRING 2023\Geog 575\Final_Project\SPSS_Processed\UKDA-5151-spss\spss\spss19")

In [6]:
df_residence = pd.read_csv(r"A:\UW-Madison\GIS SPRING 2023\Geog 575\Final_Project\SPSS_Processed\UKDA-5151-spss\spss\spss19\bhps_w1\aindresp.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


### Interested variables for visualization
Look for <code>aindresp.csv</code>. Required variables are: (These are columns names and their corresponding interpretation) <ul><li><code>aopsocc</code> : "Private enterprise is the best way to solve Britain’s economic problems" (S1)</li> <li><code>aopsocd</code> : "Major public services and industries ought to be in state ownership" (S2)</li> <li><code>aopsoce</code> : "It is the government’s responsibility to provide a job for everyone who wants one" (S3)</li> <li><code>afiyr</code> : <b>Self-reported annual income</b> </li> <li><code>afetype</code> : <b>Educational qualification</b></li> <li><code>aplbornc_cc</code> : <b>Country of birth</b></li> <li><code>aage12</code> : <b>Age</b></li> <li><code>ajbstatt</code> : <b>Employment status</b></li> <li><code>aplb4d</code> : <b>County Name</b> </li></ul>

In [7]:
# Loading variables of interest from 1991 BHPS dataset into dataframe
df_residence = df_residence[["aopsocc","aopsocd","aopsoce","afiyr","afetype","aage12","ajbstatt","aplb4d", "aplbornc_cc"]]

In [8]:
# Dropping rows if they donot have any spatial information
df_residence = df_residence[~df_residence["aplb4d"].isin(["don't know", "inapplicable", "missing or wild", "refused"])]

In [9]:
# To view columns
df_residence.head()

Unnamed: 0,aopsocc,aopsocd,aopsoce,afiyr,afetype,aage12,ajbstatt,aplb4d,aplbornc_cc
0,Agree,Not agree/disag,Disagree,3488.5703125,Other trng establmnt,91.0,Retired,City; Westminster,inapplicable
1,Agree,Not agree/disag,Not agree/disag,1789.7335205078125,University,28.0,Unemployed,Ealing,other country
2,Not agree/disagr,Strongly disagr,Strongly disagr,1789.7335205078125,University,26.0,Unemployed,Ealing,other country
3,Strongly disagr,Strongly disagr,Strongly disagr,7200.06005859375,None of the above,58.0,In paid employ,Brent,other country
4,Agree,Strongly disagr,Strongly disagr,12060.0,None of the above,54.0,Self employed,Hammersmith & Fulham,inapplicable


### Several measures are indirect analysis techniques are adopted from Grechyna (2022)
<ul>
    <li>The standard deviation of individual age in county-year</li>
    <li>The change in the fraction of natives among the respondent</li>
    <li>The variability of the highest educational qualifcation</li>
    <li>The variability of the job status in the county-year</li>
    <li>The fraction of employed full time</li>
    <li>Income inequality using Gini index for each county</li>
</ul>

In [10]:
# create an empty dataframe
output_df = pd.DataFrame()

# Group the dataframe by county and calculate standard deviation of age : "aage12"
output_df["std_dev_age"] = df_residence.groupby("aplb4d")["aage12"].agg("std")

# Group the dataframe by county and number of people born in UK : "aplbornc_cc"
output_df["born"] = df_residence.groupby("aplb4d")["aplbornc_cc"]

# Assuming inapplicable as people born in UK to all others
def native_share(x):
    pop_count = list(x["born"][1])
    native_count = pop_count.count("inapplicable")/len(pop_count)
    return native_count

# calculating fraction of natives in each county
output_df["native_share"] = output_df.apply(lambda x: native_share(x), axis = 1)

# Dropping "born" groupby column
output_df = output_df.drop(columns = ["born"], axis = 1)

# Creating afetype_dict based on PDF from 1991 BHPS data
afetype_dict = {"Nursing school etc" : 1, " College of f educ" : 2, " College of f educ" : 3, " Polytechnic" : 4, \
                "University" : 5, "None of the above" : 7, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy" : -7, "refused" : -2}


# Create a new column "afetype_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["afetype_encoded"] = df_residence["afetype"].replace(afetype_dict)

# Group the dataframe by county and "afetype_encoded"
output_df["education"] = df_residence.groupby("aplb4d")["afetype_encoded"]

# function to calculate dissimilarity in education
def dissimilarity_education(x):
    # loads tuple element into job
    job = x["education"][1]
    # loads frequency of each education type in county as list
    freq = list(job.value_counts())
    # calculates total number of groups
    s = len(freq)
    dissim_index = 0
    for i in freq:
        # formula of dissimilarity index
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

def county_name(x):
    return x["education"][0]

output_df["education_variability"] = output_df.apply(lambda x: dissimilarity_education(x), axis = 1)
output_df["county"] = output_df.apply(lambda x: county_name(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["education"], axis = 1)

# Creating ajbstatt_dict based on PDF from 1991 BHPS data
ajbstatt_dict = {"Self employed" : 1, "In paid employ" : 2, "Unemployed" : 3, "Retired" : 4, "Family care" : 5, \
                    "FT student" : 6,  "Long term sick/disabled" : 7, "On matern leave" : 8, \
                    "Govt trng scheme" : 9, "Something else" : 10, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy" : -7, "refused" : -2}


# Create a new column "ajbstatt_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["ajbstatt_encoded"] = df_residence["ajbstatt"].replace(ajbstatt_dict)

# Group the dataframe by county and calculate the mean and standard deviation of the encoded values
output_df["job"] = df_residence.groupby("aplb4d")["ajbstatt_encoded"]

# function to calculate dissimilarity in job
def dissimilarity_job(x):
    job = x["job"][1]
    freq = list(job.value_counts())
    s = len(freq)
    dissim_index = 0
    for i in freq:
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

# function to calculate fraction of employed people within each county
def frac_employed(x):
    job = list(x["job"][1])
    frac_emp = (job.count(2)/len(job))
    return frac_emp

output_df["job_variability"] = output_df.apply(lambda x: dissimilarity_job(x), axis = 1)
output_df ["frac_employed"] = output_df.apply(lambda x: frac_employed(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["job"], axis = 1)

# replacing all Not-A-Number cases to zero i.e., proxy values
df_residence["afiyr"] = df_residence["afiyr"].replace("proxy",0)

# Group the dataframe by county and calculate median based on self-reported annual income: "afiyr"
output_df["median_income"] = df_residence.groupby("aplb4d")["afiyr"].agg(["median"])

# convert "afiyr" column to float data type
df_residence["afiyr"] = df_residence["afiyr"].astype("float32")

# group incomes by county
output_df["income"] = df_residence.groupby("aplb4d")["afiyr"]

def gini_index(x):
    data = list(x["income"][1])
    # sort the data in ascending order
    sorted_data = np.sort(data)
    # calculate the cumulative sum of the sorted data
    cumsum_data = np.cumsum(sorted_data)
    # calculate the Lorenz curve values
    lorenz_curve = cumsum_data / cumsum_data[-1]
    # calculate the area under the Lorenz curve
    area_lorenz_curve = np.trapz(lorenz_curve, dx=1/len(data))
    # calculate the Gini index
    gini_index = 1 - 2 * area_lorenz_curve
    return gini_index

output_df["gini_index"] = output_df.apply(lambda x : gini_index(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["income"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,county,job_variability,frac_employed,median_income,gini_index
aplb4d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Aberdeen City,15.196623,0.875000,27.5,Aberdeen City,27.5,0.607143,5455.766113,0.519385
Adur; Worthing,21.108135,0.894737,18.5,Adur; Worthing,18.5,0.421053,5003.030029,0.470579
Allerdale; Carlisle,16.277886,0.963303,54.0,Allerdale; Carlisle,54.0,0.605505,6204.619629,0.442821
Alnwick; Berwick; Morpeth; Tynedale,23.060996,1.000000,3.0,Alnwick; Berwick; Morpeth; Tynedale,3.0,0.285714,3715.673340,0.377750
Alyn & Deeside; Delyn; Wrexham Maelor,17.071726,0.975610,61.0,Alyn & Deeside; Delyn; Wrexham Maelor,61.0,0.455285,5108.415039,0.424249
...,...,...,...,...,...,...,...,...
Wirral,19.975800,0.933333,52.0,Wirral,52.0,0.361905,4551.285156,0.563936
Wokingham,15.587291,1.000000,5.0,Wokingham,5.0,0.818182,10648.406250,0.278845
Wolverhampton,17.193862,0.923077,19.0,Wolverhampton,19.0,0.384615,3943.374512,0.465119
Woodspring,20.884911,0.962963,13.0,Woodspring,13.0,0.481481,12391.048828,0.395611


### Quantifying political polarization in the UK based on BHPS data (1991)
<ul>
    <li>Lindqvist and Östling (2010)</li>
    <li>Abramowitz and Saunders (2008) and Boxell et al. (2017)</li>
    <li> Esteban and Ray (1994) and Duca and Saving (2016) </li>
</ul>

In [13]:
# Define the dictionary to encode the "aopsocc" column which is Statement 1 (S1)
# S1: "Private enterprise is the best way to solve Britain’s economic problems."
aopsocc_dict = {"Agree":1, "Disagree":-1 , "Not agree/disagr": 0 ,"Strongly disagr": -2, 
                "don't know": -3 , "Strongly agree": 2 ,"proxy":-3, "missing or wild": -3, 
                "refused":-3, "inapplicable":-3}

# Create a new column "aopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["aopsocc_encoded"] = df_residence["aopsocc"].replace(aopsocc_dict)

#remove -3 valued rows from df_residence[nopsocc_encoded]
df_residence = df_residence[df_residence['aopsocc_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S1"] = df_residence.groupby("aplb4d")["aopsocc_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("aplb4d")["aopsocc_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("aplb4d")["aopsocc_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S1"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("aplb4d")["aopsocc_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S1"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,county,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1
aplb4d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Aberdeen City,15.196623,0.875000,27.5,Aberdeen City,27.5,0.607143,5455.766113,0.519385,1.561224,1.180000,1.080000
Adur; Worthing,21.108135,0.894737,18.5,Adur; Worthing,18.5,0.421053,5003.030029,0.470579,0.789683,0.805556,0.943073
Allerdale; Carlisle,16.277886,0.963303,54.0,Allerdale; Carlisle,54.0,0.605505,6204.619629,0.442821,1.215905,0.953704,5.972794
Alnwick; Berwick; Morpeth; Tynedale,23.060996,1.000000,3.0,Alnwick; Berwick; Morpeth; Tynedale,3.0,0.285714,3715.673340,0.377750,0.809524,0.714286,81.632653
Alyn & Deeside; Delyn; Wrexham Maelor,17.071726,0.975610,61.0,Alyn & Deeside; Delyn; Wrexham Maelor,61.0,0.455285,5108.415039,0.424249,0.922605,0.850877,10.278466
...,...,...,...,...,...,...,...,...,...,...,...
Wirral,19.975800,0.933333,52.0,Wirral,52.0,0.361905,4551.285156,0.563936,0.930842,0.824742,0.143534
Wokingham,15.587291,1.000000,5.0,Wokingham,5.0,0.818182,10648.406250,0.278845,0.890909,0.636364,16.528926
Wolverhampton,17.193862,0.923077,19.0,Wolverhampton,19.0,0.384615,3943.374512,0.465119,1.213213,0.891892,15.813476
Woodspring,20.884911,0.962963,13.0,Woodspring,13.0,0.481481,12391.048828,0.395611,0.840580,0.750000,11.574074


In [15]:
# Define the dictionary to encode the "aopsocd" column which is Statement 2 (S2)
# S2: "Major public services and industries ought to be in state ownership."

aopsocd_dict = {"Agree":1, "Disagree":-1, "Not agree/disagr": 0 ,"Strongly disagr": -2, 
                "don't know": -3 , "Strongly agree": -2 ,"proxy":-3, "missing or wild": -3, 
                "refused":-3, "inapplicable":-3, "Not agree/disag": 0}

# Create a new column "aopsocd_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["aopsocd_encoded"] = df_residence["aopsocd"].replace(aopsocd_dict)

#remove -3 valued rows from df_residence[nopsocc_encoded]
df_residence = df_residence[df_residence['aopsocd_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S2"] = df_residence.groupby("aplb4d")["aopsocd_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("aplb4d")["aopsocd_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("aplb4d")["aopsocd_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S2"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("aplb4d")["aopsocd_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S2"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,county,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2
aplb4d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Aberdeen City,15.196623,0.875000,27.5,Aberdeen City,27.5,0.607143,5455.766113,0.519385,1.561224,1.180000,1.080000,1.663265,1.102041,4.819420
Adur; Worthing,21.108135,0.894737,18.5,Adur; Worthing,18.5,0.421053,5003.030029,0.470579,0.789683,0.805556,0.943073,0.848485,0.764706,1.628333
Allerdale; Carlisle,16.277886,0.963303,54.0,Allerdale; Carlisle,54.0,0.605505,6204.619629,0.442821,1.215905,0.953704,5.972794,1.093458,0.934579,40.104715
Alnwick; Berwick; Morpeth; Tynedale,23.060996,1.000000,3.0,Alnwick; Berwick; Morpeth; Tynedale,3.0,0.285714,3715.673340,0.377750,0.809524,0.714286,81.632653,1.904762,1.285714,32.069971
Alyn & Deeside; Delyn; Wrexham Maelor,17.071726,0.975610,61.0,Alyn & Deeside; Delyn; Wrexham Maelor,61.0,0.455285,5108.415039,0.424249,0.922605,0.850877,10.278466,1.373053,1.027778,22.557537
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wirral,19.975800,0.933333,52.0,Wirral,52.0,0.361905,4551.285156,0.563936,0.930842,0.824742,0.143534,1.176932,0.957895,25.967634
Wokingham,15.587291,1.000000,5.0,Wokingham,5.0,0.818182,10648.406250,0.278845,0.890909,0.636364,16.528926,0.963636,0.727273,14.274981
Wolverhampton,17.193862,0.923077,19.0,Wolverhampton,19.0,0.384615,3943.374512,0.465119,1.213213,0.891892,15.813476,1.091592,0.918919,19.268355
Woodspring,20.884911,0.962963,13.0,Woodspring,13.0,0.481481,12391.048828,0.395611,0.840580,0.750000,11.574074,1.128623,0.958333,50.925926


In [16]:
# Define the dictionary to encode the "aopsocd" column which is Statement 3 (S3)
# S3: "It is the government’s responsibility to provide a job for everyone who wants one."

aopsoce_dict = {"Agree":1, "Disagree":-1, "Not agree/disagr": 0 ,"Strongly disagr": -2, 
                "don't know": -3 , "Strongly agree": -2 ,"proxy":-3, "missing or wild": -3, 
                "refused":-3, "inapplicable":-3, "Not agree/disag": 0}

# Create a new column "aopsocd_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["aopsoce_encoded"] = df_residence["aopsoce"].replace(aopsoce_dict)

#remove -3 valued rows from df_residence[nopsocc_encoded]
df_residence = df_residence[df_residence['aopsocc_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S3"] = df_residence.groupby("aplb4d")["aopsoce_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("aplb4d")["aopsoce_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("aplb4d")["aopsoce_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S3"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("aplb4d")["aopsoce_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S3"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,county,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
aplb4d,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Aberdeen City,15.196623,0.875000,27.5,Aberdeen City,27.5,0.607143,5455.766113,0.519385,1.561224,1.180000,1.080000,1.663265,1.102041,4.819420,1.875000,1.306122,8.975852
Adur; Worthing,21.108135,0.894737,18.5,Adur; Worthing,18.5,0.421053,5003.030029,0.470579,0.789683,0.805556,0.943073,0.848485,0.764706,1.628333,1.265597,1.058824,16.257887
Allerdale; Carlisle,16.277886,0.963303,54.0,Allerdale; Carlisle,54.0,0.605505,6204.619629,0.442821,1.215905,0.953704,5.972794,1.093458,0.934579,40.104715,1.522659,1.121495,6.269168
Alnwick; Berwick; Morpeth; Tynedale,23.060996,1.000000,3.0,Alnwick; Berwick; Morpeth; Tynedale,3.0,0.285714,3715.673340,0.377750,0.809524,0.714286,81.632653,1.904762,1.285714,32.069971,1.285714,1.142857,29.154519
Alyn & Deeside; Delyn; Wrexham Maelor,17.071726,0.975610,61.0,Alyn & Deeside; Delyn; Wrexham Maelor,61.0,0.455285,5108.415039,0.424249,0.922605,0.850877,10.278466,1.373053,1.027778,22.557537,1.421513,1.083333,17.001505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wirral,19.975800,0.933333,52.0,Wirral,52.0,0.361905,4551.285156,0.563936,0.930842,0.824742,0.143534,1.176932,0.957895,25.967634,1.265398,1.052632,2.915877
Wokingham,15.587291,1.000000,5.0,Wokingham,5.0,0.818182,10648.406250,0.278845,0.890909,0.636364,16.528926,0.963636,0.727273,14.274981,0.818182,0.818182,45.078888
Wolverhampton,17.193862,0.923077,19.0,Wolverhampton,19.0,0.384615,3943.374512,0.465119,1.213213,0.891892,15.813476,1.091592,0.918919,19.268355,1.447447,1.135135,0.829171
Woodspring,20.884911,0.962963,13.0,Woodspring,13.0,0.481481,12391.048828,0.395611,0.840580,0.750000,11.574074,1.128623,0.958333,50.925926,1.449275,1.083333,19.531250


In [17]:
output_df.reset_index(drop=True, inplace=True)
output_df

Unnamed: 0,std_dev_age,native_share,education_variability,county,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
0,15.196623,0.875000,27.5,Aberdeen City,27.5,0.607143,5455.766113,0.519385,1.561224,1.180000,1.080000,1.663265,1.102041,4.819420,1.875000,1.306122,8.975852
1,21.108135,0.894737,18.5,Adur; Worthing,18.5,0.421053,5003.030029,0.470579,0.789683,0.805556,0.943073,0.848485,0.764706,1.628333,1.265597,1.058824,16.257887
2,16.277886,0.963303,54.0,Allerdale; Carlisle,54.0,0.605505,6204.619629,0.442821,1.215905,0.953704,5.972794,1.093458,0.934579,40.104715,1.522659,1.121495,6.269168
3,23.060996,1.000000,3.0,Alnwick; Berwick; Morpeth; Tynedale,3.0,0.285714,3715.673340,0.377750,0.809524,0.714286,81.632653,1.904762,1.285714,32.069971,1.285714,1.142857,29.154519
4,17.071726,0.975610,61.0,Alyn & Deeside; Delyn; Wrexham Maelor,61.0,0.455285,5108.415039,0.424249,0.922605,0.850877,10.278466,1.373053,1.027778,22.557537,1.421513,1.083333,17.001505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,19.975800,0.933333,52.0,Wirral,52.0,0.361905,4551.285156,0.563936,0.930842,0.824742,0.143534,1.176932,0.957895,25.967634,1.265398,1.052632,2.915877
282,15.587291,1.000000,5.0,Wokingham,5.0,0.818182,10648.406250,0.278845,0.890909,0.636364,16.528926,0.963636,0.727273,14.274981,0.818182,0.818182,45.078888
283,17.193862,0.923077,19.0,Wolverhampton,19.0,0.384615,3943.374512,0.465119,1.213213,0.891892,15.813476,1.091592,0.918919,19.268355,1.447447,1.135135,0.829171
284,20.884911,0.962963,13.0,Woodspring,13.0,0.481481,12391.048828,0.395611,0.840580,0.750000,11.574074,1.128623,0.958333,50.925926,1.449275,1.083333,19.531250


In [18]:
output_df.to_csv(r"C:\Users\ramak\Desktop\Geog573_Lab\Geog_575_final_project\Final_output\polarization1991_data.csv")