In [1]:
import pandas as pd
import os, time
import numpy as np

In [2]:
os.chdir(r"C:\Users\Sidrcs\Downloads\SPSS_Processed\UKDA-5151-spss\spss\spss19")

In [3]:
df_residence = pd.read_csv(r"C:\Users\Sidrcs\Downloads\SPSS_Processed\UKDA-5151-spss\spss\spss19\bhps_w10\jindresp.csv")

  df_residence = pd.read_csv(r"C:\Users\Sidrcs\Downloads\SPSS_Processed\UKDA-5151-spss\spss\spss19\bhps_w10\jindresp.csv")


In [4]:
# Loading variables of interest from 2000 BHPS dataset into dataframe
df_residence = df_residence[["jopsocc","jopsocd","jopsoce","jfiyr","jfetype","jage12","jjbstatt","jregion2", "jplbornc_cc"]]

In [5]:
# Dropping rows if they donot have any spatial information
df_residence = df_residence[~df_residence["jregion2"].isin(["don't know", "inapplicable", "missing or wild", "refused"])]

In [6]:
# To view columns
df_residence.head()

Unnamed: 0,jopsocc,jopsocd,jopsoce,jfiyr,jfetype,jage12,jjbstatt,jregion2,jplbornc_cc
0,Agree,Disagree,Disagree,30849.408203125,inapplicable,66.0,Self employed,London,inapplicable
1,Agree,Neither agree/disagree,Disagree,2553.462890625,inapplicable,63.0,Family care,London,inapplicable
2,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,45.0,proxy and/or phone,London,proxy and/or phone
3,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,19.0,proxy and/or phone,London,proxy and/or phone
4,Agree,Agree,Disagree,6754.79931640625,inapplicable,21.0,"FT studt, school",London,inapplicable


In [7]:
# create an empty dataframe
output_df = pd.DataFrame()

# Group the dataframe by county and calculate standard deviation of age : "aage12"
output_df["std_dev_age"] = df_residence.groupby("jregion2")["jage12"].agg("std")

# Group the dataframe by county and number of people born in UK : "aplbornc_cc"
output_df["born"] = df_residence.groupby("jregion2")["jplbornc_cc"]

# Assuming inapplicable as people born in UK to all others
def native_share(x):
    pop_count = list(x["born"][1])
    native_count = pop_count.count("inapplicable")/len(pop_count)
    return native_count

# calculating fraction of natives in each county
output_df["native_share"] = output_df.apply(lambda x: native_share(x), axis = 1)

# Dropping "born" groupby column
output_df = output_df.drop(columns = ["born"], axis = 1)

# Creating afetype_dict based on PDF from 1995 BHPS data
jfetype_dict = {"Nursing school etc" : 1, "College of f educ" : 2, "Other trng establmnt" : 3, "Polytechnic" : 4, \
                "University" : 5, "None of the above" : 7, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy and/or phone" : -7, "refused" : -2}


# Create a new column "afetype_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["jfetype_encoded"] = df_residence["jfetype"].replace(jfetype_dict)

# Group the dataframe by county and "afetype_encoded"
output_df["education"] = df_residence.groupby("jregion2")["jfetype_encoded"]

# function to calculate dissimilarity in education
def dissimilarity_education(x):
    # loads tuple element into job
    job = x["education"][1]
    # loads frequency of each education type in county as list
    freq = list(job.value_counts())
    # calculates total number of groups
    s = len(freq)
    dissim_index = 0
    for i in freq:
        # formula of dissimilarity index
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

def region_name(x):
    return x["education"][0]

output_df["education_variability"] = output_df.apply(lambda x: dissimilarity_education(x), axis = 1)
output_df["region_name"] = output_df.apply(lambda x: region_name(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["education"], axis = 1)

# Creating ajbstatt_dict based on PDF from 1991 BHPS data
jjbstatt_dict = {"Self employed" : 1, "In paid employ" : 2, "Unemployed" : 3, "Retired" : 4, "Maternity leave" : 5, \
                    "Family care" : 6,  "FT studt, school" : 7, " LT sick, disabld" : 8, \
                    "Govt trng scheme" : 9, "Something else" : 10, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy" : -7, "refused" : -2}


# Create a new column "ajbstatt_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["jjbstatt_encoded"] = df_residence["jjbstatt"].replace(jjbstatt_dict)

# Group the dataframe by county and calculate the mean and standard deviation of the encoded values
output_df["job"] = df_residence.groupby("jregion2")["jjbstatt_encoded"]

# function to calculate dissimilarity in job
def dissimilarity_job(x):
    job = x["job"][1]
    freq = list(job.value_counts())
    s = len(freq)
    dissim_index = 0
    for i in freq:
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

# function to calculate fraction of employed people within each county
def frac_employed(x):
    job = list(x["job"][1])
    frac_emp = (job.count(2)/len(job))
    return frac_emp

output_df["job_variability"] = output_df.apply(lambda x: dissimilarity_job(x), axis = 1)
output_df ["frac_employed"] = output_df.apply(lambda x: frac_employed(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["job"], axis = 1)

# replacing all Not-A-Number cases to zero i.e., proxy values
df_residence["jfiyr"] = df_residence["jfiyr"].replace("proxy and/or phone",0)

# Group the dataframe by county and calculate median based on self-reported annual income: "afiyr"
output_df["median_income"] = df_residence.groupby("jregion2")["jfiyr"].agg(["median"])

# convert "afiyr" column to float data type
df_residence["jfiyr"] = df_residence["jfiyr"].astype("float32")

# group incomes by county
output_df["income"] = df_residence.groupby("jregion2")["jfiyr"]

def gini_index(x):
    data = list(x["income"][1])
    # sort the data in ascending order
    sorted_data = np.sort(data)
    # calculate the cumulative sum of the sorted data
    cumsum_data = np.cumsum(sorted_data)
    # calculate the Lorenz curve values
    lorenz_curve = cumsum_data / cumsum_data[-1]
    # calculate the area under the Lorenz curve
    area_lorenz_curve = np.trapz(lorenz_curve, dx=1/len(data))
    # calculate the Gini index
    gini_index = 1 - 2 * area_lorenz_curve
    return gini_index

output_df["gini_index"] = output_df.apply(lambda x : gini_index(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["income"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648926,0.456207
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.571777,0.455936
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613


In [8]:
# Define the dictionary to encode the "aopsocc" column which is Statement 1 (S1)
# S1: "Private enterprise is the best way to solve Britain’s economic problems."
jopsocc_dict = {"Strongly agree": 1,"Agree":2, "Neither agree/disagree": 3 , "Disagree":4 , "Strongly disagree": 5,
                "don't know": -1 , "missing or wild": -9, "inapplicable":-8,
                "proxy and/or phone":-7, "refused":-2}

# Create a new column "aopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["jopsocc_encoded"] = df_residence["jopsocc"].replace(jopsocc_dict)

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S1"] = df_residence.groupby("jregion2")["jopsocc_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsocc_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsocc_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S1"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and jopsocc_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsocc_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S1"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,5.253813,3.07027,0.915799
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,4.607416,2.993471,1.124444
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,6.72966,3.223301,3.909184
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648926,0.456207,4.002635,3.079365,0.514903
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.571777,0.455936,4.268719,2.992114,6.244049
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,3.963309,3.156116,1.237197
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,5.120099,3.065306,0.837568
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,4.616125,3.01241,2.45048
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,5.279607,3.126624,3.590548
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,3.681433,2.980146,2.403976


In [9]:
# Define the dictionary to encode the "aopsocd" column which is Statement 2 (S2)
# S2: "Major public services and industries ought to be in state ownership."

jopsocd_dict = {"Strongly agree": 1,"Agree":2, "Neither agree/disagree": 3 , "Disagree":4 , "Strongly disagree": 5,
                "don't know": -1 , "missing or wild": -9, "inapplicable":-8,
                "proxy and/or phone":-7, "refused":-2}

# Create a new column "aopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["jopsocd_encoded"] = df_residence["jopsocd"].replace(jopsocd_dict)

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S2"] = df_residence.groupby("jregion2")["jopsocd_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsocd_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsocd_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S2"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and jopsocd_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsocd_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S2"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,5.253813,3.07027,0.915799,5.346419,3.014054,4.935704
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,4.607416,2.993471,1.124444,4.751783,3.112078,3.207456
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,6.72966,3.223301,3.909184,6.582373,3.07767,2.389909
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648926,0.456207,4.002635,3.079365,0.514903,3.91019,3.011905,1.882896
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.571777,0.455936,4.268719,2.992114,6.244049,4.208074,2.909306,7.111593
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,3.963309,3.156116,1.237197,3.631168,2.839015,1.319741
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,5.120099,3.065306,0.837568,5.171106,3.060544,2.077067
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,4.616125,3.01241,2.45048,4.684634,3.074457,2.546031
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,5.279607,3.126624,3.590548,4.830441,2.816728,3.010883
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,3.681433,2.980146,2.403976,3.828992,2.960293,2.437776


In [11]:
# Define the dictionary to encode the "aopsocd" column which is Statement 3 (S3)
# S3: "It is the government’s responsibility to provide a job for everyone who wants one."

jopsoce_dict = {"Strongly agree": 2,"Agree":1, "Neither agree/disagree": 0, "Disagree":-1 , "Strongly disagree": -2,
                "don't know": -3, "missing or wild": -3, "inapplicable": -3, "proxy and/or phone":-3, "refused":-3}

# Create a new column "aopsocd_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["jopsoce_encoded"] = df_residence["jopsoce"].replace(jopsoce_dict)
df_residence = df_residence[df_residence["jopsoce_encoded"] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S3"] = df_residence.groupby("jregion2")["jopsoce_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsoce_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsoce_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S3"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsoce_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S3"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,5.253813,3.07027,0.915799,5.346419,3.014054,4.935704,1.092008,0.930524,1.814755
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,4.607416,2.993471,1.124444,4.751783,3.112078,3.207456,1.054024,0.87858,1.921014
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,6.72966,3.223301,3.909184,6.582373,3.07767,2.389909,1.15084,0.936195,4.789184
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648926,0.456207,4.002635,3.079365,0.514903,3.91019,3.011905,1.882896,1.116564,1.012245,0.673342
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.571777,0.455936,4.268719,2.992114,6.244049,4.208074,2.909306,7.111593,1.157032,0.976132,3.00952
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,3.963309,3.156116,1.237197,3.631168,2.839015,1.319741,1.11796,0.973614,1.286461
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,5.120099,3.065306,0.837568,5.171106,3.060544,2.077067,1.084235,0.891911,4.812944
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,4.616125,3.01241,2.45048,4.684634,3.074457,2.546031,1.082817,0.891892,3.514938
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,5.279607,3.126624,3.590548,4.830441,2.816728,3.010883,1.167812,0.990182,2.823559
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,3.681433,2.980146,2.403976,3.828992,2.960293,2.437776,1.155041,0.968547,2.189734


In [11]:
output_df.reset_index(drop=True, inplace=True)
output_df

Unnamed: 0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
0,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,5.253813,3.07027,0.000916,5.346419,3.014054,0.004936,4.832573,2.927568,0.000474
1,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,4.607416,2.993471,0.001124,4.751783,3.112078,0.003207,4.659702,3.031556,0.00063
2,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,6.72966,3.223301,0.003909,6.582373,3.07767,0.00239,6.60639,3.080906,0.000546
3,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648926,0.456207,4.002635,3.079365,0.000515,3.91019,3.011905,0.001883,3.263072,2.670635,3.7e-05
4,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.571777,0.455936,4.268719,2.992114,0.006244,4.208074,2.909306,0.007112,3.729493,2.774448,0.002166
5,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,3.963309,3.156116,0.001237,3.631168,2.839015,0.00132,3.217043,2.750788,0.000227
6,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,5.120099,3.065306,0.000838,5.171106,3.060544,0.002077,5.184116,3.118367,0.000222
7,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,4.616125,3.01241,0.00245,4.684634,3.074457,0.002546,4.523064,3.102378,0.000154
8,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,5.279607,3.126624,0.003591,4.830441,2.816728,0.003011,4.55425,2.810063,0.000558
9,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,3.681433,2.980146,0.002404,3.828992,2.960293,0.002438,3.507413,2.848485,0.000745


In [12]:
output_df.to_csv(r"C:\Users\Sidrcs\Documents\Github\Geog_575_Final_Project\data\polarization2000_data.csv")