### Political Polarization Calculations using BHPS 2000 data

In [1]:
import pandas as pd
import os
import time
import numpy as np

In [2]:
os.chdir(r"A:\UW-Madison\GIS SPRING 2023\Geog 575\Final_Project\SPSS_Processed\UKDA-5151-spss\spss\spss19")

In [3]:
df_residence = pd.read_csv(r"A:\UW-Madison\GIS SPRING 2023\Geog 575\Final_Project\SPSS_Processed\UKDA-5151-spss\spss\spss19\bhps_w10\jindresp.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


### Interested variables for visualization
Look for <code>jindresp.csv</code> | Required variables are: (These are columns names and their corresponding interpretation) <ul><li><code>jopsocc</code> : "Private enterprise is the best way to solve Britain’s economic problems" (S1)</li> <li><code>jopsocd</code> : "Major public services and industries ought to be in state ownership" (S2)</li> <li><code>jopsoce</code> : "It is the government’s responsibility to provide a job for everyone who wants one" (S3)</li> <li><code>jfiyr</code> : <b>Self-reported annual income</b> </li> <li><code>jfetype</code> : <b>Educational qualification</b></li> <li><code>jplbornc_cc</code> : <b>Country of birth</b></li> <li><code>jage12</code> : <b>Age</b></li> <li><code>jjbstatt</code> : <b>Employment status</b></li> <li><code>jregion2</code> : <b>UK region name</b> </li></ul>

In [5]:
# Loading variables of interest from 2000 BHPS dataset into dataframe
df_residence = df_residence[["jopsocc","jopsocd","jopsoce","jfiyr","jfetype","jage12","jjbstatt","jregion2", "jplbornc_cc"]]

In [6]:
# Dropping rows if they donot have any spatial information
df_residence = df_residence[~df_residence["jregion2"].isin(["don't know", "inapplicable", "missing or wild", "refused"])]

In [7]:
# To view columns
df_residence.head()

Unnamed: 0,jopsocc,jopsocd,jopsoce,jfiyr,jfetype,jage12,jjbstatt,jregion2,jplbornc_cc
0,Agree,Disagree,Disagree,30849.408203125,inapplicable,66.0,Self employed,London,inapplicable
1,Agree,Neither agree/disagree,Disagree,2553.462890625,inapplicable,63.0,Family care,London,inapplicable
2,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,45.0,proxy and/or phone,London,proxy and/or phone
3,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,proxy and/or phone,19.0,proxy and/or phone,London,proxy and/or phone
4,Agree,Agree,Disagree,6754.79931640625,inapplicable,21.0,"FT studt, school",London,inapplicable


In [8]:
df_residence['jopsocc'].value_counts()

Neither agree/disagree    5819
Disagree                  4014
Agree                     3189
don't know                 873
Strongly disagree          579
proxy and/or phone         510
Strongly agree             429
Name: jopsocc, dtype: int64

In [9]:
# create an empty dataframe
output_df = pd.DataFrame()

# Group the dataframe by county and calculate standard deviation of age : "jage12"
output_df["std_dev_age"] = df_residence.groupby("jregion2")["jage12"].agg("std")

# Group the dataframe by county and number of people born in UK : "jplbornc_cc"
output_df["born"] = df_residence.groupby("jregion2")["jplbornc_cc"]

# Assuming inapplicable as people born in UK to all others
def native_share(x):
    pop_count = list(x["born"][1])
    native_count = pop_count.count("inapplicable")/len(pop_count)
    return native_count

# calculating fraction of natives in each county
output_df["native_share"] = output_df.apply(lambda x: native_share(x), axis = 1)

# Dropping "born" groupby column
output_df = output_df.drop(columns = ["born"], axis = 1)

# Creating afetype_dict based on PDF from 2000 BHPS data
jfetype_dict = {"Nursing school etc" : 1, "College of f educ" : 2, "Other trng establmnt" : 3, "Polytechnic" : 4, \
                "University" : 5, "None of the above" : 7, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy and/or phone" : -7, "refused" : -2}


# Create a new column "jfetype_encoded" in the dataframe by replacing the values with encoded values from the dictionary
#df_residence["jfetype_encoded"] = df_residence["jfetype"].replace(jfetype_dict)
df_residence.loc[:, "jfetype_encoded"] = df_residence["jfetype"].replace(jfetype_dict)

# Group the dataframe by county and "jfetype_encoded"
output_df["education"] = df_residence.groupby("jregion2")["jfetype_encoded"]

# function to calculate dissimilarity in education
def dissimilarity_education(x):
    # loads tuple element into job
    job = x["education"][1]
    # loads frequency of each education type in county as list
    freq = list(job.value_counts())
    # calculates total number of groups
    s = len(freq)
    dissim_index = 0
    for i in freq:
        # formula of dissimilarity index
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

def region_name(x):
    return x["education"][0]

output_df["education_variability"] = output_df.apply(lambda x: dissimilarity_education(x), axis = 1)
output_df["region_name"] = output_df.apply(lambda x: region_name(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["education"], axis = 1)

# Creating ajbstatt_dict based on PDF from 2000 BHPS data
jjbstatt_dict = {"Self employed" : 1, "In paid employ" : 2, "Unemployed" : 3, "Retired" : 4, "Maternity leave" : 5, \
                    "Family care" : 6,  "FT studt, school" : 7, " LT sick, disabld" : 8, \
                    "Govt trng scheme" : 9, "Something else" : 10, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy" : -7, "refused" : -2}


# Create a new column "jjbstatt_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence.loc[:, "jjbstatt_encoded"] = df_residence["jjbstatt"].replace(jjbstatt_dict)

# Group the dataframe by county and calculate the mean and standard deviation of the encoded values
output_df["job"] = df_residence.groupby("jregion2")["jjbstatt_encoded"]

# function to calculate dissimilarity in job
def dissimilarity_job(x):
    job = x["job"][1]
    freq = list(job.value_counts())
    s = len(freq)
    dissim_index = 0
    for i in freq:
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

# function to calculate fraction of employed people within each county
def frac_employed(x):
    job = list(x["job"][1])
    frac_emp = (job.count(2)/len(job))
    return frac_emp

output_df["job_variability"] = output_df.apply(lambda x: dissimilarity_job(x), axis = 1)
output_df ["frac_employed"] = output_df.apply(lambda x: frac_employed(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["job"], axis = 1)

# replacing all Not-A-Number cases to zero i.e., proxy values
#df_residence["jfiyr"] = df_residence["jfiyr"].replace("proxy and/or phone",0)
df_residence.loc[:, "jfiyr"] = df_residence["jfiyr"].replace("proxy and/or phone",0).astype("float32")

# Group the dataframe by county and calculate median based on self-reported annual income: "jfiyr"
output_df["median_income"] = df_residence.groupby("jregion2")["jfiyr"].agg(["median"])

# convert "afiyr" column to float data type
#df_residence["jfiyr"] = df_residence["jfiyr"].astype("float32")

# group incomes by county
output_df["income"] = df_residence.groupby("jregion2")["jfiyr"]

def gini_index(x):
    data = list(x["income"][1])
    # sort the data in ascending order
    sorted_data = np.sort(data)
    # calculate the cumulative sum of the sorted data
    cumsum_data = np.cumsum(sorted_data)
    # calculate the Lorenz curve values
    lorenz_curve = cumsum_data / cumsum_data[-1]
    # calculate the area under the Lorenz curve
    area_lorenz_curve = np.trapz(lorenz_curve, dx=1/len(data))
    # calculate the Gini index
    gini_index = 1 - 2 * area_lorenz_curve
    return gini_index

output_df["gini_index"] = output_df.apply(lambda x : gini_index(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["income"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648438,0.456207
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.572266,0.455936
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613


In [10]:
# Define the dictionary to encode the "jopsocc" column which is Statement 1 (S1)
# S1: "Private enterprise is the best way to solve Britain’s economic problems."
jopsocc_dict = {"Strongly agree": 2,"Agree":1, "Neither agree/disagree":0, "Disagree":-1 , "Strongly disagree": -2,
                "Not agree, disagree": -3 , 
                "don't know": -3 , "missing or wild": -3, "inapplicable":-3,
                "proxy and/or phone":-3, "refused":-3}

In [15]:
# Create a new column "jopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
#df_residence["jopsocc_encoded"] = df_residence["jopsocc"].replace(jopsocc_dict)
df_residence.loc[:, "jopsocc_encoded"] = df_residence["jopsocc"].replace(jopsocc_dict)

df_residence["jopsocc_encoded"].value_counts()

 0    5819
-1    4014
 1    3189
-2     579
 2     429
Name: jopsocc_encoded, dtype: int64

In [16]:
# Define the dictionary to encode the "jopsocc" column which is Statement 1 (S1)
# S1: "Private enterprise is the best way to solve Britain’s economic problems."
jopsocc_dict = {"Strongly agree": 2,"Agree":1, "Neither agree/disagree":0, "Disagree":-1 , "Strongly disagree": -2,
                "Not agree, disagree": -3 , 
                "don't know": -3 , "missing or wild": -3, "inapplicable":-3,
                "proxy and/or phone":-3, "refused":-3}


# Create a new column "jopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
#df_residence["jopsocc_encoded"] = df_residence["jopsocc"].replace(jopsocc_dict)
df_residence.loc[:, "jopsocc_encoded"] = df_residence["jopsocc"].replace(jopsocc_dict)

#remove -3 valued rows from df_residence[jopsocc_encoded]
df_residence = df_residence[df_residence['jopsocc_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S1"] = df_residence.groupby("jregion2")["jopsocc_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsocc_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsocc_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S1"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and jopsocc_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsocc_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S1"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,0.678167,0.58263,0.946555
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,0.74515,0.618992,1.015369
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,0.82421,0.683649,2.306835
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648438,0.456207,0.770024,0.666667,0.574462
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.572266,0.455936,0.8371,0.660854,2.728407
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,0.820432,0.699466,1.579434
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,0.646346,0.568468,0.430655
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,0.767579,0.631461,2.223928
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,0.879592,0.71875,2.375814
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,0.735306,0.612867,1.605844


In [17]:
# Define the dictionary to encode the "jopsocd" column which is Statement 2 (S2)
# S2: "Major public services and industries ought to be in state ownership."

jopsocd_dict = {"Strongly agree": 2,"Agree":1, "Neither agree/disagree":0, "Disagree":-1 , "Strongly disagree": -2,
                "Not agree, disagree": -3 , 
                "don't know": -3 , "missing or wild": -3, "inapplicable":-3,
                "proxy and/or phone":-3, "refused":-3}

# Create a new column "jopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
#df_residence["jopsocd_encoded"] = df_residence["jopsocd"].replace(jopsocd_dict)
df_residence.loc[:, "jopsocd_encoded"] = df_residence["jopsocd"].replace(jopsocd_dict)

#remove -3 valued rows from df_residence[jopsocc_encoded]
df_residence = df_residence[df_residence['jopsocd_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S2"] = df_residence.groupby("jregion2")["jopsocd_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsocd_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsocd_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S2"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and jopsocc_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsocd_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S2"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

In [18]:
# Define the dictionary to encode the "jopsocd" column which is Statement 3 (S3)
# S3: "It is the government’s responsibility to provide a job for everyone who wants one."

jopsoce_dict = {"Strongly agree": 2,"Agree":1, "Neither agree/disagree":0, "Disagree":-1 , "Strongly disagree": -2,
                "Not agree, disagree": -3 , 
                "don't know": -3 , "missing or wild": -3, "inapplicable":-3,
                "proxy and/or phone":-3, "refused":-3}

# Create a new column "jopsocd_encoded" in the dataframe by replacing the values with encoded values from the dictionary
#df_residence["jopsoce_encoded"] = df_residence["jopsoce"].replace(jopsoce_dict)
df_residence.loc[:, "jopsoce_encoded"] = df_residence["jopsoce"].replace(jopsoce_dict)

#remove -3 valued rows from df_residence[jopsocc_encoded]
df_residence = df_residence[df_residence['jopsoce_encoded'] != -3]

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S3"] = df_residence.groupby("jregion2")["jopsoce_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("jregion2")["jopsoce_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("jregion2")["jopsoce_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S3"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and eopsocc_encoded values
output_df["dkp"] = df_residence.groupby("jregion2")["jopsoce_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S3"] = output_df.apply(lambda x: duca_saving(x)*1000, axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
jregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
East Midlands,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,0.678167,0.58263,0.946555,0.880662,0.724691,1.416502,1.10594,0.92698,1.983359
East of England,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,0.74515,0.618992,1.015369,0.894472,0.724057,4.722817,1.055868,0.877069,2.056166
London,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,0.82421,0.683649,2.306835,0.989267,0.789474,3.302728,1.160025,0.9375,5.162416
North East,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648438,0.456207,0.770024,0.666667,0.574462,0.955132,0.770089,4.428563,1.141374,1.002232,0.817068
North West,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.572266,0.455936,0.8371,0.660854,2.728407,0.97148,0.766071,3.674518,1.162476,0.972172,2.838004
Scotland,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,0.820432,0.699466,1.579434,0.973958,0.790968,1.885656,1.125558,0.965997,1.349256
South East,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,0.646346,0.568468,0.430655,0.907561,0.744776,1.966269,1.095113,0.894461,5.264233
South West,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,0.767579,0.631461,2.223928,0.95881,0.747429,4.491554,1.061933,0.881881,3.028365
Wales,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,0.879592,0.71875,2.375814,1.044828,0.848289,2.14493,1.170458,0.983193,3.074358
West Midlands,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,0.735306,0.612867,1.605844,0.947477,0.759302,1.999748,1.152402,0.96028,2.401548


In [19]:
output_df.reset_index(drop=True, inplace=True)
output_df

Unnamed: 0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
0,18.275323,0.958919,462.0,East Midlands,462.0,0.511351,8678.310547,0.449646,0.678167,0.58263,0.946555,0.880662,0.724691,1.416502,1.10594,0.92698,1.983359
1,18.773603,0.959739,459.0,East of England,459.0,0.503808,9591.96582,0.493658,0.74515,0.618992,1.015369,0.894472,0.724057,4.722817,1.055868,0.877069,2.056166
2,18.378665,0.923409,463.0,London,463.0,0.514563,10688.22168,0.505179,0.82421,0.683649,2.306835,0.989267,0.789474,3.302728,1.160025,0.9375,5.162416
3,18.756243,0.974206,251.5,North East,251.5,0.46627,8281.648438,0.456207,0.770024,0.666667,0.574462,0.955132,0.770089,4.428563,1.141374,1.002232,0.817068
4,18.88878,0.972397,633.5,North West,633.5,0.495268,9418.572266,0.455936,0.8371,0.660854,2.728407,0.97148,0.766071,3.674518,1.162476,0.972172,2.838004
5,18.180234,0.969923,1745.0,Scotland,1745.0,0.486107,8709.897461,0.453888,0.820432,0.699466,1.579434,0.973958,0.790968,1.885656,1.125558,0.965997,1.349256
6,18.342758,0.952381,734.5,South East,734.5,0.521769,10013.899414,0.463005,0.646346,0.568468,0.430655,0.907561,0.744776,1.966269,1.095113,0.894461,5.264233
7,19.18972,0.958635,483.0,South West,483.0,0.490176,8671.80957,0.472033,0.767579,0.631461,2.223928,0.95881,0.747429,4.491554,1.061933,0.881881,3.028365
8,18.570539,0.959347,1500.0,Wales,1500.0,0.410863,8300.0,0.468485,0.879592,0.71875,2.375814,1.044828,0.848289,2.14493,1.170458,0.983193,3.074358
9,18.821393,0.971787,478.0,West Midlands,478.0,0.500522,8520.920898,0.446613,0.735306,0.612867,1.605844,0.947477,0.759302,1.999748,1.152402,0.96028,2.401548


In [20]:
output_df.to_csv(r"C:\Users\ramak\Desktop\Geog573_Lab\Geog_575_final_project\Final_output\polarization2000_data.csv")