In [1]:
import glob
import pandas as pd
import numpy as np
import os
import csv
import sys

import random

In [4]:
#Read CSV file containing the persons from the North East of England
df_persons_NE_Household_composition__dir = r'C:\Users\b9055315\Documents\PhD_PROJECT\Synthetic_population_developement\SPENSER\Data\NE_only' # use your path
df_persons_NE_Household_composition__file = os.path.join(df_persons_NE_Household_composition__dir, "df_persons_NE_clean.csv")
df_persons_NE_Household_composition = pd.read_csv(df_persons_NE_Household_composition__file, index_col=None, header=0)

In [6]:
#Read CSV file containing the households from the North East of England
df_households_NE_extended__dir = r'C:\Users\b9055315\Documents\PhD_PROJECT\Synthetic_population_developement\SPENSER\Data\NE_only' # use your path
df_households_NE_extended_file = os.path.join(df_households_NE_extended__dir, "df_households_NE_clean.csv")
df_households_NE_extended = pd.read_csv(df_households_NE_extended_file, index_col=None, header=0)


In [7]:
#Read CSV file containing the classification of the OA:
Rural_Urban_OA_dir = r'C:\Users\b9055315\Documents\PhD_PROJECT\Synthetic_population_developement\Data\ONS\Rural_Urban_OAlevels\RUC11_OA11_EW' # use your path
df_Rural_Urban_OA_file = os.path.join(Rural_Urban_OA_dir, "RUC11_OA11_EW.csv")
df_Rural_Urban_OA = pd.read_csv(df_Rural_Urban_OA_file, index_col=None, header=0)

In [9]:
#Join df_persons_NE_Household_composition with df_Rural_Urban_OA in order to get the type of rural/urban area per OA area
df_persons_NE_Household_composition = df_persons_NE_Household_composition.merge(df_Rural_Urban_OA, left_on='Area_OA_x', right_on='OA11CD', how='left')

In [10]:
# Remove unnecesary columns
df_persons_NE_Household_composition= df_persons_NE_Household_composition.drop(['OA11CD', 'RUC11CD', 'BOUND_CHGIND', 'ASSIGN_CHGIND', 'ASSIGN_CHREASON'], axis = 1)

In [11]:
#Calculate the number of different urban classifications values
df_persons_NE_Household_composition['RUC11'].value_counts()

Urban major conurbation                                     1152170
Urban city and town                                          993870
Rural town and fringe                                        311330
Rural village                                                 65767
Rural hamlets and isolated dwellings                          42269
Rural town and fringe in a sparse setting                     28963
Rural village in a sparse setting                             19501
Rural hamlets and isolated dwellings in a sparse setting      18721
Urban city and town in a sparse setting                       12926
Name: RUC11, dtype: int64

In [12]:
#Replace RUC11 values for the ones that appear in the nts9901.ods table 
#(Full car driving licence holders by gender, region and Rural-Urban Classification1: 17 years old and over, England, 2002/03 and 2020)
#(4 categories only Urban Conurbation, Urban city and town, Rural Village, Hamlet and Isolated Dwelling, Rural town and fringe)

#Urban Conurbation
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Urban major conurbation', 'RUC11'] = 'Urban_Conurbation'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Urban minor conurbation', 'RUC11'] = 'Urban_Conurbation'

#Urban city and town
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Urban city and town', 'RUC11'] = 'Urban_city_and_town'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Urban city and town in a sparse setting', 'RUC11'] = 'Urban_city_and_town'

#Rural Village, Hamlet and Isolated Dwelling
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural hamlets and isolated dwellings', 'RUC11'] = 'Rural_Village_Hamlet_and_Isolated_Dwelling'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural hamlets and isolated dwellings in a sparse setting', 'RUC11'] = 'Rural_Village_Hamlet_and_Isolated_Dwelling'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural village', 'RUC11'] = 'Rural_Village_Hamlet_and_Isolated_Dwelling'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural village in a sparse setting', 'RUC11'] = 'Rural_Village_Hamlet_and_Isolated_Dwelling'

#Rural town and fringe
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural town and fringe', 'RUC11'] = 'Rural_town_and_fringe'
df_persons_NE_Household_composition.loc[df_persons_NE_Household_composition.RUC11 == 'Rural town and fringe in a sparse setting', 'RUC11'] = 'Rural_town_and_fringe'


In [13]:
# AGAIN:
#Calculate the number of different urban classifications values
df_persons_NE_Household_composition['RUC11'].value_counts()

Urban_Conurbation                             1152170
Urban_city_and_town                           1006796
Rural_town_and_fringe                          340293
Rural_Village_Hamlet_and_Isolated_Dwelling     146258
Name: RUC11, dtype: int64

In [14]:
# Create a new empty column for the driving license (bool)
df_persons_NE_Household_composition["Driving_license"] = np.nan
df_persons_NE_Household_composition["Driving_license"] = df_persons_NE_Household_composition["Driving_license"].astype('bool')   
df_persons_NE_Household_composition["Driving_license"] = False

In [15]:
## FORCE ONLY ONE PERSON PER HOUSEHOLD TO BE ASSIGNED A DRIVING LICENCE IF HE/SHE LIVES IN A HOUSEHOLD WITH AT LEAST ONE CAR
# Create a dataframe only containing people older than 17 and with at least one car in the hosuehold
df_persons_household_car_17plus = df_persons_NE_Household_composition.loc[(df_persons_NE_Household_composition['LC4202_C_CARSNO_x'] > 1) & (df_persons_NE_Household_composition['Age'] > 17)]
df_persons_household_car_17plus.sample(frac=1)

# Create a dataframe containing only one person per household (based on 'VALUES FROM NTS0201' PK value) with at least one car
df_persons_driving_licence_forced = df_persons_household_car_17plus.drop_duplicates(subset = 'HID_AreaOA_x')



## SELECT THE REMAINING DRIVING LICENCE TO PEOPLE BASED ON NATIONAL TRAVEL SURVEY TABLES (NTS9901 AND NTS0201)
# List with the gender values (1:male, 2:female)
gender_list = [1,2]

## VALUES FROM NTS9901
# Save in a list the different values of urban/rural areas
#rural_urban_areas_names_list = ['Urban_Conurbation', 'Urban_city_and_town', 'Rural_town_and_fringe', 'Rural_Village_Hamlet_and_Isolated_Dwelling']

## VALUES FROM NTS0201
#List containg the range values of groups of age
age_range_list = [(18,20), (21,29), (30,39), (40,49), (50,59), (60,69), (70,120)]

# Values given by me to force to choose specific people when there is not a car, one car and more than one car available in the household
household_car_weight_list= [0.2,0.3,0.5]

## VALUES FROM NTS0201
# % of men by age that have a driving licence
men_driving_percentage_list = [0.34,0.65,0.83,0.89,0.89,0.90,0.81]

## VALUES FROM NTS0201
# % of women by age that have a driving licence [ADAPTED TO MATCH THE % IN THE NORTH EAST!!]
women_driving_percentage_list = [0.31,0.54,0.66,0.74,0.74,0.70,0.49]

#Create an empty list where the small blocks of dataframes will be stored
df_persons_temp = []



for gender in gender_list:
    
    # create a dataframe only containing males (1) or females (2)
    if gender ==1:
        df_gender = df_persons_NE_Household_composition.loc[(df_persons_NE_Household_composition['Sex'] == gender)]

    elif gender == 2:
        df_gender = df_persons_NE_Household_composition.loc[(df_persons_NE_Household_composition['Sex'] == gender)]
        
    
    for age_range in age_range_list:
            
        # Choose the values of the percentage of people driving based on their age and GENDER:
        if age_range == (18,20):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[0]
            else:
                driving_percentage = women_driving_percentage_list[0]
        elif age_range == (21,29):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[1]
            else:
                driving_percentage = women_driving_percentage_list[1]
        elif age_range == (30,39):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[2]
            else:
                driving_percentage = women_driving_percentage_list[2]
        elif age_range == (40,49):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[3]
            else:
                driving_percentage = women_driving_percentage_list[3]
        elif age_range == (50,59):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[4]
            else:
                driving_percentage = women_driving_percentage_list[4]
        elif age_range == (60,69):
            if gender == 1:
                driving_percentage = men_driving_percentage_list[5]
            else:
                driving_percentage = women_driving_percentage_list[5]
        elif age_range == (70,120):
            if gender == 1: 
                driving_percentage = men_driving_percentage_list[6]
            else:
                driving_percentage = women_driving_percentage_list[6] 
                
                
        # Create a dataframe containing only the previous selected gender and one specific rural/urban area
        globals()[f"df_{gender}"] = df_gender
        
        # Create a dataframe containing only the previous selected gender and rural/urban area with a specific range of age (e.g. 17-20) 
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"] = globals()[f"df_{gender}"].loc[((globals()[f"df_{gender}"]['Age'] >= age_range[0])) &((globals()[f"df_{gender}"]['Age'] <= age_range[1]))]
            
            
        # Dataframe containing only those people assigned a driving licence (forced) in a specific range of age (e.g. 17-20), sex and urban/rural location
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"] = df_persons_driving_licence_forced.loc[(df_persons_driving_licence_forced['Age'] >= age_range[0]) & (df_persons_driving_licence_forced['Age'] <= age_range[1]) & (df_persons_driving_licence_forced['Sex'] == gender)]
            
            
        # Remove those forced people to have a driving licence and keep only those who can be assigned a driving licence based on age, sex and urban/rural location attribute values
        # Concatenate the dataframe containing all persons from the same sex, urban/rural area and range of year, with the ones from these categories that have been selected to get a driving licence    
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"] = (pd.concat([globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"], globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"]]))

        # Remove duplicates and keep only those who were not assigned a driving licence
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"] = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"].drop_duplicates(keep = False)
            
        #print(len(globals()[f"df_{gender}_{area}_{age_range[0]}_{age_range[1]}_remaining"]))
              
            
        # Split the previous dataframe in three: people with no car in the household, people with one car and people with more than one
            
        ## No car
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_nocar"] = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"].loc[((globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"]['LC4202_C_CARSNO_x'] == 1))]
            
        ##One car
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecar"] = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"].loc[((globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"]['LC4202_C_CARSNO_x'] == 2))]
            
        ##One car plus
        globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecarplus"] = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"].loc[((globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining"]['LC4202_C_CARSNO_x'] == 3))]
                           
            

        if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[0]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"]), 0)) > 0:
                
            # Choose a specific number of people who live in households without car based on values from household_car_weight_list MINUS the number of forced people assigned with a driving licence previously gropued by sex, age and urban/rural area (dataframe: "df_persons_driving_licence_forced"), (just because they live in a household with at least one car)
            if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[0]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0)) <= len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_nocar"]):
                df_no_car_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_nocar"].sample(int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[0]*driving_percentage, 0)) - int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0)))
            else:
                df_no_car_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_nocar"]
            
        else:
            df_no_car_chosen = pd.DataFrame()

            
        if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[1]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"]), 0)) > 0:

        # Choose a specific number of people who live in households with ONE car based on values from household_car_weight_list
            if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[1]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0)) <= len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecar"]):
                df_no_car_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecar"].sample(int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[1]*driving_percentage, 0)) - int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0))) 
            else:
                df_one_car_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecar"]
  
        else:
            df_one_car_chosen = pd.DataFrame()
            
        if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[2]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"]), 0)) > 0:
                
        # Choose a specific number of people who live in households with more than one car based on values from household_car_weight_list
            if int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[2]*driving_percentage - len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0)) <= len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecarplus"]):
                df_one_car_plus_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecarplus"].sample(int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"])*household_car_weight_list[2]*driving_percentage, 0)) - int(round(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"])/3, 0))) 
            else:
                df_one_car_plus_chosen = globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_remaining_onecarplus"]
                                                                                                                 
        else:
            df_one_car_plus_chosen = pd.DataFrame()



        # Concatenate the previous selected people AND the forced people
        # THESE ARE THE ONES SELECTED BASED (N SEX, LOCATION AND AGE) AND THE PREVIOUS FORCED ONES
        # THESE ARE THE ONES THAT WILL HAVE A DRIVING LICENCE (UP TO NOW)
        df_persons_driving_selection = (pd.concat([df_no_car_chosen, df_one_car_chosen, df_one_car_plus_chosen, globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}_forced"]]))

                                             
        # Check the number of chosen people to be assigned and compared to the value that should be reached.
        # If the value is lower, the remaining should be collected randomly from the dataframe that contains people with the 
        # same gender and age range
        if int(round(len(df_persons_driving_selection),0)) < int(round((len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"]) * driving_percentage),0)):
            leftovers = int(len(globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"]) * driving_percentage) - len(df_persons_driving_selection)
         
        # Concatenate the dataframe containing all persons from the same sex, urban/rural area and range of year, with the ones from these categories that have been selected to get a driving licence (forced and chosen)
        df_with_duplicates = (pd.concat([globals()[f"df_{gender}_{age_range[0]}_{age_range[1]}"], df_persons_driving_selection]))
            
        # Remove duplicates and keep only those who were not assigned a driving licence
        df_leftovers = df_with_duplicates.drop_duplicates(keep = False)
            
        # If leftover value is > 0, then new drivers (value of leftovers) have to be selected and assigne then a driving licence
        if int(len(df_leftovers)) > 0:
            df_leftovers_driving = df_leftovers.sample(int(leftovers))
                
            # Concatenate the dataframe containing all persons from the same sex, urban/rural area and range of year, with the ones from these categories that have been selected to get a driving licence
            df_leftovers_with_duplicates = (pd.concat([df_leftovers, df_leftovers_driving]))
                
            # Remove duplicates and keep only those who were not assigned a driving licence
            df_people_no_driving_licence = df_leftovers_with_duplicates.drop_duplicates(keep = False)
            
            # Concatenate this new drivers with the previous ones:
            df_persons_driving_selection = (pd.concat([df_persons_driving_selection, df_leftovers_driving]))
                
            # Update attribute value "Driving_licence" = True to those who have been assigned a driving licence before
            df_persons_driving_selection["Driving_license"] = True
        else:
            df_persons_driving_selection["Driving_license"] = True
                
                     
        #Concatenate the df_persons_driving_selection and the 'leftovers' that were not assigned a driving licence
        # This new dataframe should have all persons of the specific gender, urban/rural area and range of age as the orignal dataframe
        df_persons = (pd.concat([df_persons_driving_selection, df_people_no_driving_licence]))
            
        #Append the dataframe into the temporal list
        df_persons_temp.append(df_persons)
        
#concatenate all persons (lists of the 'df_persons_NE_OA_HID_temp' list) in one dataframe
df_persons_NE_after_driving = pd.concat(df_persons_temp, axis=0, ignore_index=True)

# Create a new dataframe with only the people under 18 years old 
#(this group has not been considered before and has to be included (without driving licence) in order to 
# have the total number of people in the final dataset)
df_people_under18 = df_persons_NE_Household_composition.loc[(df_persons_NE_Household_composition['Age']< 18)]                

# Concatenate previous dataframe generated with the people under 18:
df_persons_NE_after_driving = (pd.concat([df_persons_NE_after_driving, df_people_under18]))


In [16]:
# Number of people with driving licence
aa = df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Driving_license'] == True)]

len(aa)

1529912

In [17]:
# ANALYSIS of people with/without driving licence and access to car in the household

df_persons_car_and_driving_licence = df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Age'] > 17) & (df_persons_NE_after_driving['LC4202_C_CARSNO_x'] > 1) &(df_persons_NE_after_driving['Driving_license'] == True)]
a = len(df_persons_car_and_driving_licence)


df_persons_car_and_NOdriving_licence = df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Age'] > 17) & (df_persons_NE_after_driving['LC4202_C_CARSNO_x'] > 1) &(df_persons_NE_after_driving['Driving_license'] == False)]
b = len(df_persons_car_and_NOdriving_licence)

df_persons_Nocar_and_driving_licence = df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Age'] > 17) & (df_persons_NE_after_driving['LC4202_C_CARSNO_x'] == 1) &(df_persons_NE_after_driving['Driving_license'] == True)]
c = len(df_persons_Nocar_and_driving_licence)

df_persons_drivingLicence = df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Age'] > 17) & (df_persons_NE_after_driving['Driving_license'] == True)]
d = len(df_persons_drivingLicence)

print('persons WITH driving license and at least one car in the household: ', a)
print('persons WITHOUT driving license and at least one car in the household: ', b)
print('person WITH driving license but NO car in the household: ', c)
print('TOTAL people with driving license: ', d)


perct_people_with_drivingLicence_AND_car_in_household = a/d *100
perct_people_with_drivingLicence_BUT_NO_car_in_household = c/d *100

print('% persons WITH driving license and at least one car in the household: ', perct_people_with_drivingLicence_AND_car_in_household)
print('% person WITH driving license but NO car in the household: ', perct_people_with_drivingLicence_BUT_NO_car_in_household)

persons WITH driving license and at least one car in the household:  1227728
persons WITHOUT driving license and at least one car in the household:  246908
person WITH driving license but NO car in the household:  302184
TOTAL people with driving license:  1529912
% persons WITH driving license and at least one car in the household:  80.24827571781906
% person WITH driving license but NO car in the household:  19.751724282180934


In [18]:
a= len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 18) & (df_persons_NE_after_driving['Age'] <= 20) & (df_persons_NE_after_driving['Driving_license'] == True)])

b=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 21) & (df_persons_NE_after_driving['Age'] <= 29) & (df_persons_NE_after_driving['Driving_license'] == True)])

c=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 30) & (df_persons_NE_after_driving['Age'] <= 39) & (df_persons_NE_after_driving['Driving_license'] == True)])

d=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 40) & (df_persons_NE_after_driving['Age'] <= 49) & (df_persons_NE_after_driving['Driving_license'] == True)])

e=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 50) & (df_persons_NE_after_driving['Age'] <= 59) & (df_persons_NE_after_driving['Driving_license'] == True)])

f=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 60) & (df_persons_NE_after_driving['Age'] <= 69) & (df_persons_NE_after_driving['Driving_license'] == True)])

g=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] > 70) & (df_persons_NE_after_driving['Driving_license'] == True)])


aa= len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 18) & (df_persons_NE_after_driving['Age'] <= 20) & (df_persons_NE_after_driving['Driving_license'] == False)])

bb=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 21) & (df_persons_NE_after_driving['Age'] <= 29) & (df_persons_NE_after_driving['Driving_license'] == False)])

cc=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 30) & (df_persons_NE_after_driving['Age'] <= 39) & (df_persons_NE_after_driving['Driving_license'] == False)])

dd=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 40) & (df_persons_NE_after_driving['Age'] <= 49) & (df_persons_NE_after_driving['Driving_license'] == False)])

ee=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 50) & (df_persons_NE_after_driving['Age'] <= 59) & (df_persons_NE_after_driving['Driving_license'] == False)])

ff=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] >= 60) & (df_persons_NE_after_driving['Age'] <= 69) & (df_persons_NE_after_driving['Driving_license'] == False)])

gg=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 1) & (df_persons_NE_after_driving['Age'] > 70) & (df_persons_NE_after_driving['Driving_license'] == False)])


print(a,b,c,d,e,f,g,aa,bb,cc,dd,ee,ff,gg)

15591 106250 135124 135113 162177 141735 131633 30266 57213 27677 16700 20045 15749 30810


In [19]:
a= len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 18) & (df_persons_NE_after_driving['Age'] <= 20) & (df_persons_NE_after_driving['Driving_license'] == True)])

b=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 21) & (df_persons_NE_after_driving['Age'] <= 29) & (df_persons_NE_after_driving['Driving_license'] == True)])

c=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 30) & (df_persons_NE_after_driving['Age'] <= 39) & (df_persons_NE_after_driving['Driving_license'] == True)])

d=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 40) & (df_persons_NE_after_driving['Age'] <= 49) & (df_persons_NE_after_driving['Driving_license'] == True)])

e=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 50) & (df_persons_NE_after_driving['Age'] <= 59) & (df_persons_NE_after_driving['Driving_license'] == True)])

f=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 60) & (df_persons_NE_after_driving['Age'] <= 69) & (df_persons_NE_after_driving['Driving_license'] == True)])

g=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] > 70) & (df_persons_NE_after_driving['Driving_license'] == True)])


aa= len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 18) & (df_persons_NE_after_driving['Age'] <= 20) & (df_persons_NE_after_driving['Driving_license'] == False)])

bb=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 21) & (df_persons_NE_after_driving['Age'] <= 29) & (df_persons_NE_after_driving['Driving_license'] == False)])

cc=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 30) & (df_persons_NE_after_driving['Age'] <= 39) & (df_persons_NE_after_driving['Driving_license'] == False)])

dd=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 40) & (df_persons_NE_after_driving['Age'] <= 49) & (df_persons_NE_after_driving['Driving_license'] == False)])

ee=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 50) & (df_persons_NE_after_driving['Age'] <= 59) & (df_persons_NE_after_driving['Driving_license'] == False)])

ff=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] >= 60) & (df_persons_NE_after_driving['Age'] <= 69) & (df_persons_NE_after_driving['Driving_license'] == False)])

gg=len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['Age'] > 70) & (df_persons_NE_after_driving['Driving_license'] == False)])


print(a,b,c,d,e,f,g,aa,bb,cc,dd,ee,ff,gg)

13495 83874 110998 117755 142343 116379 99522 30040 71450 57182 41374 50013 49877 101423


In [20]:
a= len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['RUC11'] == 'Urban_Conurbation') & (df_persons_NE_after_driving['Driving_license'] == True) & (df_persons_NE_after_driving['Age'] > 17)])

b = len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['RUC11'] == 'Urban_Conurbation') & (df_persons_NE_after_driving['Driving_license'] == False) & (df_persons_NE_after_driving['Age'] > 17)])

c = len(df_persons_NE_after_driving.loc[(df_persons_NE_after_driving['Sex'] == 2) & (df_persons_NE_after_driving['RUC11'] == 'Urban_Conurbation') & (df_persons_NE_after_driving['Age'] > 17)])

print(a, b, c)


289194 190563 479757


In [21]:
aaa = len(df_persons_NE_after_driving.loc[ (df_persons_NE_after_driving['RUC11'] == 'Rural_town_and_fringe') & (df_persons_NE_after_driving['Driving_license'] == True) & (df_persons_NE_after_driving['Age'] > 17)])

bbb = len(df_persons_NE_after_driving.loc[ (df_persons_NE_after_driving['RUC11'] == 'Rural_town_and_fringe') & (df_persons_NE_after_driving['Driving_license'] == False) & (df_persons_NE_after_driving['Age'] > 17)])

ccc = len(df_persons_NE_after_driving.loc[ (df_persons_NE_after_driving['RUC11'] == 'Rural_town_and_fringe') & (df_persons_NE_after_driving['Age'] > 17)])

print(aaa, bbb, ccc)

207767 70291 278058


In [22]:
rr = len(df_persons_NE_after_driving.loc[ (df_persons_NE_after_driving['Driving_license'] == True) & (df_persons_NE_after_driving['Age'] > 17)])

ss = len(df_persons_NE_after_driving.loc[ (df_persons_NE_after_driving['Driving_license'] == False) & (df_persons_NE_after_driving['Age'] > 17)])

print(rr,ss)

1529912 611530


In [23]:
#Export the df_persons_NE_household_composition_updated

df_persons_NE_drivinglicence_20222306_correct = df_persons_NE_after_driving

df_persons_NE_drivinglicence_20222306_correct.to_csv(r'C:\Users\b9055315\Documents\PhD_PROJECT\Synthetic_population_developement\SPENSER\Data\2019\Driving_licence\df_persons_NE_drivinglicence_20222306_correct.csv', encoding='utf-8', header=True)
