In [1]:
import pandas as pd
import os

In [2]:
# Work from the raw-data directory; the next cell opens the files by relative path.
os.chdir("/Users/momoko/Desktop/NZ_Crime/Data/Origin_Data")
file_chdir = os.getcwd()

# Collect every CSV found under the working directory.
csv_list = []
for root, dirs, files in os.walk(file_chdir):
    for file in files:
        if os.path.splitext(file)[1] == '.csv':
            # os.walk also descends into subfolders, so store the path relative to
            # the working directory rather than the bare file name. For files that
            # sit directly in the directory this is still just the file name.
            csv_list.append(os.path.relpath(os.path.join(root, file), file_chdir))

csv_list

['Manawatu.csv',
 'Waikato.csv',
 'Gisborne.csv',
 'Southland.csv',
 'Wellington.csv',
 'Marlborough.csv',
 'Tasman.csv',
 'Auckland.csv',
 'Hawke.csv',
 'Canterbury.csv',
 'Otago.csv',
 'Northland.csv',
 'Nelson.csv',
 'WestCoast.csv',
 'Taranaki.csv',
 'BayofPlenty.csv']

In [3]:
# Load every CSV, keyed by the part of its file name before the first ".",
# then stack the frames; pd.concat uses the dict keys as the outer index level.
dfs = {name.split(".")[0]: pd.read_csv(name) for name in csv_list}

df = pd.concat(dfs)
df.head()

Unnamed: 0.1,Unnamed: 1,Unnamed: 0,Area Unit,ANZSOC Division,Year,Region,Victimisations
Manawatu,0,0,Dannevirke East.,Theft and Related Offences,2023,Manawatu Region,1
Manawatu,1,1,Norsewood-Herbertville.,Theft and Related Offences,2023,Manawatu Region,1
Manawatu,2,2,Pahiatua.,Acts Intended to Cause Injury,2023,Manawatu Region,1
Manawatu,3,3,Woodville.,Acts Intended to Cause Injury,2023,Manawatu Region,1
Manawatu,4,4,Westbrook.,Acts Intended to Cause Injury,2023,Manawatu Region,1


In [4]:
# Keep only the useful columns, renumber the rows 0..n-1, and strip the trailing
# "." from the area-unit names. Built as one chain so that no column is assigned
# into a sliced frame (the pattern behind SettingWithCopyWarning).
df = (
    df[["Area Unit", "ANZSOC Division", "Year", "Region", "Victimisations"]]
    .reset_index(drop=True)
    .assign(**{"Area Unit": lambda frame: frame["Area Unit"].str.rstrip(".")})
)
df.head()

Unnamed: 0,Area Unit,ANZSOC Division,Year,Region,Victimisations
0,Dannevirke East,Theft and Related Offences,2023,Manawatu Region,1
1,Norsewood-Herbertville,Theft and Related Offences,2023,Manawatu Region,1
2,Pahiatua,Acts Intended to Cause Injury,2023,Manawatu Region,1
3,Woodville,Acts Intended to Cause Injury,2023,Manawatu Region,1
4,Westbrook,Acts Intended to Cause Injury,2023,Manawatu Region,1


In [5]:
# Inspect the distinct region labels present in the combined crime data
df['Region'].unique()


array(['Manawatu Region', 'Waikato Region', 'Gisborne Region',
       'Southland Region', 'Wellington Region', 'Marlborough Region',
       'Tasman Region', 'Auckland Region', 'Hawke Region',
       'Canterbury Region', 'Otago Region', 'Northland', 'Nelson Region',
       'West Coast Region', 'Taranaki Region', 'Bay of Plenty Region'],
      dtype=object)

In [6]:
# Inspect the distinct ANZSOC Division (crime type) labels in the combined crime data
df['ANZSOC Division'].unique()

array(['Theft and Related Offences', 'Acts Intended to Cause Injury',
       'Unlawful Entry With Intent/Burglary, Break and Enter',
       'Robbery, Extortion and Related Offences',
       'Abduction, Harassment and Other Related Offences Against a Person',
       'Sexual Assault and Related Offences'], dtype=object)

In [7]:
# Total victimisations per (Year, Area Unit)
area_year_totals = (
    df.groupby(['Year', 'Area Unit'])['Victimisations']
    .sum()
    .reset_index()
    .rename(columns={'Victimisations': 'annual_area_victimisations'})
)

# Join the totals back onto the source rows to pick up each area unit's Region.
# The result has one row per source row, so the same total repeats.
annual_area_victimisations = df.merge(area_year_totals, on=['Year', 'Area Unit'])[
    ['Year', 'Area Unit', 'Region', 'annual_area_victimisations']
]

annual_area_victimisations.head()

Unnamed: 0,Year,Area Unit,Region,annual_area_victimisations
0,2023,Dannevirke East,Manawatu Region,387
1,2023,Dannevirke East,Manawatu Region,387
2,2023,Dannevirke East,Manawatu Region,387
3,2023,Dannevirke East,Manawatu Region,387
4,2023,Dannevirke East,Manawatu Region,387


In [8]:
# Total victimisations per (Area Unit, ANZSOC Division, Year)
annual_area_victimisations_by_types = (
    df.groupby(['Area Unit', 'ANZSOC Division', 'Year'])['Victimisations']
    .sum()
    .reset_index()
    .rename(columns={'Victimisations': 'annual_area_victimisations_by_types'})
)

annual_area_victimisations_by_types.head()

Unnamed: 0,Area Unit,ANZSOC Division,Year,annual_area_victimisations_by_types
0,Abbey Caves,Acts Intended to Cause Injury,2020,2
1,Abbey Caves,"Robbery, Extortion and Related Offences",2022,1
2,Abbey Caves,Theft and Related Offences,2020,17
3,Abbey Caves,Theft and Related Offences,2021,2
4,Abbey Caves,Theft and Related Offences,2022,1


In [10]:

# Reshape to one row per (Area Unit, Year) with one column per crime type.
# Combinations with no recorded victimisations become 0.
df_pivot = annual_area_victimisations_by_types.pivot_table(
    index=['Area Unit', 'Year'],
    columns='ANZSOC Division',
    values='annual_area_victimisations_by_types',
    # pivot_table averages by default; these are counts, so sum them explicitly.
    # The input holds one row per (Area Unit, ANZSOC Division, Year), so the
    # values are the same either way.
    aggfunc='sum',
    fill_value=0
).reset_index()

df_pivot


ANZSOC Division,Area Unit,Year,"Abduction, Harassment and Other Related Offences Against a Person",Acts Intended to Cause Injury,"Robbery, Extortion and Related Offences",Sexual Assault and Related Offences,Theft and Related Offences,"Unlawful Entry With Intent/Burglary, Break and Enter"
0,Abbey Caves,2020,0,2,0,0,17,0
1,Abbey Caves,2021,0,0,0,0,2,0
2,Abbey Caves,2022,0,0,1,0,1,0
3,Abbey Caves,2023,0,0,0,0,6,4
4,Abbotsford,2020,0,1,0,0,1,1
...,...,...,...,...,...,...,...,...
7432,Yaldhurst,2023,0,13,0,6,459,79
7433,Yatton Park,2020,0,23,1,0,13,10
7434,Yatton Park,2021,0,19,1,0,18,10
7435,Yatton Park,2022,0,17,2,0,23,10


In [14]:
# Mapping from the ANZSOC Division labels to short, identifier-friendly column names
# (the two key columns map to themselves)
new_column_names = dict([
    ('Area Unit', 'Area Unit'),
    ('Year', 'Year'),
    ('Abduction, Harassment and Other Related Offences Against a Person', 'Abduction_Harassment_Related'),
    ('Acts Intended to Cause Injury', 'Acts_Intended_to_Cause_Injury'),
    ('Robbery, Extortion and Related Offences', 'Robbery_Extortion_Related'),
    ('Sexual Assault and Related Offences', 'Sexual_Assault_Related'),
    ('Theft and Related Offences', 'Theft_Related'),
    ('Unlawful Entry With Intent/Burglary, Break and Enter', 'Unlawful_Entry_With_Intent'),
])

# Apply the new column names to the pivoted area-unit table
df_pivot_renamed = df_pivot.rename(new_column_names, axis='columns')

df_pivot_renamed.head()

ANZSOC Division,Area Unit,Year,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent
0,Abbey Caves,2020,0,2,0,0,17,0
1,Abbey Caves,2021,0,0,0,0,2,0
2,Abbey Caves,2022,0,0,1,0,1,0
3,Abbey Caves,2023,0,0,0,0,6,4
4,Abbotsford,2020,0,1,0,0,1,1


In [15]:
# Combine the yearly totals with the per-crime-type counts for each area unit
area_unit = annual_area_victimisations.merge(
    df_pivot_renamed,
    on=['Area Unit', 'Year'],
)
area_unit.head()


Unnamed: 0,Year,Area Unit,Region,annual_area_victimisations,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent
0,2023,Dannevirke East,Manawatu Region,387,0,35,4,6,295,47
1,2023,Dannevirke East,Manawatu Region,387,0,35,4,6,295,47
2,2023,Dannevirke East,Manawatu Region,387,0,35,4,6,295,47
3,2023,Dannevirke East,Manawatu Region,387,0,35,4,6,295,47
4,2023,Dannevirke East,Manawatu Region,387,0,35,4,6,295,47


In [16]:
# Load the area-unit geo data and name its label column 'Area Unit'
# so that it can be joined with the crime data on that column
area_unit_geo_path = "/Users/momoko/Desktop/NZ_Crime/Data/GeoData/statsnz-area-unit-2013-CSV/area-unit-2013.csv"
area_unit_geo_data = pd.read_csv(area_unit_geo_path).rename(
    columns={'AU2013_V1_00_NAME': 'Area Unit'}
)
area_unit_geo_data.head()

Unnamed: 0,WKT,AU2013_V1_00,Area Unit,AREA_SQ_KM,LAND_AREA_SQ_KM,Shape_Length
0,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714
1,"MULTIPOLYGON (((1648130.4696 6128680.4221,1648...",500203,Taipa Bay-Mangonui,5.545054,5.545054,25711.852169
2,"MULTIPOLYGON (((1622142.1514 6120287.4396,1623...",500204,Herekino,396.511344,391.067273,172051.055675
3,"MULTIPOLYGON (((1615173.0158 6110225.8463,1615...",500205,Ahipara,8.000104,8.000104,20638.880335
4,"MULTIPOLYGON (((1601317.2322 6194284.5759,1601...",500206,North Cape,690.237222,626.035844,177953.473387


In [17]:
# Attach the geo columns to the victimisation data, matching on 'Area Unit'.
# NOTE: `area_unit` is both input and output here, so re-running this cell
# without first re-running In [15] merges the geo columns in a second time.
area_unit = area_unit_geo_data.merge(area_unit, on='Area Unit')
area_unit.head()

Unnamed: 0,WKT,AU2013_V1_00,Area Unit,AREA_SQ_KM,LAND_AREA_SQ_KM,Shape_Length,Year,Region,annual_area_victimisations,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent
0,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,2023,Northland,137,0,14,1,2,40,80
1,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,2023,Northland,137,0,14,1,2,40,80
2,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,2023,Northland,137,0,14,1,2,40,80
3,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,2023,Northland,137,0,14,1,2,40,80
4,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",500202,Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,2023,Northland,137,0,14,1,2,40,80


In [19]:
# Show the column labels of the merged area-unit frame
print(area_unit.columns)

Index(['WKT', 'AU2013_V1_00', 'Area Unit', 'AREA_SQ_KM', 'LAND_AREA_SQ_KM',
       'Shape_Length', 'Year', 'Region', 'annual_area_victimisations',
       'Abduction_Harassment_Related', 'Acts_Intended_to_Cause_Injury',
       'Robbery_Extortion_Related', 'Sexual_Assault_Related', 'Theft_Related',
       'Unlawful_Entry_With_Intent'],
      dtype='object')


In [20]:
# Columns that hold whole-number values and are cast to int below
area_unit_int_columns = [
    'Year',
    'annual_area_victimisations',
    'Abduction_Harassment_Related',
    'Acts_Intended_to_Cause_Injury',
    'Robbery_Extortion_Related',
    'Sexual_Assault_Related',
    'Theft_Related',
    'Unlawful_Entry_With_Intent',
]

# Select the output columns (in output order), collapse the repeated rows left
# by the earlier merges, and cast the count columns to int. Doing this in one
# chain avoids assigning columns into a frame derived by slicing, which is the
# pattern behind SettingWithCopyWarning.
area_unit = (
    area_unit[['WKT', 'Area Unit', 'AREA_SQ_KM', 'LAND_AREA_SQ_KM', 'Shape_Length', 'Region', 'annual_area_victimisations', 'Abduction_Harassment_Related', 'Acts_Intended_to_Cause_Injury', 'Robbery_Extortion_Related', 'Sexual_Assault_Related', 'Theft_Related', 'Unlawful_Entry_With_Intent', 'Year']]
    .drop_duplicates()
    .astype({column: int for column in area_unit_int_columns})
)

area_unit.head()

Unnamed: 0,WKT,Area Unit,AREA_SQ_KM,LAND_AREA_SQ_KM,Shape_Length,Region,annual_area_victimisations,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent,Year
0,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,Northland,137,0,14,1,2,40,80,2023
132,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,Northland,70,1,19,0,1,19,30,2020
168,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,Northland,77,1,14,0,1,33,28,2022
200,"MULTIPOLYGON (((1636527.0069 6150940.0624,1636...",Karikari Peninsula-Maungataniwha,791.188485,784.3952,285586.495714,Northland,64,0,16,0,3,21,24,2021
234,"MULTIPOLYGON (((1648130.4696 6128680.4221,1648...",Taipa Bay-Mangonui,5.545054,5.545054,25711.852169,Northland,207,0,19,1,0,131,56,2023


In [21]:
# Save the area-unit data as a CSV file
# NOTE(review): to_csv is called without index=False, so the dataframe index is
# written as an extra unnamed first column — confirm this is intended.
area_unit.to_csv("/Users/momoko/Desktop/NZ_Crime/Data/Final_Data/Final_area_unit.csv")


In [22]:
# Total victimisations per (Region, ANZSOC Division, Year)
annual_region_victimisations_by_types = (
    df.groupby(['Region', 'ANZSOC Division', 'Year'])['Victimisations']
    .sum()
    .reset_index()
    .rename(columns={'Victimisations': 'annual_region_victimisations_by_types'})
)

annual_region_victimisations_by_types.head()

Unnamed: 0,Region,ANZSOC Division,Year,annual_region_victimisations_by_types
0,Auckland Region,"Abduction, Harassment and Other Related Offenc...",2020,46
1,Auckland Region,"Abduction, Harassment and Other Related Offenc...",2021,80
2,Auckland Region,"Abduction, Harassment and Other Related Offenc...",2022,52
3,Auckland Region,"Abduction, Harassment and Other Related Offenc...",2023,70
4,Auckland Region,Acts Intended to Cause Injury,2020,6064


In [23]:
# Reshape to one row per (Region, Year) with one column per crime type.
# Combinations with no recorded victimisations become 0.
region_pivot = annual_region_victimisations_by_types.pivot_table(
    index=['Region', 'Year'],
    columns='ANZSOC Division',
    values='annual_region_victimisations_by_types',
    # pivot_table averages by default; these are counts, so sum them explicitly.
    # The input holds one row per (Region, ANZSOC Division, Year), so the
    # values are the same either way.
    aggfunc='sum',
    fill_value=0
).reset_index()

region_pivot

ANZSOC Division,Region,Year,"Abduction, Harassment and Other Related Offences Against a Person",Acts Intended to Cause Injury,"Robbery, Extortion and Related Offences",Sexual Assault and Related Offences,Theft and Related Offences,"Unlawful Entry With Intent/Burglary, Break and Enter"
0,Auckland Region,2020,46,6064,834,366,28377,6027
1,Auckland Region,2021,80,13696,1785,852,32423,13722
2,Auckland Region,2022,52,8034,1186,468,45747,9299
3,Auckland Region,2023,70,9778,1794,741,82877,21322
4,Bay of Plenty Region,2020,11,1851,141,66,7707,1185
...,...,...,...,...,...,...,...,...
59,Wellington Region,2023,14,3218,470,264,21188,5627
60,West Coast Region,2020,0,142,4,9,300,108
61,West Coast Region,2021,1,152,0,33,178,72
62,West Coast Region,2022,0,158,10,22,264,106


In [24]:
# Mapping from the ANZSOC Division labels to short, identifier-friendly column names
# (the two key columns map to themselves)
new_columns = dict([
    ('Region', 'Region'),
    ('Year', 'Year'),
    ('Abduction, Harassment and Other Related Offences Against a Person', 'Abduction_Harassment_Related'),
    ('Acts Intended to Cause Injury', 'Acts_Intended_to_Cause_Injury'),
    ('Robbery, Extortion and Related Offences', 'Robbery_Extortion_Related'),
    ('Sexual Assault and Related Offences', 'Sexual_Assault_Related'),
    ('Theft and Related Offences', 'Theft_Related'),
    ('Unlawful Entry With Intent/Burglary, Break and Enter', 'Unlawful_Entry_With_Intent'),
])

# Apply the new column names to the pivoted region table
region_pivot_renamed = region_pivot.rename(new_columns, axis='columns')

region_pivot_renamed.head()

ANZSOC Division,Region,Year,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent
0,Auckland Region,2020,46,6064,834,366,28377,6027
1,Auckland Region,2021,80,13696,1785,852,32423,13722
2,Auckland Region,2022,52,8034,1186,468,45747,9299
3,Auckland Region,2023,70,9778,1794,741,82877,21322
4,Bay of Plenty Region,2020,11,1851,141,66,7707,1185


In [25]:
# Total victimisations per (Region, Year)
annual_region_victimisations = (
    df.groupby(['Region', 'Year'])['Victimisations']
    .sum()
    .reset_index()
    .rename(columns={'Victimisations': 'annual_region_victimisations'})
)

# Put the yearly total next to the per-crime-type counts
region = region_pivot_renamed.merge(annual_region_victimisations, on=['Region', 'Year'])
region.head()

Unnamed: 0,Region,Year,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent,annual_region_victimisations
0,Auckland Region,2020,46,6064,834,366,28377,6027,41714
1,Auckland Region,2021,80,13696,1785,852,32423,13722,62558
2,Auckland Region,2022,52,8034,1186,468,45747,9299,64786
3,Auckland Region,2023,70,9778,1794,741,82877,21322,116582
4,Bay of Plenty Region,2020,11,1851,141,66,7707,1185,10961


In [26]:
# Load the regional-council geo data and name its label column 'Region'
# so that it can be joined with the crime data on that column
region_geo_path = "/Users/momoko/Desktop/NZ_Crime/Data/GeoData/statsnz-regional-council-2023-generalised-CSV/regional-council-2023-generalised.csv"
region_geo_data = pd.read_csv(region_geo_path).rename(
    columns={'REGC2023_V1_00_NAME': 'Region'}
)
region_geo_data.head()

Unnamed: 0,WKT,REGC2023_V1_00,Region,REGC2023_V1_00_NAME_ASCII,LAND_AREA_SQ_KM,AREA_SQ_KM,Shape_Length
0,"MULTIPOLYGON (((1611941.3123 6214121.2253,1613...",1,Northland Region,Northland Region,12507.139052,30084.273236,811359.8
1,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",2,Auckland Region,Auckland Region,4941.164248,16156.206271,660973.5
2,"MULTIPOLYGON (((1871103.9568 5970628.8849,1871...",3,Waikato Region,Waikato Region,23900.953428,34888.83171,1268420.0
3,"MULTIPOLYGON (((1911825.0341 5859943.0545,1912...",4,Bay of Plenty Region,Bay of Plenty Region,12071.549623,21883.742229,1093737.0
4,"MULTIPOLYGON (((2063779.7816 5855283.2052,2064...",5,Gisborne Region,Gisborne Region,8385.064982,13989.056001,695918.5


In [27]:
# List the distinct region names used by the geo data
region_geo_data['Region'].unique()

array(['Northland Region', 'Auckland Region', 'Waikato Region',
       'Bay of Plenty Region', 'Gisborne Region', "Hawke's Bay Region",
       'Taranaki Region', 'Manawatū-Whanganui Region',
       'Wellington Region', 'West Coast Region', 'Canterbury Region',
       'Otago Region', 'Southland Region', 'Tasman Region',
       'Nelson Region', 'Marlborough Region', 'Area Outside Region'],
      dtype=object)

In [28]:
# Rewrite three geo-data region names to the spellings used in the crime data,
# so that the two tables can be joined on 'Region'
region_name_fixes = {
    'Manawatū-Whanganui Region': 'Manawatu Region',
    "Hawke's Bay Region": "Hawke Region",
    'Northland Region': 'Northland',
}
region_geo_data['Region'] = region_geo_data['Region'].replace(region_name_fixes)
region_geo_data['Region'].unique()

array(['Northland', 'Auckland Region', 'Waikato Region',
       'Bay of Plenty Region', 'Gisborne Region', 'Hawke Region',
       'Taranaki Region', 'Manawatu Region', 'Wellington Region',
       'West Coast Region', 'Canterbury Region', 'Otago Region',
       'Southland Region', 'Tasman Region', 'Nelson Region',
       'Marlborough Region', 'Area Outside Region'], dtype=object)

In [29]:
# List the distinct region names in the crime data (the geo data is merged on this column next)
region['Region'].unique()

array(['Auckland Region', 'Bay of Plenty Region', 'Canterbury Region',
       'Gisborne Region', 'Hawke Region', 'Manawatu Region',
       'Marlborough Region', 'Nelson Region', 'Northland', 'Otago Region',
       'Southland Region', 'Taranaki Region', 'Tasman Region',
       'Waikato Region', 'Wellington Region', 'West Coast Region'],
      dtype=object)

In [30]:
# Attach the geo columns to the regional victimisation data, matching on 'Region'.
# NOTE: `region` is both input and output here, so re-running this cell
# without first re-running In [25] merges the geo columns in a second time.
region = region.merge(region_geo_data, on='Region')
region

Unnamed: 0,Region,Year,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent,annual_region_victimisations,WKT,REGC2023_V1_00,REGC2023_V1_00_NAME_ASCII,LAND_AREA_SQ_KM,AREA_SQ_KM,Shape_Length
0,Auckland Region,2020,46,6064,834,366,28377,6027,41714,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",2,Auckland Region,4941.164248,16156.206271,6.609735e+05
1,Auckland Region,2021,80,13696,1785,852,32423,13722,62558,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",2,Auckland Region,4941.164248,16156.206271,6.609735e+05
2,Auckland Region,2022,52,8034,1186,468,45747,9299,64786,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",2,Auckland Region,4941.164248,16156.206271,6.609735e+05
3,Auckland Region,2023,70,9778,1794,741,82877,21322,116582,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",2,Auckland Region,4941.164248,16156.206271,6.609735e+05
4,Bay of Plenty Region,2020,11,1851,141,66,7707,1185,10961,"MULTIPOLYGON (((1911825.0341 5859943.0545,1912...",4,Bay of Plenty Region,12071.549623,21883.742229,1.093737e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,Wellington Region,2023,14,3218,470,264,21188,5627,30781,"MULTIPOLYGON (((1783549.0901 5490093.7101,1783...",9,Wellington Region,8049.474240,15945.317606,5.791960e+05
60,West Coast Region,2020,0,142,4,9,300,108,563,"MULTIPOLYGON (((1536071.5819 5480250.3799,1536...",12,West Coast Region,23245.518393,36339.584151,1.582117e+06
61,West Coast Region,2021,1,152,0,33,178,72,436,"MULTIPOLYGON (((1536071.5819 5480250.3799,1536...",12,West Coast Region,23245.518393,36339.584151,1.582117e+06
62,West Coast Region,2022,0,158,10,22,264,106,560,"MULTIPOLYGON (((1536071.5819 5480250.3799,1536...",12,West Coast Region,23245.518393,36339.584151,1.582117e+06


In [19]:
# List the distinct region names currently held in `region`
# NOTE(review): this cell's execution count (In [19]) is out of sequence with its
# neighbours — confirm it still gives this result on a fresh top-to-bottom run.
region['Region'].unique()

array(['Auckland Region', 'Bay of Plenty Region', 'Canterbury Region',
       'Gisborne Region', 'Hawke Region', 'Manawatu Region',
       'Marlborough Region', 'Nelson Region', 'Northland', 'Otago Region',
       'Southland Region', 'Taranaki Region', 'Tasman Region',
       'Waikato Region', 'Wellington Region', 'West Coast Region'],
      dtype=object)

In [32]:
# Columns that hold whole-number values and are cast to int below
region_int_columns = [
    'Year',
    'annual_region_victimisations',
    'Abduction_Harassment_Related',
    'Acts_Intended_to_Cause_Injury',
    'Robbery_Extortion_Related',
    'Sexual_Assault_Related',
    'Theft_Related',
    'Unlawful_Entry_With_Intent',
]

# Select the output columns (in output order), drop any exact duplicate rows,
# and cast the count columns to int. One astype mapping replaces eight separate
# column assignments into a frame derived by slicing.
region = (
    region[['WKT', 'Region', 'LAND_AREA_SQ_KM', 'AREA_SQ_KM', 'Shape_Length', 'annual_region_victimisations', 'Abduction_Harassment_Related', 'Acts_Intended_to_Cause_Injury', 'Robbery_Extortion_Related', 'Sexual_Assault_Related', 'Theft_Related', 'Unlawful_Entry_With_Intent','Year']]
    .drop_duplicates()
    .astype({column: int for column in region_int_columns})
)
region.head()

Unnamed: 0,WKT,Region,LAND_AREA_SQ_KM,AREA_SQ_KM,Shape_Length,annual_region_victimisations,Abduction_Harassment_Related,Acts_Intended_to_Cause_Injury,Robbery_Extortion_Related,Sexual_Assault_Related,Theft_Related,Unlawful_Entry_With_Intent,Year
0,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",Auckland Region,4941.164248,16156.206271,660973.5,41714,46,6064,834,366,28377,6027,2020
1,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",Auckland Region,4941.164248,16156.206271,660973.5,62558,80,13696,1785,852,32423,13722,2021
2,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",Auckland Region,4941.164248,16156.206271,660973.5,64786,52,8034,1186,468,45747,9299,2022
3,"MULTIPOLYGON (((1788533.2652 6047342.7999,1789...",Auckland Region,4941.164248,16156.206271,660973.5,116582,70,9778,1794,741,82877,21322,2023
4,"MULTIPOLYGON (((1911825.0341 5859943.0545,1912...",Bay of Plenty Region,12071.549623,21883.742229,1093737.0,10961,11,1851,141,66,7707,1185,2020


In [33]:
# Save the region data as a CSV file
# NOTE(review): to_csv is called without index=False, so the dataframe index is
# written as an extra unnamed first column — confirm this is intended.
region.to_csv("/Users/momoko/Desktop/NZ_Crime/Data/Final_Data/Final_region.csv")