## Data Wrangling Project: Looking at the Pets of New Zealand 

## This project looked at all of the pets in New Zealands Regions 

## At the end of this notebook we will end with 4 dataframes:


One for the pet information for all the regions 


One for the population information for all three regions 


One for the income information for all three regions 


One for the home ownership information for all three regions  

We had CSVs from all of the regions in New Zealand that contained information about Pets counts, population information median income and home ownership 


The first step is to extract this information from the CSVs and make a dataframe for each of this information

To do this we sorted the information about different districts in New Zealnd (61) into folders for different regions in New Zealand. 

In [1]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.3     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



## Define functions to extract the information we want from the CSVs

In [2]:
#Extract the pets information from the CSV
#This loops through each file name in a file list and extracts information about the counts of dogs 
#in that csv
pet_reader = function(file_list,df){
        for (i in 1:length(file_list)){
              data = read_csv(file_list[i])
              pets_data = data[data$Topic == "Pets",]
              pets_data$Topic = NULL
              pets_data$NullReason = NULL
              pets_data$Source = NULL
              pets_data$ValueUnit = NULL
              pets_data$ValueLabel = NULL
              pets_data$Date = NULL
              pets_data$DateLabel = NULL  
    
              #this is specific to the pets portion, this removes the acc data
              pets_counts = pets_data[c(-1,-2,-3),]
              pets_counts
  
  
              total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
              df <- rbind(df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

        }
    return(df)
}

In [3]:
#This is a similar function but focuses on reading jus tthe population data from CSV
pop_reader = function(file_list, df){

        for (i in 1:length(file_list)){
          data = read.csv(file_list[i])
          #e.g. if we wanted household information we would change the topic to be "Households" below
          demo_data = data[data$Topic == "Demographics",]

          demo_data$Topic = NULL
          demo_data$NullReason = NULL
          demo_data$Source = NULL
          demo_data$ValueUnit = NULL
          demo_data$ValueLabel = NULL
          demo_data$Date = NULL
          demo_data$DateLabel = NULL  

          population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
          df <- rbind(df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
    }
        return(df)

}

In [4]:
home_reader = function(file_list, df){
        for (i in 1:length(file_list)){
          data = read.csv(file_list[i])
          #e.g. if we wanted household information we would change the topic to be "Households" below
          home_data = data[data$Topic == "Households",]

          home_data$Topic = NULL
          home_data$NullReason = NULL
          home_data$Source = NULL
          home_data$ValueUnit = NULL
          home_data$ValueLabel = NULL  
          home_data$Date = NULL
          home_data$DateLabel = NULL  

          ownership = home_data[home_data$Measure == "Home ownership by households",]  
          df <- rbind(df, ownership) #for each iteration, bind the new data to the building dataset
        }
        return(df)

}

In [5]:

income_reader = function(file_list, df){
        for (i in 1:length(file_list)){
          data = read.csv(file_list[i])
          #e.g. if we wanted household information we would change the topic to be "Households" below
          income_data = data[data$Topic == "Income",]

          income_data$Topic = NULL
          income_data$NullReason = NULL
          income_data$Source = NULL
          income_data$ValueUnit = NULL
          income_data$ValueLabel = NULL  
          income_data$Date = NULL
          income_data$DateLabel = NULL

          personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
          df <- rbind(df, personal_income) #for each iteration, bind the new data to the building dataset
        }
        return(df)

}

## Auckland To Hawkes Bay - Extract the information that we want from the CSVs

In [6]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Auckland")

In [7]:
#Read all of the file names in a path (which is where we have saved all of the CSVs for a region)
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Auckland")
file_list

In [8]:
#Make an empty dataframe for the 4 bits of information that we want to extract from the CSV
auckland_pet <- data.frame()
auckland_pop <- data.frame()
auckland_homes <- data.frame()
auckland_income <- data.frame()

In [9]:
auckland_pets = pet_reader(file_list, auckland_pet)
auckland_pets

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Auckland,Registered dogs — Total registered dogs,All pure and cross breeds,106078
Auckland,Registered dogs — Total registered dogs,All pure breeds,66078
Auckland,Registered dogs — Total registered dogs,Beagle,668
Auckland,Registered dogs — Total registered dogs,Bichon Frise,1619
Auckland,Registered dogs — Total registered dogs,Boxer,893
Auckland,Registered dogs — Total registered dogs,Brazilian Fila,0
Auckland,Registered dogs — Total registered dogs,"Cattle, Australian",648
Auckland,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",607
Auckland,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",1006
Auckland,Registered dogs — Total registered dogs,"Collie, Bearded",273


In [10]:
auckland_pop = pop_reader(file_list, auckland_pop)
auckland_pop

Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Auckland,Population - residents at 2013 census,Total people,1415550


In [11]:
auckland_homes = home_reader(file_list, auckland_homes)
auckland_homes

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
225,Auckland,Home ownership by households,Dwelling Owned or Partly Owned,201408
226,Auckland,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,168705
227,Auckland,Home ownership by households,Dwelling Held in a Family Trust,67533
228,Auckland,Home ownership by households,Not Elsewhere Included,31848


In [12]:
auckland_income = income_reader(file_list, auckland_income)
auckland_income

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Auckland,Median personal income ($),,29600


We can repeat the steps we have taken with the Auckland region with all of the other regions in New Zealand

## Bay of Plenty

In [13]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Bay of Plenty")

file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Bay of Plenty")
file_list

In [14]:
bop_pet <- data.frame()
bop_pop <- data.frame()
bop_homes <- data.frame()
bop_income <- data.frame()

In [15]:
bop_pet = pet_reader(file_list, bop_pet)
bop_pet

bop_pop = pop_reader(file_list, bop_pop)
bop_pop

bop_homes = home_reader(file_list, bop_homes)
bop_homes

bop_income = income_reader(file_list, bop_income)
bop_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Kawerau,Registered dogs — Total registered dogs,All pure and cross breeds,1219
Kawerau,Registered dogs — Total registered dogs,All pure breeds,570
Kawerau,Registered dogs — Total registered dogs,Beagle,1
Kawerau,Registered dogs — Total registered dogs,Bichon Frise,14
Kawerau,Registered dogs — Total registered dogs,Boxer,4
Kawerau,Registered dogs — Total registered dogs,Brazilian Fila,0
Kawerau,Registered dogs — Total registered dogs,"Cattle, Australian",10
Kawerau,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",20
Kawerau,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",12
Kawerau,Registered dogs — Total registered dogs,"Collie, Bearded",3


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Kawerau,Population - residents at 2013 census,Total people,6363
Opotiki,Population - residents at 2013 census,Total people,8436
Rotorua,Population - residents at 2013 census,Total people,65280
Tauranga,Population - residents at 2013 census,Total people,114789
Western Bay of Plenty,Population - residents at 2013 census,Total people,43692
Whakatane,Population - residents at 2013 census,Total people,32688


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Kawerau,Home ownership by households,Dwelling Owned or Partly Owned,1197
221,Kawerau,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,810
222,Kawerau,Home ownership by households,Dwelling Held in a Family Trust,123
223,Kawerau,Home ownership by households,Not Elsewhere Included,246
2201,Opotiki,Home ownership by households,Dwelling Owned or Partly Owned,1305
2211,Opotiki,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,1137
2221,Opotiki,Home ownership by households,Dwelling Held in a Family Trust,342
2231,Opotiki,Home ownership by households,Not Elsewhere Included,435
2202,Rotorua,Home ownership by households,Dwelling Owned or Partly Owned,10941
2212,Rotorua,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,8412


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Kawerau,Median personal income ($),,18700
1281,Opotiki,Median personal income ($),,20700
1282,Rotorua,Median personal income ($),,26900
1283,Tauranga,Median personal income ($),,27100
1284,Western Bay of Plenty,Median personal income ($),,26300
1285,Whakatane,Median personal income ($),,25600


## Canterbury

In [16]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Canterbury")

file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Canterbury")
file_list

In [17]:
cant_pet <- data.frame()
cant_pop <- data.frame()
cant_homes <- data.frame()
cant_income <- data.frame()

In [18]:
cant_pet = pet_reader(file_list, cant_pet)
cant_pet

cant_pop = pop_reader(file_list, cant_pop)
cant_pop

cant_homes = home_reader(file_list, cant_homes)
cant_homes

cant_income = income_reader(file_list, cant_income)
cant_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Ashburton,Registered dogs — Total registered dogs,All pure and cross breeds,6405
Ashburton,Registered dogs — Total registered dogs,All pure breeds,4465
Ashburton,Registered dogs — Total registered dogs,Beagle,32
Ashburton,Registered dogs — Total registered dogs,Bichon Frise,116
Ashburton,Registered dogs — Total registered dogs,Boxer,33
Ashburton,Registered dogs — Total registered dogs,Brazilian Fila,0
Ashburton,Registered dogs — Total registered dogs,"Cattle, Australian",28
Ashburton,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",26
Ashburton,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",21
Ashburton,Registered dogs — Total registered dogs,"Collie, Bearded",42


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Ashburton,Population - residents at 2013 census,Total people,31041
Chatham Islands,Population - residents at 2013 census,Total people,600
Christchurch,Population - residents at 2013 census,Total people,341469
Hurunui,Population - residents at 2013 census,Total people,11529
Kaikoura,Population - residents at 2013 census,Total people,3552
Mackenzie,Population - residents at 2013 census,Total people,4158
Selwyn,Population - residents at 2013 census,Total people,44595
Timaru,Population - residents at 2013 census,Total people,43932
Waimakariri,Population - residents at 2013 census,Total people,49989
Waimate,Population - residents at 2013 census,Total people,7536


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Ashburton,Home ownership by households,Dwelling Owned or Partly Owned,5910
221,Ashburton,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,3867
222,Ashburton,Home ownership by households,Dwelling Held in a Family Trust,1833
223,Ashburton,Home ownership by households,Not Elsewhere Included,594
202,Chatham Islands,Home ownership by households,Dwelling Owned or Partly Owned,102
203,Chatham Islands,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,105
204,Chatham Islands,Home ownership by households,Dwelling Held in a Family Trust,21
205,Chatham Islands,Home ownership by households,Not Elsewhere Included,18
2201,Christchurch,Home ownership by households,Dwelling Owned or Partly Owned,63468
2211,Christchurch,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,43008


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Ashburton,Median personal income ($),,32900
1281,Chatham Islands,Median personal income ($),,30800
1282,Christchurch,Median personal income ($),,29800
1283,Hurunui,Median personal income ($),,28600
1284,Kaikoura,Median personal income ($),,26500
1285,Mackenzie,Median personal income ($),,29300
1286,Selwyn,Median personal income ($),,36100
1287,Timaru,Median personal income ($),,26900
1288,Waimakariri,Median personal income ($),,30300
1289,Waimate,Median personal income ($),,24800


## Gisbourne

In [19]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Gisbourne")

file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Gisbourne")
file_list

In [20]:
gisb_pet <- data.frame()
gisb_pop <- data.frame()
gisb_homes <- data.frame()
gisb_income <- data.frame()

In [21]:
gisb_pet = pet_reader(file_list, gisb_pet)
gisb_pet

gisb_pop = pop_reader(file_list, gisb_pop)
gisb_pop

gisb_homes = home_reader(file_list, gisb_homes)
gisb_homes

gisb_income = income_reader(file_list, gisb_income)
gisb_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Gisborne,Registered dogs — Total registered dogs,All pure and cross breeds,9746
Gisborne,Registered dogs — Total registered dogs,All pure breeds,6702
Gisborne,Registered dogs — Total registered dogs,Beagle,16
Gisborne,Registered dogs — Total registered dogs,Bichon Frise,36
Gisborne,Registered dogs — Total registered dogs,Boxer,52
Gisborne,Registered dogs — Total registered dogs,Brazilian Fila,0
Gisborne,Registered dogs — Total registered dogs,"Cattle, Australian",79
Gisborne,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",27
Gisborne,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",19
Gisborne,Registered dogs — Total registered dogs,"Collie, Bearded",15


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Gisborne,Population - residents at 2013 census,Total people,43653


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Gisborne,Home ownership by households,Dwelling Owned or Partly Owned,7080
221,Gisborne,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,6006
222,Gisborne,Home ownership by households,Dwelling Held in a Family Trust,1632
223,Gisborne,Home ownership by households,Not Elsewhere Included,1275


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Gisborne,Median personal income ($),,24400


## Hawke's Bay

In [22]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Hawke's Bay")

file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Hawke's Bay")
file_list

In [23]:
hb_pet <- data.frame()
hb_pop <- data.frame()
hb_homes <- data.frame()
hb_income <- data.frame()

In [24]:
hb_pet = pet_reader(file_list, hb_pet)
hb_pet

hb_pop = pop_reader(file_list, hb_pop)
hb_pop

hb_homes = home_reader(file_list, hb_homes)
hb_homes

hb_income = income_reader(file_list, hb_income)
hb_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Central Hawke's Bay,Registered dogs — Total registered dogs,All pure and cross breeds,4953
Central Hawke's Bay,Registered dogs — Total registered dogs,All pure breeds,4020
Central Hawke's Bay,Registered dogs — Total registered dogs,Beagle,6
Central Hawke's Bay,Registered dogs — Total registered dogs,Bichon Frise,17
Central Hawke's Bay,Registered dogs — Total registered dogs,Boxer,14
Central Hawke's Bay,Registered dogs — Total registered dogs,Brazilian Fila,0
Central Hawke's Bay,Registered dogs — Total registered dogs,"Cattle, Australian",34
Central Hawke's Bay,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",13
Central Hawke's Bay,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",31
Central Hawke's Bay,Registered dogs — Total registered dogs,"Collie, Bearded",42


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Central Hawke's Bay,Population - residents at 2013 census,Total people,12720
Hastings,Population - residents at 2013 census,Total people,73245
Napier,Population - residents at 2013 census,Total people,57240
Wairoa,Population - residents at 2013 census,Total people,7890


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Central Hawke's Bay,Home ownership by households,Dwelling Owned or Partly Owned,2403
221,Central Hawke's Bay,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,1437
222,Central Hawke's Bay,Home ownership by households,Dwelling Held in a Family Trust,990
223,Central Hawke's Bay,Home ownership by households,Not Elsewhere Included,273
2201,Hastings,Home ownership by households,Dwelling Owned or Partly Owned,12177
2211,Hastings,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,8379
2221,Hastings,Home ownership by households,Dwelling Held in a Family Trust,4347
2231,Hastings,Home ownership by households,Not Elsewhere Included,1788
2202,Napier,Home ownership by households,Dwelling Owned or Partly Owned,10977
2212,Napier,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,7416


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Central Hawke's Bay,Median personal income ($),,26700
1281,Hastings,Median personal income ($),,26500
1282,Napier,Median personal income ($),,26000
1283,Wairoa,Median personal income ($),,21900


## Aggregration now 

Now we must aggregate. THis is becausae we do not want information about individual districts, we want information about the region as a whole. Therefore we can use the aggregrate function to aggregarate the dataframes by a metric we have given it (either sum or mean)

## Compress Pets

In [25]:
compress_pets = function(region){
        df<- aggregate(region['Value'], by=region['Category'], sum) 
       return(df)
}

In [26]:
auckland_pets = compress_pets(auckland_pets)
bop_pet = compress_pets(bop_pet)
cant_pet = compress_pets(cant_pet)
gisb_pet = compress_pets(gisb_pet)
hb_pet = compress_pets(hb_pet)

In [27]:
auckland_pets
bop_pet
cant_pet
gisb_pet
hb_pet

Category,Value
<chr>,<dbl>
All cross breeds,40000
All pure and cross breeds,106078
All pure breeds,66078
Beagle,668
Bichon Frise,1619
Boxer,893
Brazilian Fila,0
"Cattle, Australian",648
"Chihuahua, Long Coat",607
"Chihuahua, Smooth Coat",1006


Category,Value
<chr>,<dbl>
All cross breeds,19612
All pure and cross breeds,39480
All pure breeds,19868
Beagle,108
Bichon Frise,337
Boxer,282
Brazilian Fila,0
"Cattle, Australian",295
"Chihuahua, Long Coat",254
"Chihuahua, Smooth Coat",222


Category,Value
<chr>,<dbl>
All cross breeds,33535
All pure and cross breeds,95107
All pure breeds,61572
Beagle,454
Bichon Frise,1820
Boxer,794
Brazilian Fila,0
"Cattle, Australian",317
"Chihuahua, Long Coat",908
"Chihuahua, Smooth Coat",416


Category,Value
<chr>,<dbl>
All cross breeds,3044
All pure and cross breeds,9746
All pure breeds,6702
Beagle,16
Bichon Frise,36
Boxer,52
Brazilian Fila,0
"Cattle, Australian",79
"Chihuahua, Long Coat",27
"Chihuahua, Smooth Coat",19


Category,Value
<chr>,<dbl>
All cross breeds,8314
All pure and cross breeds,25308
All pure breeds,16994
Beagle,140
Bichon Frise,209
Boxer,142
Brazilian Fila,0
"Cattle, Australian",175
"Chihuahua, Long Coat",132
"Chihuahua, Smooth Coat",75


In [28]:
#Now we want to rename the columns in the dataframe to be more sensible 
#We will rename the category column to Breeds as this is what that column shows
#and rename each value column to the name of the region for the respective region


auckland_pets <- rename(auckland_pets, Breed = Category, Auckland = Value)
bop_pet <- rename(bop_pet, Breed = Category, Bay_of_Plenty = Value)
cant_pet <- rename(cant_pet, Breed = Category, Canterbury = Value)
gisb_pet <- rename(gisb_pet, Breed = Category, Gisbourne = Value)
hb_pet <- rename(hb_pet, Breed = Category, Hawkes_Bay = Value)


auckland_pets
bop_pet
cant_pet
gisb_pet
hb_pet

Breed,Auckland
<chr>,<dbl>
All cross breeds,40000
All pure and cross breeds,106078
All pure breeds,66078
Beagle,668
Bichon Frise,1619
Boxer,893
Brazilian Fila,0
"Cattle, Australian",648
"Chihuahua, Long Coat",607
"Chihuahua, Smooth Coat",1006


Breed,Bay_of_Plenty
<chr>,<dbl>
All cross breeds,19612
All pure and cross breeds,39480
All pure breeds,19868
Beagle,108
Bichon Frise,337
Boxer,282
Brazilian Fila,0
"Cattle, Australian",295
"Chihuahua, Long Coat",254
"Chihuahua, Smooth Coat",222


Breed,Canterbury
<chr>,<dbl>
All cross breeds,33535
All pure and cross breeds,95107
All pure breeds,61572
Beagle,454
Bichon Frise,1820
Boxer,794
Brazilian Fila,0
"Cattle, Australian",317
"Chihuahua, Long Coat",908
"Chihuahua, Smooth Coat",416


Breed,Gisbourne
<chr>,<dbl>
All cross breeds,3044
All pure and cross breeds,9746
All pure breeds,6702
Beagle,16
Bichon Frise,36
Boxer,52
Brazilian Fila,0
"Cattle, Australian",79
"Chihuahua, Long Coat",27
"Chihuahua, Smooth Coat",19


Breed,Hawkes_Bay
<chr>,<dbl>
All cross breeds,8314
All pure and cross breeds,25308
All pure breeds,16994
Beagle,140
Bichon Frise,209
Boxer,142
Brazilian Fila,0
"Cattle, Australian",175
"Chihuahua, Long Coat",132
"Chihuahua, Smooth Coat",75


## Stitch them all together

In [29]:
a_b <- auckland_pets %>%
 left_join(bop_pet, by = "Breed")

a_b_c <- a_b %>%
 left_join(cant_pet, by = "Breed")

a_b_c_g <- a_b_c %>%
 left_join(gisb_pet, by = "Breed")

final_pets <- a_b_c_g %>%
 left_join(hb_pet, by = "Breed")


In [30]:
final_pets


Breed,Auckland,Bay_of_Plenty,Canterbury,Gisbourne,Hawkes_Bay
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
All cross breeds,40000,19612,33535,3044,8314
All pure and cross breeds,106078,39480,95107,9746,25308
All pure breeds,66078,19868,61572,6702,16994
Beagle,668,108,454,16,140
Bichon Frise,1619,337,1820,36,209
Boxer,893,282,794,52,142
Brazilian Fila,0,0,0,0,0
"Cattle, Australian",648,295,317,79,175
"Chihuahua, Long Coat",607,254,908,27,132
"Chihuahua, Smooth Coat",1006,222,416,19,75


## Population

In [31]:
compress_pop = function(region){
    df <- aggregate(region["Value"], by = region['Category'], sum)
    return(df)
}

In [32]:
#Do the same as we did for pets but all in one cell. 
#So do the compress function to aggregrate the dataframes to be just regionwide counts
auckland_pop = compress_pop(auckland_pop)
bop_pop = compress_pop(bop_pop)
cant_pop = compress_pop(cant_pop)
gisb_pop = compress_pop(gisb_pop)
hb_pop = compress_pop(hb_pop)

#And rename the columns in the dataframes to be sensible
auckland_pop  <- rename(auckland_pop, Total_People = Category, Auckland = Value) 
bop_pop <- rename(bop_pop, Total_People = Category, Bay_Of_Plenty = Value) 
cant_pop <- rename(cant_pop, Total_People = Category, Canterbury = Value) 
gisb_pop <- rename(gisb_pop, Total_People = Category, Gisbourne = Value) 
hb_pop <- rename(hb_pop, Total_People = Category, Hawks_Bay = Value) 

In [33]:
#Stitch them together
a_b <- auckland_pop %>%
 left_join(bop_pop, by = "Total_People")

a_b_c <- a_b %>%
 left_join(cant_pop, by = "Total_People")

a_b_c_g <- a_b_c %>%
 left_join(gisb_pop, by = "Total_People")

final_pops <- a_b_c_g %>%
 left_join(hb_pop, by = "Total_People")

In [34]:
final_pops

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Total people,1415550,271248,559227,43653,151095


## Income

In [35]:
compress_inc = function(region){
        df<- aggregate(region['Value'], by=region['Measure'], mean) 
       return(df)
}

In [36]:
auckland_income = compress_inc(auckland_income)
bop_income = compress_inc(bop_income)
cant_income = compress_inc(cant_income)
gisb_income = compress_inc(gisb_income)
hb_income = compress_inc(hb_income)

In [37]:
auckland_income <- rename(auckland_income, Median_Income = Measure, Auckland = Value) 
bop_income <- rename(bop_income, Median_Income = Measure, Bay_Of_Plenty = Value) 
cant_income <- rename(cant_income, Median_Income = Measure, Canterbury = Value) 
gisb_income <- rename(gisb_income, Median_Income = Measure, Gisbourne = Value) 
hb_income<- rename(hb_income, Median_Income = Measure, Hawks_Bay = Value) 

In [38]:
#Stitch them together
a_b <- auckland_income %>%
 left_join(bop_income, by = "Median_Income")

a_b_c <- a_b %>%
 left_join(cant_income, by = "Median_Income")

a_b_c_g <- a_b_c %>%
 left_join(gisb_income, by = "Median_Income")

final_incomes <- a_b_c_g %>%
 left_join(hb_income, by = "Median_Income")

In [39]:
final_incomes


Median_Income,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Median personal income ($),29600,24216.67,29209.09,24400,25275


## Home Ownership

In [40]:
compress = function(region){
        df<- aggregate(region['Value'], by=region['Category'], sum) 
       return(df)
}

In [41]:
auckland_homes = compress(auckland_homes)
bop_homes = compress(bop_homes)
cant_homes = compress(cant_homes)
gisb_homes = compress(gisb_homes)
hb_homes = compress(hb_homes)

In [42]:
auckland_homes <- rename(auckland_homes, Total_People = Category, Auckland = Value) 
bop_homes <- rename(bop_homes, Total_People = Category, Bay_Of_Plenty = Value) 
cant_homes <- rename(cant_homes, Total_People = Category, Canterbury = Value) 
gisb_homes <- rename(gisb_homes, Total_People = Category, Gisbourne = Value) 
hb_homes<- rename(hb_homes, Total_People = Category, Hawks_Bay = Value) 

In [43]:
#Stitch them together
a_b <- auckland_homes %>%
 left_join(bop_homes, by = "Total_People")

a_b_c <- a_b %>%
 left_join(cant_homes, by = "Total_People")

a_b_c_g <- a_b_c %>%
 left_join(gisb_homes, by = "Total_People")

final_homes <- a_b_c_g %>%
 left_join(hb_homes, by = "Total_People")

In [44]:
final_homes

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Dwelling Held in a Family Trust,67533,16167,27543,1632,8694
Dwelling Not Owned and Not Held in a Family Trust,168705,33927,63795,6006,18399
Dwelling Owned or Partly Owned,201408,45918,110733,7080,26919
Not Elsewhere Included,31848,7476,11151,1275,3600


## So now we have 5 of the regions in NZ, we want to get the other regions as well

## Looking at Taranki region - West Coast Region

Get the pets information, the population data, the home ownership and the income

## Taranaki

In [45]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Taranaki")

In [46]:
getwd()

In [47]:
#get a list of files in the taranaki region
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Taranaki")
file_list

In [48]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
taranaki_pets <- data.frame()
taranaki_pop <- data.frame()
taranaki_homes <- data.frame()
taranaki_income <- data.frame()

In [49]:
taranaki_pets = pet_reader(file_list, taranaki_pets)
taranaki_pets

taranaki_pop = pop_reader(file_list, taranaki_pop)
taranaki_pop

taranaki_homes = home_reader(file_list, taranaki_homes)
taranaki_homes

taranaki_income = income_reader(file_list, taranaki_income)
taranaki_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
New Plymouth,Registered dogs — Total registered dogs,All pure and cross breeds,11296
New Plymouth,Registered dogs — Total registered dogs,All pure breeds,6661
New Plymouth,Registered dogs — Total registered dogs,Beagle,38
New Plymouth,Registered dogs — Total registered dogs,Bichon Frise,175
New Plymouth,Registered dogs — Total registered dogs,Boxer,100
New Plymouth,Registered dogs — Total registered dogs,Brazilian Fila,0
New Plymouth,Registered dogs — Total registered dogs,"Cattle, Australian",85
New Plymouth,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",55
New Plymouth,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",79
New Plymouth,Registered dogs — Total registered dogs,"Collie, Bearded",43


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
New Plymouth,Population - residents at 2013 census,Total people,74187
South Taranaki,Population - residents at 2013 census,Total people,26577
Stratford,Population - residents at 2013 census,Total people,8991


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,New Plymouth,Home ownership by households,Dwelling Owned or Partly Owned,14964
221,New Plymouth,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,8340
222,New Plymouth,Home ownership by households,Dwelling Held in a Family Trust,4284
223,New Plymouth,Home ownership by households,Not Elsewhere Included,1617
2201,South Taranaki,Home ownership by households,Dwelling Owned or Partly Owned,4542
2211,South Taranaki,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,3588
2221,South Taranaki,Home ownership by households,Dwelling Held in a Family Trust,1479
2231,South Taranaki,Home ownership by households,Not Elsewhere Included,735
2202,Stratford,Home ownership by households,Dwelling Owned or Partly Owned,1602
2212,Stratford,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,1053


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,New Plymouth,Median personal income ($),,29100
1281,South Taranaki,Median personal income ($),,29200
1282,Stratford,Median personal income ($),,28300


## Tasman

In [50]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Tasman")
getwd()

In [51]:
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Tasman")
file_list

In [52]:
tasman_pets <- data.frame()
tasman_pop <- data.frame()
tasman_homes <- data.frame()
tasman_income <- data.frame()

In [53]:
tasman_pets = pet_reader(file_list, tasman_pets)
tasman_pets

tasman_pop = pop_reader(file_list, tasman_pop)
tasman_pop

tasman_homes = home_reader(file_list, tasman_homes)
tasman_homes

tasman_income = income_reader(file_list, tasman_income)
tasman_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Tasman,Registered dogs — Total registered dogs,All pure and cross breeds,10919
Tasman,Registered dogs — Total registered dogs,All pure breeds,6178
Tasman,Registered dogs — Total registered dogs,Beagle,35
Tasman,Registered dogs — Total registered dogs,Bichon Frise,101
Tasman,Registered dogs — Total registered dogs,Boxer,78
Tasman,Registered dogs — Total registered dogs,Brazilian Fila,0
Tasman,Registered dogs — Total registered dogs,"Cattle, Australian",159
Tasman,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",35
Tasman,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",47
Tasman,Registered dogs — Total registered dogs,"Collie, Bearded",101


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Tasman,Population - residents at 2013 census,Total people,47154


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Tasman,Home ownership by households,Dwelling Owned or Partly Owned,10197
221,Tasman,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,4356
222,Tasman,Home ownership by households,Dwelling Held in a Family Trust,2850
223,Tasman,Home ownership by households,Not Elsewhere Included,864


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Tasman,Median personal income ($),,25700


## Waikato

In [54]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Waikato")
getwd()

In [55]:
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Waikato")
file_list

In [56]:
waikato_pets <- data.frame()
waikato_pop <- data.frame()
waikato_homes <- data.frame()
waikato_income <- data.frame()

In [57]:
waikato_pets = pet_reader(file_list, waikato_pets)
waikato_pets

waikato_pop = pop_reader(file_list, waikato_pop)
waikato_pop

waikato_homes = home_reader(file_list, waikato_homes)
tasman_homes

waikato_income = income_reader(file_list, waikato_income)
waikato_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Hamilton,Registered dogs — Total registered dogs,Boxer,56
Hamilton,Registered dogs — Total registered dogs,All pure and cross breeds,9737
Hamilton,Registered dogs — Total registered dogs,All pure breeds,4762
Hamilton,Registered dogs — Total registered dogs,Beagle,55
Hamilton,Registered dogs — Total registered dogs,Bichon Frise,134
Hamilton,Registered dogs — Total registered dogs,Brazilian Fila,0
Hamilton,Registered dogs — Total registered dogs,"Cattle, Australian",24
Hamilton,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",111
Hamilton,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",63
Hamilton,Registered dogs — Total registered dogs,"Collie, Bearded",19


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Hamilton,Population - residents at 2013 census,Total people,141612
Hauraki,Population - residents at 2013 census,Total people,17811
Matamata-Piako,Population - residents at 2013 census,Total people,31536
Otorohanga,Population - residents at 2013 census,Total people,9138
South Waikato,Population - residents at 2013 census,Total people,22071
Taupo,Population - residents at 2013 census,Total people,32907
Thames-Coromandel,Population - residents at 2013 census,Total people,26178
Waikato,Population - residents at 2013 census,Total people,63378
Waipa,Population - residents at 2013 census,Total people,46668
Waitomo,Population - residents at 2013 census,Total people,8907


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Tasman,Home ownership by households,Dwelling Owned or Partly Owned,10197
221,Tasman,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,4356
222,Tasman,Home ownership by households,Dwelling Held in a Family Trust,2850
223,Tasman,Home ownership by households,Not Elsewhere Included,864


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Hamilton,Median personal income ($),,27700
1281,Hauraki,Median personal income ($),,23100
1282,Matamata-Piako,Median personal income ($),,29700
1283,Otorohanga,Median personal income ($),,29100
1284,South Waikato,Median personal income ($),,23700
1285,Taupo,Median personal income ($),,28200
1286,Thames-Coromandel,Median personal income ($),,23200
1287,Waikato,Median personal income ($),,30500
1288,Waipa,Median personal income ($),,32000
1289,Waitomo,Median personal income ($),,25600


## Wellington

In [58]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Wellington")
getwd()

In [59]:
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Wellington")
file_list

In [60]:
wellington_pets <- data.frame()
wellington_pop <- data.frame()
wellington_homes <- data.frame()
wellington_income <- data.frame()

In [61]:
wellington_pets = pet_reader(file_list, wellington_pets)
wellington_pets

wellington_pop = pop_reader(file_list, wellington_pop)
wellington_pop

wellington_homes = home_reader(file_list, wellington_homes)
wellington_homes

wellington_income = income_reader(file_list, wellington_income)
wellington_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Carterton,Registered dogs — Total registered dogs,All pure and cross breeds,2622
Carterton,Registered dogs — Total registered dogs,All pure breeds,1631
Carterton,Registered dogs — Total registered dogs,Beagle,8
Carterton,Registered dogs — Total registered dogs,Bichon Frise,17
Carterton,Registered dogs — Total registered dogs,Boxer,12
Carterton,Registered dogs — Total registered dogs,Brazilian Fila,0
Carterton,Registered dogs — Total registered dogs,"Cattle, Australian",10
Carterton,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",16
Carterton,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",13
Carterton,Registered dogs — Total registered dogs,"Collie, Bearded",12


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Carterton,Population - residents at 2013 census,Total people,8235
Kapiti Coast,Population - residents at 2013 census,Total people,49104
Lower Hutt,Population - residents at 2013 census,Total people,98238
Masterton,Population - residents at 2013 census,Total people,23352
Porirua,Population - residents at 2013 census,Total people,51717
South Wairarapa,Population - residents at 2013 census,Total people,9528
Upper Hutt,Population - residents at 2013 census,Total people,40179
Wellington,Population - residents at 2013 census,Total people,190959
Westland,Population - residents at 2013 census,Total people,8304


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
221,Carterton,Home ownership by households,Dwelling Owned or Partly Owned,1965
222,Carterton,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,822
223,Carterton,Home ownership by households,Dwelling Held in a Family Trust,351
224,Carterton,Home ownership by households,Not Elsewhere Included,159
2211,Kapiti Coast,Home ownership by households,Dwelling Owned or Partly Owned,11472
2221,Kapiti Coast,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,4959
2231,Kapiti Coast,Home ownership by households,Dwelling Held in a Family Trust,3054
2241,Kapiti Coast,Home ownership by households,Not Elsewhere Included,990
2212,Lower Hutt,Home ownership by households,Dwelling Owned or Partly Owned,17790
2222,Lower Hutt,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,11442


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Carterton,Median personal income ($),,26700
1281,Kapiti Coast,Median personal income ($),,26900
1282,Lower Hutt,Median personal income ($),,31500
1283,Masterton,Median personal income ($),,25300
1284,Porirua,Median personal income ($),,31400
1285,South Wairarapa,Median personal income ($),,28900
1286,Upper Hutt,Median personal income ($),,32000
1287,Wellington,Median personal income ($),,37900
1288,Westland,Median personal income ($),,28900


## West Coast

In [62]:
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/West Coast")
getwd()

In [63]:
file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/West Coast")
file_list

In [64]:
west_coast_pets <- data.frame()
west_coast_pop <- data.frame()
west_coast_homes <- data.frame()
west_coast_income <- data.frame()

In [65]:
west_coast_pets = pet_reader(file_list, west_coast_pets)
west_coast_pets

west_coast_pop = pop_reader(file_list, west_coast_pop)
west_coast_pop

west_coast_homes = home_reader(file_list, west_coast_homes)
west_coast_homes

west_coast_income = income_reader(file_list, west_coast_income)
west_coast_income

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



Territorial Authority,Measure,Category,Value
<chr>,<chr>,<chr>,<dbl>
Buller,Registered dogs — Total registered dogs,All pure and cross breeds,2106
Buller,Registered dogs — Total registered dogs,All pure breeds,1150
Buller,Registered dogs — Total registered dogs,Beagle,9
Buller,Registered dogs — Total registered dogs,Bichon Frise,58
Buller,Registered dogs — Total registered dogs,Boxer,11
Buller,Registered dogs — Total registered dogs,Brazilian Fila,0
Buller,Registered dogs — Total registered dogs,"Cattle, Australian",5
Buller,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",6
Buller,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",13
Buller,Registered dogs — Total registered dogs,"Collie, Bearded",12


Territorial.Authority,Measure,Category,Value
<fct>,<fct>,<fct>,<dbl>
Buller,Population - residents at 2013 census,Total people,10473
Grey,Population - residents at 2013 census,Total people,13371


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
220,Buller,Home ownership by households,Dwelling Owned or Partly Owned,2442
221,Buller,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,1317
222,Buller,Home ownership by households,Dwelling Held in a Family Trust,339
223,Buller,Home ownership by households,Not Elsewhere Included,339
2201,Grey,Home ownership by households,Dwelling Owned or Partly Owned,2931
2211,Grey,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,1464
2221,Grey,Home ownership by households,Dwelling Held in a Family Trust,450
2231,Grey,Home ownership by households,Not Elsewhere Included,510


Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>
128,Buller,Median personal income ($),,25200
1281,Grey,Median personal income ($),,26600


## Aggregration now for these. 

## Aggregrate the pet information 

In [66]:
taranaki_pets = compress_pets(taranaki_pets)
tasman_pets = compress_pets(tasman_pets)
waikato_pets = compress_pets(waikato_pets)
wellington_pets = compress_pets(wellington_pets)
west_coast_pets = compress_pets(west_coast_pets)


In [67]:
taranaki_pets <- rename(taranaki_pets, Breed = Category, Taranaki = Value)
tasman_pets <- rename(tasman_pets, Breed = Category, Tasman = Value)
waikato_pets <- rename(waikato_pets, Breed = Category, Waikato = Value)
wellington_pets <- rename(wellington_pets, Breed = Category, Wellington = Value)
west_coast_pets <- rename(west_coast_pets, Breed = Category, West_Coast = Value)


In [68]:
taranaki_pets
tasman_pets
waikato_pets
wellington_pets
west_coast_pets

Breed,Taranaki
<chr>,<dbl>
All cross breeds,6548
All pure and cross breeds,17790
All pure breeds,11242
Beagle,54
Bichon Frise,287
Boxer,157
Brazilian Fila,0
"Cattle, Australian",142
"Chihuahua, Long Coat",123
"Chihuahua, Smooth Coat",101


Breed,Tasman
<chr>,<dbl>
All cross breeds,4741
All pure and cross breeds,10919
All pure breeds,6178
Beagle,35
Bichon Frise,101
Boxer,78
Brazilian Fila,0
"Cattle, Australian",159
"Chihuahua, Long Coat",35
"Chihuahua, Smooth Coat",47


Breed,Waikato
<chr>,<dbl>
All cross breeds,24237
All pure and cross breeds,61601
All pure breeds,37364
Beagle,224
Bichon Frise,515
Boxer,409
Brazilian Fila,0
"Cattle, Australian",702
"Chihuahua, Long Coat",434
"Chihuahua, Smooth Coat",337


Breed,Wellington
<chr>,<dbl>
All cross breeds,22329
All pure and cross breeds,50760
All pure breeds,28431
Beagle,280
Bichon Frise,620
Boxer,291
Brazilian Fila,0
"Cattle, Australian",256
"Chihuahua, Long Coat",351
"Chihuahua, Smooth Coat",231


Breed,West_Coast
<chr>,<dbl>
All cross breeds,2005
All pure and cross breeds,4743
All pure breeds,2738
Beagle,17
Bichon Frise,128
Boxer,33
Brazilian Fila,0
"Cattle, Australian",29
"Chihuahua, Long Coat",16
"Chihuahua, Smooth Coat",49


In [69]:
tar_tas <- taranaki_pets%>%
 left_join(tasman_pets, by = "Breed")

tar_tas_wai<- tar_tas %>%
 left_join(waikato_pets, by = "Breed")

tar_tas_wai_wel <- tar_tas_wai %>%
 left_join(wellington_pets, by = "Breed")

tar_to_wc_pets <- tar_tas_wai_wel %>%
 left_join(west_coast_pets, by = "Breed")



In [70]:
tar_to_wc_pets

Breed,Taranaki,Tasman,Waikato,Wellington,West_Coast
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
All cross breeds,6548,4741,24237,22329,2005
All pure and cross breeds,17790,10919,61601,50760,4743
All pure breeds,11242,6178,37364,28431,2738
Beagle,54,35,224,280,17
Bichon Frise,287,101,515,620,128
Boxer,157,78,409,291,33
Brazilian Fila,0,0,0,0,0
"Cattle, Australian",142,159,702,256,29
"Chihuahua, Long Coat",123,35,434,351,16
"Chihuahua, Smooth Coat",101,47,337,231,49


## Next we want to add these to the previous larger pet dataframe made

In [71]:
final_pets <- final_pets %>%
 left_join(tar_to_wc_pets, by = "Breed")

In [72]:
final_pets

Breed,Auckland,Bay_of_Plenty,Canterbury,Gisbourne,Hawkes_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
All cross breeds,40000,19612,33535,3044,8314,6548,4741,24237,22329,2005
All pure and cross breeds,106078,39480,95107,9746,25308,17790,10919,61601,50760,4743
All pure breeds,66078,19868,61572,6702,16994,11242,6178,37364,28431,2738
Beagle,668,108,454,16,140,54,35,224,280,17
Bichon Frise,1619,337,1820,36,209,287,101,515,620,128
Boxer,893,282,794,52,142,157,78,409,291,33
Brazilian Fila,0,0,0,0,0,0,0,0,0,0
"Cattle, Australian",648,295,317,79,175,142,159,702,256,29
"Chihuahua, Long Coat",607,254,908,27,132,123,35,434,351,16
"Chihuahua, Smooth Coat",1006,222,416,19,75,101,47,337,231,49


## Compressing the Income, population and home ownership

## Taranaki - West Coast: Population

In [73]:
taranaki_pop = compress_pop(taranaki_pop )
tasman_pop = compress_pop(tasman_pop)
waikato_pop = compress_pop(waikato_pop)
wellington_pop = compress_pop(wellington_pop)
west_coast_pop = compress_pop(west_coast_pop)


In [74]:
taranaki_pop  <- rename(taranaki_pop, Total_People = Category, Taranaki = Value) 
tasman_pop <- rename(tasman_pop, Total_People = Category, Tasman = Value) 
waikato_pop <- rename(waikato_pop, Total_People = Category, Waikato = Value) 
wellington_pop <- rename(wellington_pop, Total_People = Category, Wellington = Value) 
west_coast_pop <- rename(west_coast_pop, Total_People = Category, West_Coast = Value) 

In [75]:
#Stitch them together

a_b <- taranaki_pop %>%
 left_join(tasman_pop, by = "Total_People")

a_b_c <- a_b %>%
 left_join(waikato_pop, by = "Total_People")

a_b_c_g <- a_b_c %>%
 left_join(wellington_pop, by = "Total_People")

tar_to_west_pop <- a_b_c_g %>%
 left_join(west_coast_pop, by = "Total_People")


tar_to_west_pop  

Total_People,Taranaki,Tasman,Waikato,Wellington,West_Coast
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Total people,109755,47154,400206,479616,23844


Then combine these with the previous dataframe

In [76]:
final_pops <- final_pops %>%
 left_join(tar_to_west_pop, by = "Total_People")

In [77]:
final_pops

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Total people,1415550,271248,559227,43653,151095,109755,47154,400206,479616,23844


## Income

In [78]:
#Compress the the regions again, dont want to know about the individual districts 
taranaki_income = compress_inc(taranaki_income)
tasman_income = compress_inc(tasman_income)
waikato_income = compress_inc(waikato_income)
wellington_income = compress_inc(wellington_income)
west_coast_income = compress_inc(west_coast_income)

In [79]:
#Rename them so that we have the region names in the dataframe
taranaki_income <- rename(taranaki_income, Median_Income = Measure, Taranaki = Value) 
tasman_income <- rename(tasman_income, Median_Income = Measure, Tasman = Value) 
waikato_income <- rename(waikato_income, Median_Income = Measure, Waikato = Value) 
wellington_income <- rename(wellington_income, Median_Income = Measure, Wellington = Value) 
west_coast_income<- rename(west_coast_income, Median_Income = Measure, West_Coast = Value) 

In [80]:
#Stitch them together
a_b <- taranaki_income %>%
 left_join(tasman_income, by = "Median_Income")

a_b_c <- a_b %>%
 left_join(waikato_income, by = "Median_Income")

a_b_c_g <- a_b_c %>%
 left_join(wellington_income, by = "Median_Income")

tar_to_wc_income <- a_b_c_g %>%
 left_join(west_coast_income, by = "Median_Income")

Join it with the previous income dataframe

In [81]:
final_incomes  <- final_incomes %>%
 left_join(tar_to_wc_income, by = "Median_Income")

In [82]:
final_incomes

Median_Income,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Median personal income ($),29600,24216.67,29209.09,24400,25275,28866.67,25700,27280,29944.44,25900


## Home Ownership

In [83]:
taranaki_homes = compress(taranaki_homes)
tasman_homes = compress(tasman_homes)
waikato_homes = compress(waikato_homes)
wellington_homes = compress(wellington_homes)
west_coast_homes = compress(west_coast_homes)

In [84]:
taranaki_homes <- rename(taranaki_homes, Total_People = Category, Taranaki = Value) 
tasman_homes <- rename(tasman_homes, Total_People = Category, Tasman = Value) 
waikato_homes <- rename(waikato_homes, Total_People = Category, Waikato = Value) 
wellington_homes <- rename(wellington_homes, Total_People = Category, Wellington = Value) 
west_coast_homes<- rename(west_coast_homes, Total_People = Category, West_Coast = Value) 

In [85]:
#Stitch them together
a_b <- taranaki_homes %>%
 left_join(tasman_homes, by = "Total_People")

a_b_c <- a_b %>%
 left_join(waikato_homes, by = "Total_People")

a_b_c_g <- a_b_c %>%
 left_join(wellington_homes, by = "Total_People")

tar_wc_homes <- a_b_c_g %>%
 left_join(west_coast_homes, by = "Total_People")

In [86]:
#Add this to the other dataframe made previously 
final_homes <- final_homes %>%
 left_join(tar_wc_homes, by = "Total_People")

In [87]:
final_homes

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Dwelling Held in a Family Trust,67533,16167,27543,1632,8694,6423,2850,19788,22263,789
Dwelling Not Owned and Not Held in a Family Trust,168705,33927,63795,6006,18399,12981,4356,52089,59613,2781
Dwelling Owned or Partly Owned,201408,45918,110733,7080,26919,21108,10197,67722,88149,5373
Not Elsewhere Included,31848,7476,11151,1275,3600,2577,864,9387,9597,849


## Looking at Manawatu  - Southland (the last regions )

This portion changes in style due to the seperation of this task between group members
(However I did change the working directories to be on my computer to check it all runs)

In [88]:
library(tidyr)
library(tidyverse)
library(readr)
library(skimr)

In [89]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Manawatu - Whanganui")

In [90]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Manawatu_Whanganui_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Manawatu - Whanganui")
#change this above to be where the files are save on your computer

In [91]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Manawatu_Whanganui_dogs_df <- data.frame()

In [92]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Manawatu_Whanganui_file_list)){
  data = read_csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Manawatu_Whanganui_dogs_df <- rbind(Manawatu_Whanganui_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

In [93]:
Manawatu_Whanganui_dogs_df

Territorial Authority,Measure,Category,Value,Date,DateLabel
<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>
Horowhenua,Registered dogs — Total registered dogs,All pure and cross breeds,6287,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,All pure breeds,3790,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,Beagle,29,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,Bichon Frise,60,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,Boxer,38,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,Brazilian Fila,0,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,"Cattle, Australian",47,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",33,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",43,2019,Year ended June
Horowhenua,Registered dogs — Total registered dogs,"Collie, Bearded",21,2019,Year ended June


In [94]:
Manawatu_Whanganui_population_df = data.frame()

In [95]:
for (i in 1:length(Manawatu_Whanganui_file_list)){
  data = read.csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Manawatu_Whanganui_population_df <- rbind(Manawatu_Whanganui_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [96]:
Manawatu_Whanganui_population_df

Territorial.Authority,Measure,Category,Value,Date,DateLabel
<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
Horowhenua,Population - residents at 2013 census,Total people,30096,2013,Census Year
Manawatu,Population - residents at 2013 census,Total people,27459,2013,Census Year
Palmerston North,Population - residents at 2013 census,Total people,80079,2013,Census Year
Rangitikei,Population - residents at 2013 census,Total people,14019,2013,Census Year
Ruapehu,Population - residents at 2013 census,Total people,11844,2013,Census Year
Tararua,Population - residents at 2013 census,Total people,16854,2013,Census Year
Whanganui,Population - residents at 2013 census,Total people,42150,2013,Census Year


Make a function to read through and look at the home ovwnership now 

In [97]:
Manawatu_Whanganui_homes_df  = data.frame()

In [98]:
for (i in 1:length(Manawatu_Whanganui_file_list)){
  data = read.csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Manawatu_Whanganui_homes_df <- rbind(Manawatu_Whanganui_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [99]:
Manawatu_Whanganui_homes_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
220,Horowhenua,Home ownership by households,Dwelling Owned or Partly Owned,6483,2013,Census Year
221,Horowhenua,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,3624,2013,Census Year
222,Horowhenua,Home ownership by households,Dwelling Held in a Family Trust,1473,2013,Census Year
223,Horowhenua,Home ownership by households,Not Elsewhere Included,915,2013,Census Year
2201,Manawatu,Home ownership by households,Dwelling Owned or Partly Owned,5568,2013,Census Year
2211,Manawatu,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,2892,2013,Census Year
2221,Manawatu,Home ownership by households,Dwelling Held in a Family Trust,1593,2013,Census Year
2231,Manawatu,Home ownership by households,Not Elsewhere Included,423,2013,Census Year
2202,Palmerston North,Home ownership by households,Dwelling Owned or Partly Owned,13932,2013,Census Year
2212,Palmerston North,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,10473,2013,Census Year


Loop through and make the income into a dataframe

In [100]:
Manawatu_Whanganui_median_income_df = data.frame()

In [101]:
for (i in 1:length(Manawatu_Whanganui_file_list)){
  data = read.csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Manawatu_Whanganui_median_income_df <- rbind(Manawatu_Whanganui_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [102]:
Manawatu_Whanganui_median_income_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
128,Horowhenua,Median personal income ($),,21800,2013,Census Year
1281,Manawatu,Median personal income ($),,28400,2013,Census Year
1282,Palmerston North,Median personal income ($),,27000,2013,Census Year
1283,Rangitikei,Median personal income ($),,25700,2013,Census Year
1284,Ruapehu,Median personal income ($),,24100,2013,Census Year
1285,Tararua,Median personal income ($),,25100,2013,Census Year
1286,Whanganui,Median personal income ($),,23500,2013,Census Year


In [103]:
Manawatu_Whanganui_income_bracket_df = data.frame()

In [104]:
for (i in 1:length(Manawatu_Whanganui_file_list)){
  data = read.csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Manawatu_Whanganui_income_bracket_df <- rbind(Manawatu_Whanganui_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

In [105]:
Manawatu_Whanganui_income_bracket_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
158,Horowhenua,Number of families in income bracket,"$20,000 and under",612.000000,2013,Census Year
159,Horowhenua,Number of families in income bracket,"$20,001 to $30,000",999.000000,2013,Census Year
160,Horowhenua,Number of families in income bracket,"$30,001 to $50,000",1947.000000,2013,Census Year
161,Horowhenua,Number of families in income bracket,"$50,001 to $70,000",1275.000000,2013,Census Year
162,Horowhenua,Number of families in income bracket,"$70,001 to $100,000",1296.000000,2013,Census Year
163,Horowhenua,Number of families in income bracket,"$100,000 and over",1038.000000,2013,Census Year
164,Horowhenua,Number of families in income bracket,Not stated,1035.000000,2013,Census Year
165,Horowhenua,Number of families in income bracket,"$20,000 and under",7.461595,2013,Census Year
166,Horowhenua,Number of families in income bracket,"$20,001 to $30,000",12.179956,2013,Census Year
167,Horowhenua,Number of families in income bracket,"$30,001 to $50,000",23.738113,2013,Census Year


## **Manawatu-Whanganui Data Frames:**

 - Manawatu_Whanganui_dogs_df
 - Manawatu_Whanganui_population_df
 - Manawatu_Whanganui_homes_df
 - Manawatu_Whanganui_median_income_df
 - Manawatu_Whanganui_income_bracket_df

In [106]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Marlborough")

In [107]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Marlborough_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Marlborough")
#change this above to be where the files are save on your computer

In [108]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Marlborough_dogs_df <- data.frame()

In [109]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Marlborough_file_list)){
  data = read_csv(Marlborough_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Marlborough_dogs_df <- rbind(Marlborough_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



In [110]:
Marlborough_dogs_df

Territorial Authority,Measure,Category,Value,Date,DateLabel
<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>
Marlborough,Registered dogs — Total registered dogs,All pure and cross breeds,10444,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,All pure breeds,6244,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,Beagle,21,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,Bichon Frise,140,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,Boxer,80,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,Brazilian Fila,0,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,"Cattle, Australian",50,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,"Chihuahua, Long Coat",49,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,"Chihuahua, Smooth Coat",34,2019,Year ended June
Marlborough,Registered dogs — Total registered dogs,"Collie, Bearded",116,2019,Year ended June


In [111]:
Marlborough_population_df = data.frame()

In [112]:
for (i in 1:length(Marlborough_file_list)){
  data = read.csv(Marlborough_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Marlborough_population_df <- rbind(Marlborough_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [113]:
Marlborough_population_df

Territorial.Authority,Measure,Category,Value,Date,DateLabel
<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
Marlborough,Population - residents at 2013 census,Total people,43416,2013,Census Year


In [114]:
Marlborough_homes_df  = data.frame()

In [115]:
for (i in 1:length(Marlborough_file_list)){
  data = read.csv(Marlborough_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Marlborough_homes_df <- rbind(Marlborough_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [116]:
Marlborough_homes_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
220,Marlborough,Home ownership by households,Dwelling Owned or Partly Owned,9249,2013,Census Year
221,Marlborough,Home ownership by households,Dwelling Not Owned and Not Held in a Family Trust,4863,2013,Census Year
222,Marlborough,Home ownership by households,Dwelling Held in a Family Trust,2607,2013,Census Year
223,Marlborough,Home ownership by households,Not Elsewhere Included,954,2013,Census Year


In [117]:
Marlborough_median_income_df = data.frame()

In [118]:
for (i in 1:length(Marlborough_file_list)){
  data = read.csv(Marlborough_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Marlborough_median_income_df <- rbind(Marlborough_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [119]:
Marlborough_median_income_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
128,Marlborough,Median personal income ($),,27900,2013,Census Year


In [120]:
Marlborough_income_bracket_df = data.frame()

In [121]:
for (i in 1:length(Marlborough_file_list)){
  data = read.csv(Marlborough_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Marlborough_income_bracket_df <- rbind(Marlborough_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

In [122]:
Marlborough_income_bracket_df

Unnamed: 0_level_0,Territorial.Authority,Measure,Category,Value,Date,DateLabel
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<dbl>,<fct>,<fct>
158,Marlborough,Number of families in income bracket,"$20,000 and under",645.0,2013,Census Year
159,Marlborough,Number of families in income bracket,"$20,001 to $30,000",942.0,2013,Census Year
160,Marlborough,Number of families in income bracket,"$30,001 to $50,000",2373.0,2013,Census Year
161,Marlborough,Number of families in income bracket,"$50,001 to $70,000",2094.0,2013,Census Year
162,Marlborough,Number of families in income bracket,"$70,001 to $100,000",2679.0,2013,Census Year
163,Marlborough,Number of families in income bracket,"$100,000 and over",2580.0,2013,Census Year
164,Marlborough,Number of families in income bracket,Not stated,1248.0,2013,Census Year
165,Marlborough,Number of families in income bracket,"$20,000 and under",5.134941,2013,Census Year
166,Marlborough,Number of families in income bracket,"$20,001 to $30,000",7.499403,2013,Census Year
167,Marlborough,Number of families in income bracket,"$30,001 to $50,000",18.891808,2013,Census Year


## **Marlborough Data Frames:**

 - Marlborough_dogs_df
 - Marlborough_population_df
 - Marlborough_homes_df
 - Marlborough_median_income_df
 - Marlborough_income_bracket_df

In [123]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Nelson")

In [124]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Nelson_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Nelson")
#change this above to be where the files are save on your computer

In [125]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Nelson_dogs_df <- data.frame()

In [126]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Nelson_file_list)){
  data = read_csv(Nelson_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Nelson_dogs_df <- rbind(Nelson_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)



In [127]:
Nelson_population_df = data.frame()

In [128]:
for (i in 1:length(Nelson_file_list)){
  data = read.csv(Nelson_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Nelson_population_df <- rbind(Nelson_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [129]:
Nelson_homes_df  = data.frame()

In [130]:
for (i in 1:length(Nelson_file_list)){
  data = read.csv(Nelson_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Nelson_homes_df <- rbind(Nelson_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [131]:
Nelson_median_income_df = data.frame()

In [132]:
for (i in 1:length(Nelson_file_list)){
  data = read.csv(Nelson_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Nelson_median_income_df <- rbind(Nelson_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [133]:
Nelson_income_bracket_df = data.frame()

In [134]:
for (i in 1:length(Nelson_file_list)){
  data = read.csv(Nelson_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Nelson_income_bracket_df <- rbind(Nelson_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

## **Nelson Data Frames:**

 - Nelson_dogs_df
 - Nelson_population_df
 - Nelson_homes_df
 - Nelson_median_income_df
 - Nelson_income_bracket_df

In [135]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Northland")

In [136]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Northland_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Northland")
#change this above to be where the files are save on your computer

In [137]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Northland_dogs_df <- data.frame()

In [138]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Northland_file_list)){
  data = read_csv(Northland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Northland_dogs_df <- rbind(Northland_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

In [139]:
Northland_population_df = data.frame()

In [140]:
for (i in 1:length(Northland_file_list)){
  data = read.csv(Northland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Northland_population_df <- rbind(Northland_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [141]:
Northland_homes_df  = data.frame()

In [142]:
for (i in 1:length(Northland_file_list)){
  data = read.csv(Northland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Northland_homes_df <- rbind(Northland_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [143]:
Northland_median_income_df = data.frame()

In [144]:
for (i in 1:length(Northland_file_list)){
  data = read.csv(Northland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Northland_median_income_df <- rbind(Northland_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [145]:
Northland_income_bracket_df = data.frame()

In [146]:
for (i in 1:length(Northland_file_list)){
  data = read.csv(Northland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Northland_income_bracket_df <- rbind(Northland_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

## **Northland Data Frames:**

 - Northland_dogs_df
 - Northland_population_df
 - Northland_homes_df
 - Northland_median_income_df
 - Northland_income_bracket_df

In [147]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Otago")

In [148]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Otago_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Otago")
#change this above to be where the files are save on your computer

In [149]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Otago_dogs_df <- data.frame()

In [150]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Otago_file_list)){
  data = read_csv(Otago_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Otago_dogs_df <- rbind(Otago_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

In [151]:
Otago_population_df = data.frame()

In [152]:
for (i in 1:length(Otago_file_list)){
  data = read.csv(Otago_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Otago_population_df <- rbind(Otago_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [153]:
Otago_homes_df  = data.frame()

In [154]:
for (i in 1:length(Otago_file_list)){
  data = read.csv(Otago_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Otago_homes_df <- rbind(Otago_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [155]:
Otago_median_income_df = data.frame()

In [156]:
for (i in 1:length(Otago_file_list)){
  data = read.csv(Otago_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Otago_median_income_df <- rbind(Otago_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [157]:
Otago_income_bracket_df = data.frame()

In [158]:
for (i in 1:length(Otago_file_list)){
  data = read.csv(Otago_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Otago_income_bracket_df <- rbind(Otago_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

## **Otago Data Frames:**

 - Otago_dogs_df
 - Otago_population_df
 - Otago_homes_df
 - Otago_median_income_df
 - Otago_income_bracket_df

In [159]:
getwd()
#set the working directory to the file that the region you're looking at is saved to
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Southland")

In [160]:
#Get the list of files that you're going to be reading from 
#se the path to be to the folder of the region that you're looking at
Southland_file_list <- list.files(path="~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Southland")
#change this above to be where the files are save on your computer

In [161]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Southland_dogs_df <- data.frame()

In [162]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs

for (i in 1:length(Southland_file_list)){
  data = read_csv(Southland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data = data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic = NULL
  pets_data$NullReason = NULL
  pets_data$Source = NULL
  pets_data$ValueUnit = NULL
  pets_data$ValueLabel = NULL
    
    
  #this is specific to the pets portion, this removes the acc data
  pets_counts = pets_data[c(-1,-2,-3),]
  pets_counts
  
  
  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs = pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]
    
  
  Southland_dogs_df <- rbind(Southland_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  `Territorial Authority` = [31mcol_character()[39m,
  Topic = [31mcol_character()[39m,
  Measure = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  Value = [32mcol_double()[39m,
  ValueUnit = [31mcol_character()[39m,
  ValueLabel = [31mcol_character()[39m,
  NullReason = [31mcol_character()[39m,
  Date = [31mcol_character()[39m,
  DateLabel = [31mcol_character()[39m,
  Source = [31mcol_character()[39m
)

Parsed with column specifi

In [163]:
Southland_population_df = data.frame()

In [164]:
for (i in 1:length(Southland_file_list)){
  data = read.csv(Southland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  demo_data = data[data$Topic == "Demographics",]

  demo_data$Topic = NULL
  demo_data$NullReason = NULL
  demo_data$Source = NULL
  demo_data$ValueUnit = NULL
  demo_data$ValueLabel = NULL  

  population = demo_data[demo_data$Measure == "Population - residents at 2013 census",]  
  Southland_population_df <- rbind(Southland_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [165]:
Southland_homes_df  = data.frame()

In [166]:
for (i in 1:length(Southland_file_list)){
  data = read.csv(Southland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  home_data = data[data$Topic == "Households",]

  home_data$Topic = NULL
  home_data$NullReason = NULL
  home_data$Source = NULL
  home_data$ValueUnit = NULL
  home_data$ValueLabel = NULL  

  ownership = home_data[home_data$Measure == "Home ownership by households",]  
  Southland_homes_df <- rbind(Southland_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [167]:
Southland_median_income_df = data.frame()

In [168]:
for (i in 1:length(Southland_file_list)){
  data = read.csv(Southland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_data = data[data$Topic == "Income",]

  income_data$Topic = NULL
  income_data$NullReason = NULL
  income_data$Source = NULL
  income_data$ValueUnit = NULL
  income_data$ValueLabel = NULL  

  personal_income = income_data[income_data$Measure == "Median personal income ($)",]  
  Southland_median_income_df <- rbind(Southland_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [169]:
Southland_income_bracket_df = data.frame()

In [170]:
for (i in 1:length(Southland_file_list)){
  data = read.csv(Southland_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  income_bracket_data = data[data$Topic == "Income",]

  income_bracket_data$Topic = NULL
  income_bracket_data$NullReason = NULL
  income_bracket_data$Source = NULL
  income_bracket_data$ValueUnit = NULL
  income_bracket_data$ValueLabel = NULL  

  bracket_income = income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]  
  Southland_income_bracket_df <- rbind(Southland_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

## **Southland Data Frames:**

 - Southland_dogs_df
 - Southland_population_df
 - Southland_homes_df
 - Southland_median_income_df
 - Southland_income_bracket_df

## **All Data Frames:**

 - Manawatu_Whanganui_dogs_df
 - Manawatu_Whanganui_population_df
 - Manawatu_Whanganui_homes_df
 - Manawatu_Whanganui_median_income_df
 - Manawatu_Whanganui_income_bracket_dfSouthland_file_listNelson_dogs_df
 - Marlborough_dogs_df
 - Marlborough_population_df
 - Marlborugh_homes_df
 - Marlborough_median_income_df
 - Marlborough_income_bracket_df
 - Nelson_dogs_df
 - Nelson_population_df
 - Nelson_homes_df
 - Nelson_median_income_df
 - Nelson_income_bracket_df
 - Northland_dogs_df
 - Northland_population_df
 - Northland_homes_df
 - Northland_median_income_df
 - Northland_income_bracket_df
 - Otago_dogs_df
 - Otago_population_df
 - Otago_homes_df
 - Otago_median_income_df
 - Otago_income_bracket_df
 - Southland_dogs_df
 - Southland_population_df
 - Southland_homes_df
 - Southland_median_income_df
 - Southland_income_bracket_df

In [171]:
Manawatu_Whanganui_dogs_totals_df <- aggregate(Manawatu_Whanganui_dogs_df['Value'], by=Manawatu_Whanganui_dogs_df['Category'], sum) 

In [172]:
Marlborough_dogs_totals_df <- aggregate(Marlborough_dogs_df['Value'], by=Marlborough_dogs_df['Category'], sum) 

In [173]:
Nelson_dogs_totals_df <- aggregate(Nelson_dogs_df['Value'], by=Nelson_dogs_df['Category'], sum) 

In [174]:
Otago_dogs_totals_df <- aggregate(Otago_dogs_df['Value'], by=Otago_dogs_df['Category'], sum) 

In [175]:
Southland_dogs_totals_df <- aggregate(Southland_dogs_df['Value'], by=Southland_dogs_df['Category'], sum) 

In [176]:
Northland_dogs_totals_df <- aggregate(Northland_dogs_df['Value'], by=Northland_dogs_df['Category'], sum) 

In [177]:
dog_totals_df <- bind_cols(Manawatu_Whanganui_dogs_totals_df, Marlborough_dogs_totals_df, Nelson_dogs_totals_df, Otago_dogs_totals_df, Southland_dogs_totals_df, Northland_dogs_totals_df)

New names:
* Category -> Category...1
* Value -> Value...2
* Category -> Category...3
* Value -> Value...4
* Category -> Category...5
* ...



In [178]:
clean_dog_totals_df <- subset(dog_totals_df, select = -c(Category...3, Category...5, Category...7, Category...9, Category...11))

In [179]:
Final_dog_df <- rename(clean_dog_totals_df, c("Breed"="Category...1", "Manawatu_Whanganui"="Value...2", "Marlborough"="Value...4", "Nelson"="Value...6", "Otago"="Value...8", "Southland"="Value...10", "Northland"="Value...12"))

In [180]:
Final_dog_df

Unnamed: 0_level_0,Breed,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,All cross breeds,11987,4200,2693,11666,7499,9349
2,All pure and cross breeds,45676,10444,5377,33644,24552,24532
3,All pure breeds,33689,6244,2684,21978,17053,15183
4,Beagle,183,21,27,100,72,51
5,Bichon Frise,527,140,101,396,391,273
6,Boxer,343,80,45,212,146,169
7,Brazilian Fila,0,0,0,0,0,0
8,"Cattle, Australian",286,50,39,103,54,391
9,"Chihuahua, Long Coat",169,49,17,81,67,179
10,"Chihuahua, Smooth Coat",303,34,40,278,247,149


In [181]:
Manawatu_Whanganui_populations_totals_df <- aggregate(Manawatu_Whanganui_population_df['Value'], by=Manawatu_Whanganui_population_df['Category'], sum)

In [182]:
Marlborough_populations_totals_df <- aggregate(Marlborough_population_df['Value'], by=Marlborough_population_df['Category'], sum)

In [183]:
Nelson_populations_totals_df <- aggregate(Nelson_population_df['Value'], by=Nelson_population_df['Category'], sum)

In [184]:
Otago_populations_totals_df <- aggregate(Otago_population_df['Value'], by=Otago_population_df['Category'], sum)

In [185]:
Southland_populations_totals_df <- aggregate(Southland_population_df['Value'], by=Southland_population_df['Category'], sum)

In [186]:
Northland_populations_totals_df <- aggregate(Northland_population_df['Value'], by=Northland_population_df['Category'], sum)

In [187]:
populations_totals_df <- bind_cols(Manawatu_Whanganui_populations_totals_df, Marlborough_populations_totals_df, Nelson_populations_totals_df, Otago_populations_totals_df, Southland_populations_totals_df, Northland_populations_totals_df)

New names:
* Category -> Category...1
* Value -> Value...2
* Category -> Category...3
* Value -> Value...4
* Category -> Category...5
* ...



In [188]:
clean_populations_totals_df <- subset(populations_totals_df, select = -c(Category...3, Category...5, Category...7, Category...9, Category...11))

In [189]:
Final_populations_df <- rename(clean_populations_totals_df, c("Total_People"="Category...1", "Manawatu_Whanganui"="Value...2", "Marlborough"="Value...4", "Nelson"="Value...6", "Otago"="Value...8", "Southland"="Value...10", "Northland"="Value...12"))

In [190]:
Final_populations_df

Unnamed: 0_level_0,Total_People,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Total people,222501,43416,46437,183258,93342,151692


In [191]:
Manawatu_Whanganui_homes_totals_df <- aggregate(Manawatu_Whanganui_homes_df['Value'], by=Manawatu_Whanganui_homes_df['Category'], sum)

In [192]:
Marlborough_homes_totals_df <- aggregate(Marlborough_homes_df['Value'], by=Marlborough_homes_df['Category'], sum)

In [193]:
Nelson_homes_totals_df <- aggregate(Nelson_homes_df['Value'], by=Nelson_homes_df['Category'], sum)

In [194]:
Otago_homes_totals_df <- aggregate(Otago_homes_df['Value'], by=Otago_homes_df['Category'], sum)

In [195]:
Southland_homes_totals_df <- aggregate(Southland_homes_df['Value'], by=Southland_homes_df['Category'], sum)

In [196]:
Northland_homes_totals_df <- aggregate(Northland_homes_df['Value'], by=Northland_homes_df['Category'], sum)

In [197]:
homes_totals_df <- bind_cols(Manawatu_Whanganui_homes_totals_df, Marlborough_homes_totals_df, Nelson_homes_totals_df, Otago_homes_totals_df, Southland_homes_totals_df, Northland_homes_totals_df)

New names:
* Category -> Category...1
* Value -> Value...2
* Category -> Category...3
* Value -> Value...4
* Category -> Category...5
* ...



In [198]:
clean_homes_totals_df <- subset(homes_totals_df, select = -c(Category...3, Category...5, Category...7, Category...9, Category...11))

In [199]:
Final_homes_df <- rename(clean_homes_totals_df, c("Total_People"="Category...1", "Manawatu_Whanganui"="Value...2", "Marlborough"="Value...4", "Nelson"="Value...6", "Otago"="Value...8", "Southland"="Value...10", "Northland"="Value...12"))

In [200]:
Final_homes_df

Unnamed: 0_level_0,Total_People,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Dwelling Held in a Family Trust,11058,2607,2445,12204,5709,7581
2,Dwelling Not Owned and Not Held in a Family Trust,28365,4863,5598,21825,10740,18063
3,Dwelling Owned or Partly Owned,42207,9249,9648,32904,18960,27861
4,Not Elsewhere Included,5301,954,849,3855,2037,5439


In [201]:
Manawatu_Whanganui_median_income_totals_df <- aggregate(Manawatu_Whanganui_median_income_df['Value'], by=Manawatu_Whanganui_median_income_df['Measure'], sum)

In [202]:
Marlborough_median_income_totals_df <- aggregate(Marlborough_median_income_df['Value'], by=Marlborough_median_income_df['Measure'], sum)

In [203]:
Nelson_median_income_totals_df <- aggregate(Nelson_median_income_df['Value'], by=Nelson_median_income_df['Measure'], sum)

In [204]:
Otago_median_income_totals_df <- aggregate(Otago_median_income_df['Value'], by=Otago_median_income_df['Measure'], sum)

In [205]:
Southland_median_income_totals_df <- aggregate(Southland_median_income_df['Value'], by=Southland_median_income_df['Measure'], sum)

In [206]:
Northland_median_income_totals_df <- aggregate(Northland_median_income_df['Value'], by=Northland_median_income_df['Measure'], sum)

In [207]:
median_income_totals_df <- bind_cols(Manawatu_Whanganui_median_income_totals_df, Marlborough_median_income_totals_df, Nelson_median_income_totals_df, Otago_median_income_totals_df, Southland_median_income_totals_df, Northland_median_income_totals_df)

New names:
* Measure -> Measure...1
* Value -> Value...2
* Measure -> Measure...3
* Value -> Value...4
* Measure -> Measure...5
* ...



In [208]:
clean_median_income_totals_df <- subset(median_income_totals_df, select = -c(Measure...3, Measure...5, Measure...7, Measure...9, Measure...11))

In [209]:
Final_median_income_totals_df <- rename(clean_median_income_totals_df, c("Median_Income"="Measure...1", "Manawatu_Whanganui"="Value...2", "Marlborough"="Value...4", "Nelson"="Value...6", "Otago"="Value...8", "Southland"="Value...10", "Northland"="Value...12"))

In [210]:
Final_median_income_totals_df

Unnamed: 0_level_0,Median_Income,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Median personal income ($),175600,27900,27200,116600,90100,69400


## **Final Total Data Frames For Manawatu - Southland:**

 - Final_dog_df
 - Final_populations_df
 - Final_homes_df
 - Final_median_income_totals_df

Now we will combine these with the dataframes previously made on income owenrship population and dogs

In [211]:
final_pets = final_pets %>%
    left_join(Final_dog_df, by = "Breed")

In [212]:
final_pets

Breed,Auckland,Bay_of_Plenty,Canterbury,Gisbourne,Hawkes_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
All cross breeds,40000,19612,33535,3044,8314,6548,4741,24237,22329,2005,11987,4200,2693,11666,7499,9349
All pure and cross breeds,106078,39480,95107,9746,25308,17790,10919,61601,50760,4743,45676,10444,5377,33644,24552,24532
All pure breeds,66078,19868,61572,6702,16994,11242,6178,37364,28431,2738,33689,6244,2684,21978,17053,15183
Beagle,668,108,454,16,140,54,35,224,280,17,183,21,27,100,72,51
Bichon Frise,1619,337,1820,36,209,287,101,515,620,128,527,140,101,396,391,273
Boxer,893,282,794,52,142,157,78,409,291,33,343,80,45,212,146,169
Brazilian Fila,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Cattle, Australian",648,295,317,79,175,142,159,702,256,29,286,50,39,103,54,391
"Chihuahua, Long Coat",607,254,908,27,132,123,35,434,351,16,169,49,17,81,67,179
"Chihuahua, Smooth Coat",1006,222,416,19,75,101,47,337,231,49,303,34,40,278,247,149


In [213]:
final_pops = final_pops %>%
    left_join(Final_populations_df, by = "Total_People")

In [214]:
final_pops

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Total people,1415550,271248,559227,43653,151095,109755,47154,400206,479616,23844,222501,43416,46437,183258,93342,151692


In [215]:
final_incomes = final_incomes %>%
    left_join(Final_median_income_totals_df, by = "Median_Income")

In [216]:
final_incomes

Median_Income,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Median personal income ($),29600,24216.67,29209.09,24400,25275,28866.67,25700,27280,29944.44,25900,175600,27900,27200,116600,90100,69400


In [217]:
final_homes = final_homes %>%
    left_join(Final_homes_df, by = "Total_People")

In [218]:
final_homes

Total_People,Auckland,Bay_Of_Plenty,Canterbury,Gisbourne,Hawks_Bay,Taranaki,Tasman,Waikato,Wellington,West_Coast,Manawatu_Whanganui,Marlborough,Nelson,Otago,Southland,Northland
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Dwelling Held in a Family Trust,67533,16167,27543,1632,8694,6423,2850,19788,22263,789,11058,2607,2445,12204,5709,7581
Dwelling Not Owned and Not Held in a Family Trust,168705,33927,63795,6006,18399,12981,4356,52089,59613,2781,28365,4863,5598,21825,10740,18063
Dwelling Owned or Partly Owned,201408,45918,110733,7080,26919,21108,10197,67722,88149,5373,42207,9249,9648,32904,18960,27861
Not Elsewhere Included,31848,7476,11151,1275,3600,2577,864,9387,9597,849,5301,954,849,3855,2037,5439


## This leaves us with 4 dataframes that contain all the infomartion that we want from the CSVs from the figures NZ site

Now we will write these to CSVs, so that we can work with them in another notebook as this one is becoming rather long

In [226]:
write.csv(final_pets, "All_Pets_FiguresNZ.csv" )

In [227]:
write.csv(final_homes, "All_Homes_FiguresNZ.csv")

In [228]:
write.csv(final_incomes, "All_Incomes_FiguresNZ.csv")

In [229]:
write.csv(final_pops, "All_Populations_FiguresNZ.csv")

In [225]:
getwd()