## Data Wrangling Project: Looking at the Pets of New Zealand 

## This project looked at all of the dog breeds in New Zealand's regions

## At the end of this notebook we will end with 4 dataframes:


One for the pet information for all the regions 


One for the population information for all the regions 


One for the income information for all the regions 


One for the home ownership information for all the regions  

We had CSVs covering all of the regions in New Zealand that contained information about pet counts, population, median income and home ownership.


The first step is to extract this information from the CSVs and make a dataframe for each kind of information.

To do this we sorted the CSVs for the different districts in New Zealand (61) into folders, one folder for each region in New Zealand. 

In [None]:
#first load the tidyverse library which has many useful libraries within it
library(tidyverse)

## Define functions to extract the information we want from the CSVs

In [None]:
#Extract the registered-dog counts from a set of district CSVs.
#  file_list: character vector of CSV paths, each one is read with read_csv()
#  df:        dataframe to append to (pass an empty data.frame() to start fresh)
#For every file this keeps the "Pets" topic, drops the columns we do not need,
#keeps only the "Registered dogs — Total registered dogs" rows and binds them onto df.
#Returns df with the rows from every file appended; an empty file_list returns df unchanged.
pet_reader <- function(file_list, df) {
  #columns that are not needed for us from the csv
  unwanted <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel",
                "Date", "DateLabel")

  #loop over the paths themselves: 1:length(file_list) counts 1, 0 for an empty
  #list and would then try to read a file called NA
  for (csv_path in file_list) {
    data <- read_csv(csv_path)
    pets_data <- data[data$Topic == "Pets", ]
    pets_data <- pets_data[setdiff(names(pets_data), unwanted)]

    #this is specific to the pets portion: drop the first three "Pets" rows,
    #which the original author identifies as the acc data
    #(positional - TODO confirm this holds for every district file)
    pets_counts <- pets_data[c(-1, -2, -3), ]

    #which() leaves out rows whose Measure is NA; a plain logical index would
    #keep them as rows made entirely of NA
    is_total <- which(pets_counts$Measure == "Registered dogs — Total registered dogs")

    #for each iteration, bind the new data to the building dataset
    df <- rbind(df, pets_counts[is_total, ])
  }
  df
}

In [None]:
#Extract the 2013 census resident population from a set of district CSVs.
#  file_list: character vector of CSV paths, each one is read with read.csv()
#  df:        dataframe to append to (pass an empty data.frame() to start fresh)
#For every file this keeps the rows of the "Demographics" topic whose Measure is
#"Population - residents at 2013 census", drops the columns we do not need and binds them onto df.
#Returns df with the rows from every file appended; an empty file_list returns df unchanged.
pop_reader <- function(file_list, df) {
  #columns that are not needed for us from the csv
  unwanted <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel",
                "Date", "DateLabel")

  #loop over the paths themselves: 1:length(file_list) counts 1, 0 for an empty
  #list and would then try to read a file called NA
  for (csv_path in file_list) {
    data <- read.csv(csv_path)

    #which() leaves out rows where Topic or Measure is NA; a plain logical index
    #would keep them as rows made entirely of NA
    wanted_rows <- which(data$Topic == "Demographics" &
                           data$Measure == "Population - residents at 2013 census")
    population <- data[wanted_rows, setdiff(names(data), unwanted), drop = FALSE]

    #for each iteration, bind the new data to the building dataset
    df <- rbind(df, population)
  }
  df
}

In [None]:
#Extract the home ownership rows from a set of district CSVs.
#  file_list: character vector of CSV paths, each one is read with read.csv()
#  df:        dataframe to append to (pass an empty data.frame() to start fresh)
#For every file this keeps the rows of the "Households" topic whose Measure is
#"Home ownership by households", drops the columns we do not need and binds them onto df.
#Returns df with the rows from every file appended; an empty file_list returns df unchanged.
home_reader <- function(file_list, df) {
  #columns that are not needed for us from the csv
  unwanted <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel",
                "Date", "DateLabel")

  #loop over the paths themselves: 1:length(file_list) counts 1, 0 for an empty
  #list and would then try to read a file called NA
  for (csv_path in file_list) {
    data <- read.csv(csv_path)

    #which() leaves out rows where Topic or Measure is NA; a plain logical index
    #would keep them as rows made entirely of NA
    wanted_rows <- which(data$Topic == "Households" &
                           data$Measure == "Home ownership by households")
    ownership <- data[wanted_rows, setdiff(names(data), unwanted), drop = FALSE]

    #for each iteration, bind the new data to the building dataset
    df <- rbind(df, ownership)
  }
  df
}

In [None]:
#Extract the median personal income rows from a set of district CSVs.
#  file_list: character vector of CSV paths, each one is read with read.csv()
#  df:        dataframe to append to (pass an empty data.frame() to start fresh)
#For every file this keeps the rows of the "Income" topic whose Measure is
#"Median personal income ($)", drops the columns we do not need and binds them onto df.
#Returns df with the rows from every file appended; an empty file_list returns df unchanged.
income_reader <- function(file_list, df) {
  #columns that are not needed for us from the csv
  unwanted <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel",
                "Date", "DateLabel")

  #loop over the paths themselves: 1:length(file_list) counts 1, 0 for an empty
  #list and would then try to read a file called NA
  for (csv_path in file_list) {
    data <- read.csv(csv_path)

    #which() leaves out rows where Topic or Measure is NA; a plain logical index
    #would keep them as rows made entirely of NA
    wanted_rows <- which(data$Topic == "Income" &
                           data$Measure == "Median personal income ($)")
    personal_income <- data[wanted_rows, setdiff(names(data), unwanted), drop = FALSE]

    #for each iteration, bind the new data to the building dataset
    df <- rbind(df, personal_income)
  }
  df
}

## Auckland To Hawkes Bay - Extract the information that we want from the CSVs

In [None]:
#set the working directory to the folder that has all of the csv's for the region we are working on 
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Auckland")

In [None]:
#Read all of the file names in a path (which is where we have saved all of the CSVs for a region)
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Auckland", full.names = TRUE)
file_list

In [None]:
#Make an empty dataframe for the 4 bits of information that we want to extract from the CSV
auckland_pet <- data.frame()
auckland_pop <- data.frame()
auckland_homes <- data.frame()
auckland_income <- data.frame()

In [None]:
#run through the reader function that we defined earlier with the file list and the pet dataframe
auckland_pets <- pet_reader(file_list, auckland_pet)
auckland_pets

In [None]:
#use the population reader with the file list and population dataframe 
auckland_pop <- pop_reader(file_list, auckland_pop)
auckland_pop

In [None]:
#use the home reader with the file list and the home dataframe 
auckland_homes <- home_reader(file_list, auckland_homes)
auckland_homes

In [None]:
#use the income reader with the file list and the income dataframe
auckland_income <- income_reader(file_list, auckland_income)
auckland_income

We can repeat the steps we have taken with the Auckland region with all of the other regions in New Zealand

## Bay of Plenty

In [None]:
#set the working directory to the folder that has all of the csv's for the region we are working on 
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Bay of Plenty")
#and make the file list a list of the files in that location
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Bay of Plenty", full.names = TRUE)
file_list

In [None]:
#make the empty dataframes for the bay of plenty
bop_pet <- data.frame()
bop_pop <- data.frame()
bop_homes <- data.frame()
bop_income <- data.frame()

In [None]:
#go through the reader functions with the file list and the dataframes that we have just defined
bop_pet <- pet_reader(file_list, bop_pet)
bop_pet

bop_pop <- pop_reader(file_list, bop_pop)
bop_pop

bop_homes <- home_reader(file_list, bop_homes)
bop_homes

bop_income <- income_reader(file_list, bop_income)
bop_income

## Canterbury

In [None]:
#Set the working directory to where the canterbury csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Canterbury")
#and make the file list for those csvs
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Canterbury", full.names = TRUE)
file_list

In [None]:
#define the empty dataframes for canterbury
cant_pet <- data.frame()
cant_pop <- data.frame()
cant_homes <- data.frame()
cant_income <- data.frame()

In [None]:
#run through the reader functions with the dataframes defined before 
cant_pet <- pet_reader(file_list, cant_pet)
cant_pet

cant_pop <- pop_reader(file_list, cant_pop)
cant_pop

cant_homes <- home_reader(file_list, cant_homes)
cant_homes

cant_income <- income_reader(file_list, cant_income)
cant_income

## Gisbourne

In [None]:
#Set the working directory to where the gisbourne csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Gisbourne")
#get the file list for the gisbourne files
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Gisbourne", full.names = TRUE)
file_list

In [None]:
#make the empty dataframes for the gisbourne region
gisb_pet <- data.frame()
gisb_pop <- data.frame()
gisb_homes <- data.frame()
gisb_income <- data.frame()

In [None]:
#run through the reader functions with those dataframes
gisb_pet <- pet_reader(file_list, gisb_pet)
gisb_pet

gisb_pop <- pop_reader(file_list, gisb_pop)
gisb_pop

gisb_homes <- home_reader(file_list, gisb_homes)
gisb_homes

gisb_income <- income_reader(file_list, gisb_income)
gisb_income

## Hawke's Bay

In [None]:
#Set the working directory to where the hawkes bay csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Hawke's Bay")
#get the file list for the hawkes bay region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Hawke's Bay", full.names = TRUE)
file_list

In [None]:
#make our empty dataframes for the hawkes bay region
hb_pet <- data.frame()
hb_pop <- data.frame()
hb_homes <- data.frame()
hb_income <- data.frame()

In [None]:
#run through the reader functions with the hawkes bay dataframes
hb_pet <- pet_reader(file_list, hb_pet)
hb_pet

hb_pop <- pop_reader(file_list, hb_pop)
hb_pop

hb_homes <- home_reader(file_list, hb_homes)
hb_homes

hb_income <- income_reader(file_list, hb_income)
hb_income

## Aggregation now 

Now we must aggregate. This is because we do not want information about individual districts; we want information about the region as a whole. Therefore we can use the aggregate function to aggregate the dataframes by a metric we give it (either sum or mean).

## Compress Pets

In [None]:
#Helper so that the aggregate call does not have to be written out for every region.
#  region: dataframe of district-level dog counts with a Category column (the breed name)
#          and a Value column (the count for that breed)
#Returns one row per Category with the Value column summed over all districts.
compress_pets <- function(region) {
  counts <- region["Value"]
  breeds <- region["Category"]
  aggregate(x = counts, by = breeds, FUN = sum)
}

In [None]:
#sum the district rows of every pets dataframe into region-wide counts per breed
auckland_pets <- auckland_pets %>% compress_pets()
bop_pet <- bop_pet %>% compress_pets()
cant_pet <- cant_pet %>% compress_pets()
gisb_pet <- gisb_pet %>% compress_pets()
hb_pet <- hb_pet %>% compress_pets()

In [None]:
#print them to check they now hold region totals rather than one row per district
auckland_pets
bop_pet
cant_pet
gisb_pet
hb_pet

In [None]:
#Give the columns more sensible names:
#Category becomes Breed, as this is what that column shows,
#and each Value column takes the name of its region
#NOTE(review): "Bay_of_Plenty" and "Hawkes_Bay" here are spelled "Bay_Of_Plenty" and "Hawks_Bay"
#in the population, income and homes tables - confirm before relying on matching column names


auckland_pets <- auckland_pets %>% rename(Breed = Category, Auckland = Value)
bop_pet <- bop_pet %>% rename(Breed = Category, Bay_of_Plenty = Value)
cant_pet <- cant_pet %>% rename(Breed = Category, Canterbury = Value)
gisb_pet <- gisb_pet %>% rename(Breed = Category, Gisbourne = Value)
hb_pet <- hb_pet %>% rename(Breed = Category, Hawkes_Bay = Value)


auckland_pets
bop_pet
cant_pet
gisb_pet
hb_pet

## Stitch them all together

In [None]:
#then we want these all as one bigger dataframe, so we will join them by the breed name 
#full_join keeps every breed that appears in any of the regions (NA where a region has none registered);
#a left_join chain starting from Auckland would silently drop breeds that Auckland does not have
a_b <- auckland_pets %>%
 full_join(bop_pet, by = "Breed")

a_b_c <- a_b %>%
 full_join(cant_pet, by = "Breed")

a_b_c_g <- a_b_c %>%
 full_join(gisb_pet, by = "Breed")

final_pets <- a_b_c_g %>%
 full_join(hb_pet, by = "Breed")


In [None]:
final_pets


## Population

In [None]:
#Compress function for the population dataframes.
#  region: dataframe of district-level population rows with Category and Value columns
#Returns one row per Category with Value summed over all of the districts,
#which gives a population count for the whole region.
compress_pop <- function(region) {
  people <- region["Value"]
  groups <- region["Category"]
  aggregate(x = people, by = groups, FUN = sum)
}

In [None]:
#Same steps as for pets, but all in one cell. 
#First sum the district rows so each dataframe holds just the region-wide count
auckland_pop <- auckland_pop %>% compress_pop()
bop_pop <- bop_pop %>% compress_pop()
cant_pop <- cant_pop %>% compress_pop()
gisb_pop <- gisb_pop %>% compress_pop()
hb_pop <- hb_pop %>% compress_pop()

#Then give the columns sensible names: Category becomes Total_People and Value takes the region name
#NOTE(review): "Bay_Of_Plenty" and "Hawks_Bay" here are spelled "Bay_of_Plenty" and "Hawkes_Bay"
#in the pets table - confirm before relying on matching column names
auckland_pop <- auckland_pop %>% rename(Total_People = Category, Auckland = Value)
bop_pop <- bop_pop %>% rename(Total_People = Category, Bay_Of_Plenty = Value)
cant_pop <- cant_pop %>% rename(Total_People = Category, Canterbury = Value)
gisb_pop <- gisb_pop %>% rename(Total_People = Category, Gisbourne = Value)
hb_pop <- hb_pop %>% rename(Total_People = Category, Hawks_Bay = Value)

In [None]:
#Join the regions one at a time on Total_People to get one big population dataframe
a_b <- left_join(auckland_pop, bop_pop, by = "Total_People")

a_b_c <- left_join(a_b, cant_pop, by = "Total_People")

a_b_c_g <- left_join(a_b_c, gisb_pop, by = "Total_People")

final_pops <- left_join(a_b_c_g, hb_pop, by = "Total_People")

In [None]:
final_pops

## Income

In [None]:
#Compress function for the income dataframes.
#  region: dataframe of district-level median income rows with Measure and Value columns
#Returns one row per Measure holding the mean of the district Values,
#i.e. a mean of the district median incomes for the region.
#The mean is used instead of the sum because a sum would just balloon
#and a region with more districts would come out higher.
compress_inc <- function(region) {
  incomes <- region["Value"]
  groups <- region["Measure"]
  aggregate(x = incomes, by = groups, FUN = mean)
}

In [None]:
#average the district median incomes so each dataframe holds one region-wide figure
auckland_income <- auckland_income %>% compress_inc()
bop_income <- bop_income %>% compress_inc()
cant_income <- cant_income %>% compress_inc()
gisb_income <- gisb_income %>% compress_inc()
hb_income <- hb_income %>% compress_inc()

In [None]:
#rename the columns: Measure becomes Median_Income and Value takes the region name
#NOTE(review): "Bay_Of_Plenty" and "Hawks_Bay" here are spelled "Bay_of_Plenty" and "Hawkes_Bay"
#in the pets table - confirm before relying on matching column names
auckland_income <- auckland_income %>% rename(Median_Income = Measure, Auckland = Value)
bop_income <- bop_income %>% rename(Median_Income = Measure, Bay_Of_Plenty = Value)
cant_income <- cant_income %>% rename(Median_Income = Measure, Canterbury = Value)
gisb_income <- gisb_income %>% rename(Median_Income = Measure, Gisbourne = Value)
hb_income <- hb_income %>% rename(Median_Income = Measure, Hawks_Bay = Value)

In [None]:
#Join the regions one at a time on Median_Income into one bigger dataframe
a_b <- left_join(auckland_income, bop_income, by = "Median_Income")

a_b_c <- left_join(a_b, cant_income, by = "Median_Income")

a_b_c_g <- left_join(a_b_c, gisb_income, by = "Median_Income")

final_incomes <- left_join(a_b_c_g, hb_income, by = "Median_Income")

In [None]:
final_incomes


## Home Ownership

In [None]:
#Last compress function, used for the home ownership dataframes
#so that the aggregate call does not have to be written out every time.
#  region: dataframe of district-level home ownership rows with Category and Value columns
#Returns one row per Category with Value summed over all of the districts in the region.
compress <- function(region) {
  homes <- region["Value"]
  groups <- region["Category"]
  aggregate(x = homes, by = groups, FUN = sum)
}

In [None]:
#sum the district rows of every homes dataframe into region-wide counts
auckland_homes <- auckland_homes %>% compress()
bop_homes <- bop_homes %>% compress()
cant_homes <- cant_homes %>% compress()
gisb_homes <- gisb_homes %>% compress()
hb_homes <- hb_homes %>% compress()

In [None]:
#rename the columns: Category becomes Total_People and Value takes the region name
#NOTE(review): "Total_People" is the same label used for the population tables although this column
#holds the home ownership category; "Bay_Of_Plenty" and "Hawks_Bay" are also spelled differently
#from the pets table - confirm before relying on these names
auckland_homes <- auckland_homes %>% rename(Total_People = Category, Auckland = Value)
bop_homes <- bop_homes %>% rename(Total_People = Category, Bay_Of_Plenty = Value)
cant_homes <- cant_homes %>% rename(Total_People = Category, Canterbury = Value)
gisb_homes <- gisb_homes %>% rename(Total_People = Category, Gisbourne = Value)
hb_homes <- hb_homes %>% rename(Total_People = Category, Hawks_Bay = Value)

In [None]:
#Join the regions one at a time on Total_People into one bigger dataframe
a_b <- left_join(auckland_homes, bop_homes, by = "Total_People")

a_b_c <- left_join(a_b, cant_homes, by = "Total_People")

a_b_c_g <- left_join(a_b_c, gisb_homes, by = "Total_People")

final_homes <- left_join(a_b_c_g, hb_homes, by = "Total_People")

In [None]:
final_homes

## So now we have 5 of the regions in NZ, we want to get the other regions as well

## Looking at Taranaki region - West Coast Region

Get the pets information, the population data, the home ownership and the income

## Taranaki

In [None]:
#Set the working directory to where the taranaki csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Taranaki")

In [None]:
getwd()

In [None]:
#get a list of files in the taranaki region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Taranaki", full.names = TRUE)
file_list

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
taranaki_pets <- data.frame()
taranaki_pop <- data.frame()
taranaki_homes <- data.frame()
taranaki_income <- data.frame()

In [None]:
#use the reader functions on each of these dataframes
taranaki_pets <- pet_reader(file_list, taranaki_pets)
taranaki_pets

taranaki_pop <- pop_reader(file_list, taranaki_pop)
taranaki_pop

taranaki_homes <- home_reader(file_list, taranaki_homes)
taranaki_homes

taranaki_income <- income_reader(file_list, taranaki_income)
taranaki_income

## Tasman

In [None]:
#Set the working directory to where the tasman csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Tasman")
getwd()

In [None]:
#get the file list for the tasman region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Tasman", full.names = TRUE)
file_list

In [None]:
#initiate the blank dataframes
tasman_pets <- data.frame()
tasman_pop <- data.frame()
tasman_homes <- data.frame()
tasman_income <- data.frame()

In [None]:
#use the reader functions to get the information
tasman_pets <- pet_reader(file_list, tasman_pets)
tasman_pets

tasman_pop <- pop_reader(file_list, tasman_pop)
tasman_pop

tasman_homes <- home_reader(file_list, tasman_homes)
tasman_homes

tasman_income <- income_reader(file_list, tasman_income)
tasman_income

## Waikato

In [None]:
#Set the working directory to where the waikato csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Waikato")
getwd()

In [None]:
#get the file list for the waikato region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Waikato", full.names = TRUE)
file_list

In [None]:
#make the blank dataframes
waikato_pets <- data.frame()
waikato_pop <- data.frame()
waikato_homes <- data.frame()
waikato_income <- data.frame()

In [None]:
#use the reader functions for the waikato region
waikato_pets <- pet_reader(file_list, waikato_pets)
waikato_pets

waikato_pop <- pop_reader(file_list, waikato_pop)
waikato_pop

waikato_homes <- home_reader(file_list, waikato_homes)
#show the waikato result (this previously printed tasman_homes by mistake)
waikato_homes

waikato_income <- income_reader(file_list, waikato_income)
waikato_income

## Wellington

In [None]:
#Set the working directory to where the wellington csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Wellington")
getwd()

In [None]:
#get the file list for the wellington region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Wellington", full.names = TRUE)
file_list

In [None]:
#make the blank dataframes for the wellington region
wellington_pets <- data.frame()
wellington_pop <- data.frame()
wellington_homes <- data.frame()
wellington_income <- data.frame()

In [None]:
#use the reader functions with these dataframes for the wellington region
wellington_pets <- pet_reader(file_list, wellington_pets)
wellington_pets

wellington_pop <- pop_reader(file_list, wellington_pop)
wellington_pop

wellington_homes <- home_reader(file_list, wellington_homes)
wellington_homes

wellington_income <- income_reader(file_list, wellington_income)
wellington_income

## West Coast

In [None]:
#Set the working directory to where the west coast csvs are
#(file_list below holds full paths, so the readers do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/West Coast")
getwd()

In [None]:
#get the file list for the west coast region
#full.names = TRUE returns each name with its folder attached, so the reader functions
#can open the files whatever the working directory is when they are run
file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/West Coast", full.names = TRUE)
file_list

In [None]:
#make the blank dataframes
west_coast_pets <- data.frame()
west_coast_pop <- data.frame()
west_coast_homes <- data.frame()
west_coast_income <- data.frame()

In [None]:
#use the reader functions for the dataframes
west_coast_pets <- pet_reader(file_list, west_coast_pets)
west_coast_pets

west_coast_pop <- pop_reader(file_list, west_coast_pop)
west_coast_pop

west_coast_homes <- home_reader(file_list, west_coast_homes)
west_coast_homes

west_coast_income <- income_reader(file_list, west_coast_income)
west_coast_income

## Aggregation now for these. 

## Aggregate the pet information 

In [None]:
#sum the district rows of every pets dataframe into region-wide counts per breed
taranaki_pets <- taranaki_pets %>% compress_pets()
tasman_pets <- tasman_pets %>% compress_pets()
waikato_pets <- waikato_pets %>% compress_pets()
wellington_pets <- wellington_pets %>% compress_pets()
west_coast_pets <- west_coast_pets %>% compress_pets()


In [None]:
#rename the columns: Category becomes Breed and Value takes the region name
taranaki_pets <- taranaki_pets %>% rename(Breed = Category, Taranaki = Value)
tasman_pets <- tasman_pets %>% rename(Breed = Category, Tasman = Value)
waikato_pets <- waikato_pets %>% rename(Breed = Category, Waikato = Value)
wellington_pets <- wellington_pets %>% rename(Breed = Category, Wellington = Value)
west_coast_pets <- west_coast_pets %>% rename(Breed = Category, West_Coast = Value)


In [None]:
#Print them to check that the compression and renaming worked
taranaki_pets
tasman_pets
waikato_pets
wellington_pets
west_coast_pets

In [None]:
#Join these regions' pet information together into a bigger dataframe
#full_join keeps every breed that appears in any of the regions (NA where a region has none registered);
#a left_join chain starting from Taranaki would silently drop breeds that Taranaki does not have
tar_tas <- taranaki_pets %>%
 full_join(tasman_pets, by = "Breed")

tar_tas_wai <- tar_tas %>%
 full_join(waikato_pets, by = "Breed")

tar_tas_wai_wel <- tar_tas_wai %>%
 full_join(wellington_pets, by = "Breed")

tar_to_wc_pets <- tar_tas_wai_wel %>%
 full_join(west_coast_pets, by = "Breed")



In [None]:
tar_to_wc_pets

## Next we want to add these to the previous larger pet dataframe made

In [None]:
#and join it to the dataframe we made for the first five regions, joining by breed 
#full_join so that a breed found only in the Taranaki - West Coast regions is kept
#(with NA for the earlier regions) instead of being dropped by a left_join
final_pets <- final_pets %>%
 full_join(tar_to_wc_pets, by = "Breed")

In [None]:
final_pets

## Compressing the Income, population and home ownership

## Taranaki - West Coast: Population

In [None]:
#sum the district rows so each population dataframe holds just the region-wide count
taranaki_pop <- taranaki_pop %>% compress_pop()
tasman_pop <- tasman_pop %>% compress_pop()
waikato_pop <- waikato_pop %>% compress_pop()
wellington_pop <- wellington_pop %>% compress_pop()
west_coast_pop <- west_coast_pop %>% compress_pop()


In [None]:
#rename the columns: Category becomes Total_People and Value takes the region name
taranaki_pop <- taranaki_pop %>% rename(Total_People = Category, Taranaki = Value)
tasman_pop <- tasman_pop %>% rename(Total_People = Category, Tasman = Value)
waikato_pop <- waikato_pop %>% rename(Total_People = Category, Waikato = Value)
wellington_pop <- wellington_pop %>% rename(Total_People = Category, Wellington = Value)
west_coast_pop <- west_coast_pop %>% rename(Total_People = Category, West_Coast = Value)

In [None]:
#Join the regions one at a time on Total_People

a_b <- left_join(taranaki_pop, tasman_pop, by = "Total_People")

a_b_c <- left_join(a_b, waikato_pop, by = "Total_People")

a_b_c_g <- left_join(a_b_c, wellington_pop, by = "Total_People")

tar_to_west_pop <- left_join(a_b_c_g, west_coast_pop, by = "Total_People")


tar_to_west_pop  

Then combine these with the previous dataframe

In [None]:
#add the Taranaki - West Coast columns onto the population dataframe built for the first five regions
final_pops <- left_join(final_pops, tar_to_west_pop, by = "Total_People")

In [None]:
final_pops

## Income

In [None]:
#Average the district figures for each region, we dont want to know about the individual districts 
taranaki_income <- taranaki_income %>% compress_inc()
tasman_income <- tasman_income %>% compress_inc()
waikato_income <- waikato_income %>% compress_inc()
wellington_income <- wellington_income %>% compress_inc()
west_coast_income <- west_coast_income %>% compress_inc()

In [None]:
#Rename the columns: Measure becomes Median_Income and Value takes the region name
taranaki_income <- taranaki_income %>% rename(Median_Income = Measure, Taranaki = Value)
tasman_income <- tasman_income %>% rename(Median_Income = Measure, Tasman = Value)
waikato_income <- waikato_income %>% rename(Median_Income = Measure, Waikato = Value)
wellington_income <- wellington_income %>% rename(Median_Income = Measure, Wellington = Value)
west_coast_income <- west_coast_income %>% rename(Median_Income = Measure, West_Coast = Value)

In [None]:
#Join the regions one at a time on Median_Income
a_b <- left_join(taranaki_income, tasman_income, by = "Median_Income")

a_b_c <- left_join(a_b, waikato_income, by = "Median_Income")

a_b_c_g <- left_join(a_b_c, wellington_income, by = "Median_Income")

tar_to_wc_income <- left_join(a_b_c_g, west_coast_income, by = "Median_Income")

Join it with the previous income dataframe

In [None]:
#add the Taranaki - West Coast columns onto the income dataframe built for the first five regions
final_incomes <- left_join(final_incomes, tar_to_wc_income, by = "Median_Income")

In [None]:
final_incomes

## Home Ownership

In [None]:
#sum the district rows of the homes dataframes that we've just made into region-wide counts
taranaki_homes <- taranaki_homes %>% compress()
tasman_homes <- tasman_homes %>% compress()
waikato_homes <- waikato_homes %>% compress()
wellington_homes <- wellington_homes %>% compress()
west_coast_homes <- west_coast_homes %>% compress()

In [None]:
#rename the columns: Category becomes Total_People and Value takes the region name
#NOTE(review): "Total_People" is the same label used for the population tables although this column
#holds the home ownership category - confirm before relying on the name
taranaki_homes <- taranaki_homes %>% rename(Total_People = Category, Taranaki = Value)
tasman_homes <- tasman_homes %>% rename(Total_People = Category, Tasman = Value)
waikato_homes <- waikato_homes %>% rename(Total_People = Category, Waikato = Value)
wellington_homes <- wellington_homes %>% rename(Total_People = Category, Wellington = Value)
west_coast_homes <- west_coast_homes %>% rename(Total_People = Category, West_Coast = Value)

In [None]:
#Join the regions one at a time on Total_People
a_b <- left_join(taranaki_homes, tasman_homes, by = "Total_People")

a_b_c <- left_join(a_b, waikato_homes, by = "Total_People")

a_b_c_g <- left_join(a_b_c, wellington_homes, by = "Total_People")

tar_wc_homes <- left_join(a_b_c_g, west_coast_homes, by = "Total_People")

In [None]:
#Add these columns onto the homes dataframe made previously for the first five regions
final_homes <- left_join(final_homes, tar_wc_homes, by = "Total_People")

In [None]:
final_homes

## Looking at Manawatu  - Southland (the last regions )

This portion changes in style due to the separation of this task between group members
(However I did change the working directories to be on my computer to check it all runs)

In [None]:
library(tidyr)
library(tidyverse)
library(readr)
library(skimr)

In [None]:
getwd()
#set the working directory to the folder that the region you're looking at is saved to
#(the file list below holds full paths, so the loops do not depend on this; it is kept for any later cell that relies on it)
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Manawatu - Whanganui")

In [None]:
#Get the list of files that you're going to be reading from 
#set the path to be the folder of the region that you're looking at
#full.names = TRUE returns each name with its folder attached, so the files
#can be opened whatever the working directory is when the loops are run
Manawatu_Whanganui_file_list <- list.files(path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Manawatu - Whanganui", full.names = TRUE)
#change this above to be where the files are saved on your computer

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Manawatu_Whanganui_dogs_df <- data.frame()

In [None]:
#Specifically to get the information about pets this is the loop, 
#but can change the data[data$Topic == ""] to get it to be about other information in the csvs
#NOTE(review): unlike pet_reader, this loop keeps the Date and DateLabel columns - confirm that is intended

#seq_along() gives an empty sequence for an empty file list, whereas
#1:length() would count 1, 0 and try to read files that do not exist
for (i in seq_along(Manawatu_Whanganui_file_list)){
  data <- read_csv(Manawatu_Whanganui_file_list[i])
  #e.g. if we wanted household information we would change the topic to be "Households" below
  pets_data <- data[data$Topic == "Pets",]
  #and then would just change these variables names to be something sensible like house_data
  pets_data$Topic <- NULL
  pets_data$NullReason <- NULL
  pets_data$Source <- NULL
  pets_data$ValueUnit <- NULL
  pets_data$ValueLabel <- NULL


  #this is specific to the pets portion, this removes the acc data
  #(the first three "Pets" rows of each file)
  pets_counts <- pets_data[c(-1,-2,-3),]


  #and if we wanted to get home ownership instead of dogs
  #we'd change the measure == "Home ownership by households"
  total_registered_dogs <- pets_counts[pets_counts$Measure =="Registered dogs — Total registered dogs",]


  Manawatu_Whanganui_dogs_df <- rbind(Manawatu_Whanganui_dogs_df, total_registered_dogs) #for each iteration, bind the new data to the building dataset

}

In [None]:
Manawatu_Whanganui_dogs_df

In [None]:
#blank data frame that the loop below appends the population rows of each file to
Manawatu_Whanganui_population_df <- data.frame()

In [None]:
#seq_along() gives an empty sequence for an empty file list, whereas
#1:length() would count 1, 0 and try to read files that do not exist
for (i in seq_along(Manawatu_Whanganui_file_list)){
  data <- read.csv(Manawatu_Whanganui_file_list[i])
  #keep only the rows of the "Demographics" topic
  demo_data <- data[data$Topic == "Demographics",]

  #drop the columns that are not needed
  demo_data$Topic <- NULL
  demo_data$NullReason <- NULL
  demo_data$Source <- NULL
  demo_data$ValueUnit <- NULL
  demo_data$ValueLabel <- NULL

  population <- demo_data[demo_data$Measure == "Population - residents at 2013 census",]
  Manawatu_Whanganui_population_df <- rbind(Manawatu_Whanganui_population_df, population) #for each iteration, bind the new data to the building dataset
  #dataset <- rbind(dataset, population)  
}
#population_df

In [None]:
Manawatu_Whanganui_population_df

Loop through the files and extract the home ownership information now 

In [None]:
#blank data frame that the loop below appends the home ownership rows of each file to
Manawatu_Whanganui_homes_df <- data.frame()

In [None]:
#seq_along() gives an empty sequence for an empty file list, whereas
#1:length() would count 1, 0 and try to read files that do not exist
for (i in seq_along(Manawatu_Whanganui_file_list)){
  data <- read.csv(Manawatu_Whanganui_file_list[i])
  #keep only the rows of the "Households" topic
  home_data <- data[data$Topic == "Households",]

  #drop the columns that are not needed
  home_data$Topic <- NULL
  home_data$NullReason <- NULL
  home_data$Source <- NULL
  home_data$ValueUnit <- NULL
  home_data$ValueLabel <- NULL

  ownership <- home_data[home_data$Measure == "Home ownership by households",]
  Manawatu_Whanganui_homes_df <- rbind(Manawatu_Whanganui_homes_df, ownership) #for each iteration, bind the new data to the building dataset
}

In [None]:
Manawatu_Whanganui_homes_df

Loop through and make the income into a dataframe

In [None]:
#blank data frame that the loop below appends the median income rows of each file to
Manawatu_Whanganui_median_income_df <- data.frame()

In [None]:
#seq_along() gives an empty sequence for an empty file list, whereas
#1:length() would count 1, 0 and try to read files that do not exist
for (i in seq_along(Manawatu_Whanganui_file_list)){
  data <- read.csv(Manawatu_Whanganui_file_list[i])
  #keep only the rows of the "Income" topic
  income_data <- data[data$Topic == "Income",]

  #drop the columns that are not needed
  income_data$Topic <- NULL
  income_data$NullReason <- NULL
  income_data$Source <- NULL
  income_data$ValueUnit <- NULL
  income_data$ValueLabel <- NULL

  personal_income <- income_data[income_data$Measure == "Median personal income ($)",]
  Manawatu_Whanganui_median_income_df <- rbind(Manawatu_Whanganui_median_income_df, personal_income) #for each iteration, bind the new data to the building dataset
}

In [None]:
Manawatu_Whanganui_median_income_df

In [None]:
#blank data frame that the loop below appends the income bracket rows of each file to
Manawatu_Whanganui_income_bracket_df <- data.frame()

In [None]:
#seq_along() gives an empty sequence for an empty file list, whereas
#1:length() would count 1, 0 and try to read files that do not exist
for (i in seq_along(Manawatu_Whanganui_file_list)){
  data <- read.csv(Manawatu_Whanganui_file_list[i])
  #keep only the rows of the "Income" topic
  income_bracket_data <- data[data$Topic == "Income",]

  #drop the columns that are not needed
  income_bracket_data$Topic <- NULL
  income_bracket_data$NullReason <- NULL
  income_bracket_data$Source <- NULL
  income_bracket_data$ValueUnit <- NULL
  income_bracket_data$ValueLabel <- NULL

  bracket_income <- income_bracket_data[income_bracket_data$Measure == "Number of families in income bracket",]
  Manawatu_Whanganui_income_bracket_df <- rbind(Manawatu_Whanganui_income_bracket_df, bracket_income) #for each iteration, bind the new data to the building dataset
}

In [None]:
Manawatu_Whanganui_income_bracket_df

## **Manawatu-Whanganui Data Frames:**

 - Manawatu_Whanganui_dogs_df
 - Manawatu_Whanganui_population_df
 - Manawatu_Whanganui_homes_df
 - Manawatu_Whanganui_median_income_df
 - Manawatu_Whanganui_income_bracket_df

In [None]:
# Show the current working directory before it is changed.
getwd()
# Point the working directory at the folder that holds the Marlborough CSVs,
# so that relative file names resolve there.
# NOTE(review): the path is specific to one machine; change it to your own copy.
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Marlborough")

In [None]:
# List the district CSVs for the Marlborough region.
# Change `path` to wherever the region folder is saved on your computer.
# full.names = TRUE returns each file together with its directory, so the
# files can be read whatever the current working directory is (bare file
# names only resolve while setwd() points at this same folder).
Marlborough_file_list <- list.files(
  path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Marlborough",
  full.names = TRUE
)

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Marlborough_dogs_df <- data.frame()

In [None]:
# Build Marlborough_dogs_df: the "Registered dogs — Total registered dogs"
# rows of the "Pets" topic from every district CSV in Marlborough_file_list,
# without the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Marlborough_dogs_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Marlborough_file_list, function(csv_file) {
    district <- read_csv(csv_file)
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    pets_data <- district[
      district$Topic == "Pets",
      setdiff(names(district), unused)
    ]
    # This is specific to the pets portion: it removes the ACC data.
    # NOTE(review): this assumes the ACC rows are the first three "Pets" rows
    # of every file; dropping by position is fragile - confirm.
    pets_counts <- pets_data[c(-1, -2, -3), ]
    is_total <- pets_counts$Measure ==
      "Registered dogs — Total registered dogs"
    pets_counts[is_total, ]
  })
))

In [None]:
Marlborough_dogs_df

In [None]:
Marlborough_population_df = data.frame()

In [None]:
# Build Marlborough_population_df from the district CSVs in
# Marlborough_file_list: keep the "Demographics" rows whose Measure is
# "Population - residents at 2013 census" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Marlborough_population_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Marlborough_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Demographics" &
      district$Measure %in% "Population - residents at 2013 census"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Marlborough_population_df

In [None]:
Marlborough_homes_df  = data.frame()

In [None]:
# Build Marlborough_homes_df from the district CSVs in Marlborough_file_list:
# keep the "Households" rows whose Measure is "Home ownership by households"
# and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Marlborough_homes_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Marlborough_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Households" &
      district$Measure %in% "Home ownership by households"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Marlborough_homes_df

In [None]:
Marlborough_median_income_df = data.frame()

In [None]:
# Build Marlborough_median_income_df from the district CSVs in
# Marlborough_file_list: keep the "Income" rows whose Measure is
# "Median personal income ($)" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Marlborough_median_income_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Marlborough_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Median personal income ($)"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Marlborough_median_income_df

In [None]:
Marlborough_income_bracket_df = data.frame()

In [None]:
# Build Marlborough_income_bracket_df from the district CSVs in
# Marlborough_file_list: keep the "Income" rows whose Measure is
# "Number of families in income bracket" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Marlborough_income_bracket_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Marlborough_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Number of families in income bracket"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Marlborough_income_bracket_df

## **Marlborough Data Frames:**

 - Marlborough_dogs_df
 - Marlborough_population_df
 - Marlborough_homes_df
 - Marlborough_median_income_df
 - Marlborough_income_bracket_df

In [None]:
# Show the current working directory before it is changed.
getwd()
# Point the working directory at the folder that holds the Nelson CSVs,
# so that relative file names resolve there.
# NOTE(review): the path is specific to one machine; change it to your own copy.
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Nelson")

In [None]:
# List the district CSVs for the Nelson region.
# Change `path` to wherever the region folder is saved on your computer.
# full.names = TRUE returns each file together with its directory, so the
# files can be read whatever the current working directory is (bare file
# names only resolve while setwd() points at this same folder).
Nelson_file_list <- list.files(
  path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Nelson",
  full.names = TRUE
)

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Nelson_dogs_df <- data.frame()

In [None]:
# Build Nelson_dogs_df: the "Registered dogs — Total registered dogs" rows
# of the "Pets" topic from every district CSV in Nelson_file_list, without
# the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Nelson_dogs_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Nelson_file_list, function(csv_file) {
    district <- read_csv(csv_file)
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    pets_data <- district[
      district$Topic == "Pets",
      setdiff(names(district), unused)
    ]
    # This is specific to the pets portion: it removes the ACC data.
    # NOTE(review): this assumes the ACC rows are the first three "Pets" rows
    # of every file; dropping by position is fragile - confirm.
    pets_counts <- pets_data[c(-1, -2, -3), ]
    is_total <- pets_counts$Measure ==
      "Registered dogs — Total registered dogs"
    pets_counts[is_total, ]
  })
))

In [None]:
Nelson_population_df = data.frame()

In [None]:
# Build Nelson_population_df from the district CSVs in Nelson_file_list:
# keep the "Demographics" rows whose Measure is
# "Population - residents at 2013 census" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Nelson_population_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Nelson_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Demographics" &
      district$Measure %in% "Population - residents at 2013 census"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Nelson_homes_df  = data.frame()

In [None]:
# Build Nelson_homes_df from the district CSVs in Nelson_file_list: keep the
# "Households" rows whose Measure is "Home ownership by households" and drop
# the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Nelson_homes_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Nelson_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Households" &
      district$Measure %in% "Home ownership by households"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Nelson_median_income_df = data.frame()

In [None]:
# Build Nelson_median_income_df from the district CSVs in Nelson_file_list:
# keep the "Income" rows whose Measure is "Median personal income ($)" and
# drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Nelson_median_income_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Nelson_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Median personal income ($)"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Nelson_income_bracket_df = data.frame()

In [None]:
# Build Nelson_income_bracket_df from the district CSVs in Nelson_file_list:
# keep the "Income" rows whose Measure is
# "Number of families in income bracket" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Nelson_income_bracket_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Nelson_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Number of families in income bracket"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

## **Nelson Data Frames:**

 - Nelson_dogs_df
 - Nelson_population_df
 - Nelson_homes_df
 - Nelson_median_income_df
 - Nelson_income_bracket_df

In [None]:
# Show the current working directory before it is changed.
getwd()
# Point the working directory at the folder that holds the Northland CSVs,
# so that relative file names resolve there.
# NOTE(review): the path is specific to one machine; change it to your own copy.
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Northland")

In [None]:
# List the district CSVs for the Northland region.
# Change `path` to wherever the region folder is saved on your computer.
# full.names = TRUE returns each file together with its directory, so the
# files can be read whatever the current working directory is (bare file
# names only resolve while setwd() points at this same folder).
Northland_file_list <- list.files(
  path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Northland",
  full.names = TRUE
)

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Northland_dogs_df <- data.frame()

In [None]:
# Build Northland_dogs_df: the "Registered dogs — Total registered dogs"
# rows of the "Pets" topic from every district CSV in Northland_file_list,
# without the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Northland_dogs_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Northland_file_list, function(csv_file) {
    district <- read_csv(csv_file)
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    pets_data <- district[
      district$Topic == "Pets",
      setdiff(names(district), unused)
    ]
    # This is specific to the pets portion: it removes the ACC data.
    # NOTE(review): this assumes the ACC rows are the first three "Pets" rows
    # of every file; dropping by position is fragile - confirm.
    pets_counts <- pets_data[c(-1, -2, -3), ]
    is_total <- pets_counts$Measure ==
      "Registered dogs — Total registered dogs"
    pets_counts[is_total, ]
  })
))

In [None]:
Northland_population_df = data.frame()

In [None]:
# Build Northland_population_df from the district CSVs in
# Northland_file_list: keep the "Demographics" rows whose Measure is
# "Population - residents at 2013 census" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Northland_population_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Northland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Demographics" &
      district$Measure %in% "Population - residents at 2013 census"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Northland_homes_df  = data.frame()

In [None]:
# Build Northland_homes_df from the district CSVs in Northland_file_list:
# keep the "Households" rows whose Measure is "Home ownership by households"
# and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Northland_homes_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Northland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Households" &
      district$Measure %in% "Home ownership by households"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Northland_median_income_df = data.frame()

In [None]:
# Build Northland_median_income_df from the district CSVs in
# Northland_file_list: keep the "Income" rows whose Measure is
# "Median personal income ($)" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Northland_median_income_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Northland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Median personal income ($)"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Northland_income_bracket_df = data.frame()

In [None]:
# Build Northland_income_bracket_df from the district CSVs in
# Northland_file_list: keep the "Income" rows whose Measure is
# "Number of families in income bracket" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Northland_income_bracket_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Northland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Number of families in income bracket"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

## **Northland Data Frames:**

 - Northland_dogs_df
 - Northland_population_df
 - Northland_homes_df
 - Northland_median_income_df
 - Northland_income_bracket_df

In [None]:
# Show the current working directory before it is changed.
getwd()
# Point the working directory at the folder that holds the Otago CSVs,
# so that relative file names resolve there.
# NOTE(review): the path is specific to one machine; change it to your own copy.
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Otago")

In [None]:
# List the district CSVs for the Otago region.
# Change `path` to wherever the region folder is saved on your computer.
# full.names = TRUE returns each file together with its directory, so the
# files can be read whatever the current working directory is (bare file
# names only resolve while setwd() points at this same folder).
Otago_file_list <- list.files(
  path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Otago",
  full.names = TRUE
)

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Otago_dogs_df <- data.frame()

In [None]:
# Build Otago_dogs_df: the "Registered dogs — Total registered dogs" rows
# of the "Pets" topic from every district CSV in Otago_file_list, without
# the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Otago_dogs_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Otago_file_list, function(csv_file) {
    district <- read_csv(csv_file)
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    pets_data <- district[
      district$Topic == "Pets",
      setdiff(names(district), unused)
    ]
    # This is specific to the pets portion: it removes the ACC data.
    # NOTE(review): this assumes the ACC rows are the first three "Pets" rows
    # of every file; dropping by position is fragile - confirm.
    pets_counts <- pets_data[c(-1, -2, -3), ]
    is_total <- pets_counts$Measure ==
      "Registered dogs — Total registered dogs"
    pets_counts[is_total, ]
  })
))

In [None]:
Otago_population_df = data.frame()

In [None]:
# Build Otago_population_df from the district CSVs in Otago_file_list: keep
# the "Demographics" rows whose Measure is
# "Population - residents at 2013 census" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Otago_population_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Otago_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Demographics" &
      district$Measure %in% "Population - residents at 2013 census"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Otago_homes_df  = data.frame()

In [None]:
# Build Otago_homes_df from the district CSVs in Otago_file_list: keep the
# "Households" rows whose Measure is "Home ownership by households" and drop
# the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Otago_homes_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Otago_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Households" &
      district$Measure %in% "Home ownership by households"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Otago_median_income_df = data.frame()

In [None]:
# Build Otago_median_income_df from the district CSVs in Otago_file_list:
# keep the "Income" rows whose Measure is "Median personal income ($)" and
# drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Otago_median_income_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Otago_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Median personal income ($)"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Otago_income_bracket_df = data.frame()

In [None]:
# Build Otago_income_bracket_df from the district CSVs in Otago_file_list:
# keep the "Income" rows whose Measure is
# "Number of families in income bracket" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Otago_income_bracket_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Otago_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Number of families in income bracket"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

## **Otago Data Frames:**

 - Otago_dogs_df
 - Otago_population_df
 - Otago_homes_df
 - Otago_median_income_df
 - Otago_income_bracket_df

In [None]:
# Show the current working directory before it is changed.
getwd()
# Point the working directory at the folder that holds the Southland CSVs,
# so that relative file names resolve there.
# NOTE(review): the path is specific to one machine; change it to your own copy.
setwd("~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Southland")

In [None]:
# List the district CSVs for the Southland region.
# Change `path` to wherever the region folder is saved on your computer.
# full.names = TRUE returns each file together with its directory, so the
# files can be read whatever the current working directory is (bare file
# names only resolve while setwd() points at this same folder).
Southland_file_list <- list.files(
  path = "~/UNI SHIT/Masters/Year 1 /Semester 2 /Data Wrangling/Group Project/Pets/Region_CSVs/Southland",
  full.names = TRUE
)

In [None]:
#initiate a blank data frame, each iteration of the loop will append the data from the given file to this variable
Southland_dogs_df <- data.frame()

In [None]:
# Build Southland_dogs_df: the "Registered dogs — Total registered dogs"
# rows of the "Pets" topic from every district CSV in Southland_file_list,
# without the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Southland_dogs_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Southland_file_list, function(csv_file) {
    district <- read_csv(csv_file)
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    pets_data <- district[
      district$Topic == "Pets",
      setdiff(names(district), unused)
    ]
    # This is specific to the pets portion: it removes the ACC data.
    # NOTE(review): this assumes the ACC rows are the first three "Pets" rows
    # of every file; dropping by position is fragile - confirm.
    pets_counts <- pets_data[c(-1, -2, -3), ]
    is_total <- pets_counts$Measure ==
      "Registered dogs — Total registered dogs"
    pets_counts[is_total, ]
  })
))

In [None]:
Southland_population_df = data.frame()

In [None]:
# Build Southland_population_df from the district CSVs in
# Southland_file_list: keep the "Demographics" rows whose Measure is
# "Population - residents at 2013 census" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Southland_population_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Southland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Demographics" &
      district$Measure %in% "Population - residents at 2013 census"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Southland_homes_df  = data.frame()

In [None]:
# Build Southland_homes_df from the district CSVs in Southland_file_list:
# keep the "Households" rows whose Measure is "Home ownership by households"
# and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Southland_homes_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Southland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Households" &
      district$Measure %in% "Home ownership by households"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Southland_median_income_df = data.frame()

In [None]:
# Build Southland_median_income_df from the district CSVs in
# Southland_file_list: keep the "Income" rows whose Measure is
# "Median personal income ($)" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Southland_median_income_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Southland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Median personal income ($)"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

In [None]:
Southland_income_bracket_df = data.frame()

In [None]:
# Build Southland_income_bracket_df from the district CSVs in
# Southland_file_list: keep the "Income" rows whose Measure is
# "Number of families in income bracket" and drop the columns we never use.
# lapply() copes with an empty file list (1:length() would loop over 1 and
# 0), and the table is rebuilt from an empty data frame on every run, so
# re-running this cell cannot append the same rows twice.
Southland_income_bracket_df <- do.call(rbind, c(
  list(data.frame()),
  lapply(Southland_file_list, function(csv_file) {
    district <- read.csv(csv_file)
    # %in% is never NA, so rows with a missing Topic or Measure are skipped
    # rather than coming back as all-NA rows.
    wanted <- district$Topic %in% "Income" &
      district$Measure %in% "Number of families in income bracket"
    unused <- c("Topic", "NullReason", "Source", "ValueUnit", "ValueLabel")
    district[wanted, setdiff(names(district), unused), drop = FALSE]
  })
))

## **Southland Data Frames:**

 - Southland_dogs_df
 - Southland_population_df
 - Southland_homes_df
 - Southland_median_income_df
 - Southland_income_bracket_df

## **All Data Frames:**

 - Manawatu_Whanganui_dogs_df
 - Manawatu_Whanganui_population_df
 - Manawatu_Whanganui_homes_df
 - Manawatu_Whanganui_median_income_df
 - Manawatu_Whanganui_income_bracket_df
 - Marlborough_dogs_df
 - Marlborough_population_df
 - Marlborough_homes_df
 - Marlborough_median_income_df
 - Marlborough_income_bracket_df
 - Nelson_dogs_df
 - Nelson_population_df
 - Nelson_homes_df
 - Nelson_median_income_df
 - Nelson_income_bracket_df
 - Northland_dogs_df
 - Northland_population_df
 - Northland_homes_df
 - Northland_median_income_df
 - Northland_income_bracket_df
 - Otago_dogs_df
 - Otago_population_df
 - Otago_homes_df
 - Otago_median_income_df
 - Otago_income_bracket_df
 - Southland_dogs_df
 - Southland_population_df
 - Southland_homes_df
 - Southland_median_income_df
 - Southland_income_bracket_df

In [None]:
# Sum the district dog counts for Manawatu-Whanganui, one row per Category.
Manawatu_Whanganui_dogs_totals_df <- aggregate(
  x = Manawatu_Whanganui_dogs_df["Value"],
  by = Manawatu_Whanganui_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district dog counts for Marlborough, one row per Category.
Marlborough_dogs_totals_df <- aggregate(
  x = Marlborough_dogs_df["Value"],
  by = Marlborough_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district dog counts for Nelson, one row per Category.
Nelson_dogs_totals_df <- aggregate(
  x = Nelson_dogs_df["Value"],
  by = Nelson_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district dog counts for Otago, one row per Category.
Otago_dogs_totals_df <- aggregate(
  x = Otago_dogs_df["Value"],
  by = Otago_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district dog counts for Southland, one row per Category.
Southland_dogs_totals_df <- aggregate(
  x = Southland_dogs_df["Value"],
  by = Southland_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district dog counts for Northland, one row per Category.
Northland_dogs_totals_df <- aggregate(
  x = Northland_dogs_df["Value"],
  by = Northland_dogs_df["Category"],
  FUN = sum
)

In [None]:
# Put the six regional dog totals side by side, one row per Category.
# bind_cols() pairs rows by position only, so every region is first
# re-indexed onto one shared, sorted Category list; otherwise a Category
# missing from one region makes bind_cols() fail or silently puts values on
# the wrong row. A Category that a region does not report gets NA.
# The order of the regions fixes the repaired column names (Category...1,
# Value...2, ..., Value...12) that the following cells rely on.
dog_totals_df <- local({
  regional_totals <- list(
    Manawatu_Whanganui_dogs_totals_df,
    Marlborough_dogs_totals_df,
    Nelson_dogs_totals_df,
    Otago_dogs_totals_df,
    Southland_dogs_totals_df,
    Northland_dogs_totals_df
  )
  categories <- sort(unique(unlist(lapply(
    regional_totals,
    function(totals) as.character(totals$Category)
  ))))
  bind_cols(lapply(regional_totals, function(totals) {
    rows <- match(categories, as.character(totals$Category))
    data.frame(
      Category = categories,
      Value = totals$Value[rows],
      stringsAsFactors = FALSE
    )
  }))
})

In [None]:
# Drop the repeated Category columns that came from regions two to six;
# Category...1 stays as the single key column.
clean_dog_totals_df <- dplyr::select(
  dog_totals_df,
  -dplyr::all_of(c(
    "Category...3", "Category...5", "Category...7",
    "Category...9", "Category...11"
  ))
)

In [None]:
# Give the key column and the six regional value columns readable names.
Final_dog_df <- clean_dog_totals_df %>%
  rename(
    Breed = Category...1,
    Manawatu_Whanganui = Value...2,
    Marlborough = Value...4,
    Nelson = Value...6,
    Otago = Value...8,
    Southland = Value...10,
    Northland = Value...12
  )

In [None]:
Final_dog_df

In [None]:
# Sum the district populations for Manawatu-Whanganui, one row per Category.
Manawatu_Whanganui_populations_totals_df <- aggregate(
  x = Manawatu_Whanganui_population_df["Value"],
  by = Manawatu_Whanganui_population_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district populations for Marlborough, one row per Category.
Marlborough_populations_totals_df <- aggregate(
  x = Marlborough_population_df["Value"],
  by = Marlborough_population_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district populations for Nelson, one row per Category.
Nelson_populations_totals_df <- aggregate(
  x = Nelson_population_df["Value"],
  by = Nelson_population_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district populations for Otago, one row per Category.
Otago_populations_totals_df <- aggregate(
  x = Otago_population_df["Value"],
  by = Otago_population_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district populations for Southland, one row per Category.
Southland_populations_totals_df <- aggregate(
  x = Southland_population_df["Value"],
  by = Southland_population_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district populations for Northland, one row per Category.
Northland_populations_totals_df <- aggregate(
  x = Northland_population_df["Value"],
  by = Northland_population_df["Category"],
  FUN = sum
)

In [None]:
# Put the six regional population totals side by side, one row per Category.
# bind_cols() pairs rows by position only, so every region is first
# re-indexed onto one shared, sorted Category list; otherwise a Category
# missing from one region makes bind_cols() fail or silently puts values on
# the wrong row. A Category that a region does not report gets NA.
# The order of the regions fixes the repaired column names (Category...1,
# Value...2, ..., Value...12) that the following cells rely on.
populations_totals_df <- local({
  regional_totals <- list(
    Manawatu_Whanganui_populations_totals_df,
    Marlborough_populations_totals_df,
    Nelson_populations_totals_df,
    Otago_populations_totals_df,
    Southland_populations_totals_df,
    Northland_populations_totals_df
  )
  categories <- sort(unique(unlist(lapply(
    regional_totals,
    function(totals) as.character(totals$Category)
  ))))
  bind_cols(lapply(regional_totals, function(totals) {
    rows <- match(categories, as.character(totals$Category))
    data.frame(
      Category = categories,
      Value = totals$Value[rows],
      stringsAsFactors = FALSE
    )
  }))
})

In [None]:
# Drop the repeated Category columns that came from regions two to six;
# Category...1 stays as the single key column.
clean_populations_totals_df <- dplyr::select(
  populations_totals_df,
  -dplyr::all_of(c(
    "Category...3", "Category...5", "Category...7",
    "Category...9", "Category...11"
  ))
)

In [None]:
# Give the key column and the six regional value columns readable names.
Final_populations_df <- clean_populations_totals_df %>%
  rename(
    Total_People = Category...1,
    Manawatu_Whanganui = Value...2,
    Marlborough = Value...4,
    Nelson = Value...6,
    Otago = Value...8,
    Southland = Value...10,
    Northland = Value...12
  )

In [None]:
Final_populations_df

In [None]:
# Sum the district home-ownership values for Manawatu-Whanganui, one row per
# Category.
Manawatu_Whanganui_homes_totals_df <- aggregate(
  x = Manawatu_Whanganui_homes_df["Value"],
  by = Manawatu_Whanganui_homes_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district home-ownership values for Marlborough, one row per
# Category.
Marlborough_homes_totals_df <- aggregate(
  x = Marlborough_homes_df["Value"],
  by = Marlborough_homes_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district home-ownership values for Nelson, one row per Category.
Nelson_homes_totals_df <- aggregate(
  x = Nelson_homes_df["Value"],
  by = Nelson_homes_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district home-ownership values for Otago, one row per Category.
Otago_homes_totals_df <- aggregate(
  x = Otago_homes_df["Value"],
  by = Otago_homes_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district home-ownership values for Southland, one row per Category.
Southland_homes_totals_df <- aggregate(
  x = Southland_homes_df["Value"],
  by = Southland_homes_df["Category"],
  FUN = sum
)

In [None]:
# Sum the district home-ownership values for Northland, one row per Category.
Northland_homes_totals_df <- aggregate(
  x = Northland_homes_df["Value"],
  by = Northland_homes_df["Category"],
  FUN = sum
)

In [None]:
# Put the six regional home-ownership totals side by side, one row per
# Category.
# bind_cols() pairs rows by position only, so every region is first
# re-indexed onto one shared, sorted Category list; otherwise a Category
# missing from one region makes bind_cols() fail or silently puts values on
# the wrong row. A Category that a region does not report gets NA.
# The order of the regions fixes the repaired column names (Category...1,
# Value...2, ..., Value...12) that the following cells rely on.
homes_totals_df <- local({
  regional_totals <- list(
    Manawatu_Whanganui_homes_totals_df,
    Marlborough_homes_totals_df,
    Nelson_homes_totals_df,
    Otago_homes_totals_df,
    Southland_homes_totals_df,
    Northland_homes_totals_df
  )
  categories <- sort(unique(unlist(lapply(
    regional_totals,
    function(totals) as.character(totals$Category)
  ))))
  bind_cols(lapply(regional_totals, function(totals) {
    rows <- match(categories, as.character(totals$Category))
    data.frame(
      Category = categories,
      Value = totals$Value[rows],
      stringsAsFactors = FALSE
    )
  }))
})

In [None]:
# Drop the repeated Category columns that came from regions two to six;
# Category...1 stays as the single key column.
clean_homes_totals_df <- dplyr::select(
  homes_totals_df,
  -dplyr::all_of(c(
    "Category...3", "Category...5", "Category...7",
    "Category...9", "Category...11"
  ))
)

In [None]:
# Give the key column and the six regional value columns readable names.
# The key is called Total_People because the later join uses that name.
Final_homes_df <- clean_homes_totals_df %>%
  rename(
    Total_People = Category...1,
    Manawatu_Whanganui = Value...2,
    Marlborough = Value...4,
    Nelson = Value...6,
    Otago = Value...8,
    Southland = Value...10,
    Northland = Value...12
  )

In [None]:
Final_homes_df

In [None]:
# Average the district median personal incomes for Manawatu-Whanganui.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Manawatu_Whanganui_median_income_totals_df <- aggregate(
  x = Manawatu_Whanganui_median_income_df["Value"],
  by = Manawatu_Whanganui_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Average the district median personal incomes for Marlborough.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Marlborough_median_income_totals_df <- aggregate(
  x = Marlborough_median_income_df["Value"],
  by = Marlborough_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Average the district median personal incomes for Nelson.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Nelson_median_income_totals_df <- aggregate(
  x = Nelson_median_income_df["Value"],
  by = Nelson_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Average the district median personal incomes for Otago.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Otago_median_income_totals_df <- aggregate(
  x = Otago_median_income_df["Value"],
  by = Otago_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Average the district median personal incomes for Southland.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Southland_median_income_totals_df <- aggregate(
  x = Southland_median_income_df["Value"],
  by = Southland_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Average the district median personal incomes for Northland.
# NOTE(review): this is an unweighted mean of district medians, not the
# region's own median - confirm that is the intended summary.
Northland_median_income_totals_df <- aggregate(
  x = Northland_median_income_df["Value"],
  by = Northland_median_income_df["Measure"],
  FUN = mean
)

In [None]:
# Put the six regional income averages side by side, one row per Measure.
# bind_cols() pairs rows by position only, so every region is first
# re-indexed onto one shared, sorted Measure list; this keeps the values on
# the right row even if a region's table has a different set of Measure
# rows. A Measure that a region does not report gets NA.
# The order of the regions fixes the repaired column names (Measure...1,
# Value...2, ..., Value...12) that the following cells rely on.
median_income_totals_df <- local({
  regional_totals <- list(
    Manawatu_Whanganui_median_income_totals_df,
    Marlborough_median_income_totals_df,
    Nelson_median_income_totals_df,
    Otago_median_income_totals_df,
    Southland_median_income_totals_df,
    Northland_median_income_totals_df
  )
  measures <- sort(unique(unlist(lapply(
    regional_totals,
    function(totals) as.character(totals$Measure)
  ))))
  bind_cols(lapply(regional_totals, function(totals) {
    rows <- match(measures, as.character(totals$Measure))
    data.frame(
      Measure = measures,
      Value = totals$Value[rows],
      stringsAsFactors = FALSE
    )
  }))
})

In [None]:
# Drop the repeated Measure columns that came from regions two to six;
# Measure...1 stays as the single key column.
clean_median_income_totals_df <- dplyr::select(
  median_income_totals_df,
  -dplyr::all_of(c(
    "Measure...3", "Measure...5", "Measure...7",
    "Measure...9", "Measure...11"
  ))
)

In [None]:
# Give the key column and the six regional value columns readable names.
Final_median_income_totals_df <- clean_median_income_totals_df %>%
  rename(
    Median_Income = Measure...1,
    Manawatu_Whanganui = Value...2,
    Marlborough = Value...4,
    Nelson = Value...6,
    Otago = Value...8,
    Southland = Value...10,
    Northland = Value...12
  )

In [None]:
Final_median_income_totals_df

## **Final Total Data Frames For Manawatu - Southland:**

 - Final_dog_df
 - Final_populations_df
 - Final_homes_df
 - Final_median_income_totals_df

Now we will combine these with the dataframes previously made on income, home ownership, population and dogs

In [None]:
# Add the six regional columns of Final_dog_df to final_pets, matching rows
# on Breed; every existing final_pets row is kept.
# Re-running this cell joins the same columns again, which makes dplyr
# suffix the duplicates with .x / .y.
final_pets <- left_join(final_pets, Final_dog_df, by = "Breed")

In [None]:
final_pets

In [None]:
# Add the six regional columns of Final_populations_df to final_pops,
# matching rows on Total_People; every existing final_pops row is kept.
# Re-running this cell joins the same columns again, which makes dplyr
# suffix the duplicates with .x / .y.
final_pops <- left_join(final_pops, Final_populations_df, by = "Total_People")

In [None]:
final_pops

In [None]:
# Add the six regional columns of Final_median_income_totals_df to
# final_incomes, matching rows on Median_Income; every existing
# final_incomes row is kept.
# Re-running this cell joins the same columns again, which makes dplyr
# suffix the duplicates with .x / .y.
final_incomes <- left_join(
  final_incomes,
  Final_median_income_totals_df,
  by = "Median_Income"
)

In [None]:
final_incomes

In [None]:
# Add the six regional columns of Final_homes_df to final_homes, matching
# rows on Total_People; every existing final_homes row is kept.
# Re-running this cell joins the same columns again, which makes dplyr
# suffix the duplicates with .x / .y.
final_homes <- left_join(final_homes, Final_homes_df, by = "Total_People")

In [None]:
final_homes

## This leaves us with 4 dataframes that contain all the information that we want from the CSVs from the figures NZ site

Now we will write these to CSVs, so that we can work with them in another notebook as this one is becoming rather long

In [None]:
# Save the combined pets table. The file name is relative, so it is written
# to the current working directory; write.csv() also writes the row names as
# an unnamed first column.
write.csv(x = final_pets, file = "All_Pets_FiguresNZ.csv")

In [None]:
# Save the combined home-ownership table. The file name is relative, so it
# is written to the current working directory; write.csv() also writes the
# row names as an unnamed first column.
write.csv(x = final_homes, file = "All_Homes_FiguresNZ.csv")

In [None]:
# Save the combined income table. The file name is relative, so it is
# written to the current working directory; write.csv() also writes the row
# names as an unnamed first column.
write.csv(x = final_incomes, file = "All_Incomes_FiguresNZ.csv")

In [None]:
# Save the combined population table. The file name is relative, so it is
# written to the current working directory; write.csv() also writes the row
# names as an unnamed first column.
write.csv(x = final_pops, file = "All_Populations_FiguresNZ.csv")