# Importing the Relevant Python Libraries

In [3]:
# importing pandas to work with the excel files as dataframes
import pandas as pd
# importing os to support looping through a directory
import os

# Generating Multiple Dataframes
The code below downloads publicly available Animal Shelter Data from Austin, TX and converts it into a pandas dataframe. 

A separate dataframe is then constructed for each Animal Type and saved as an Excel file in a separate file directory. 

Prior to executing this code, one must create a folder called 'Files_to_combine' within the file directory where this python file is saved so that the .to_excel() function as written has a valid file path to save to.

In [4]:
# Location of the publicly available Austin Animal Shelter data (CSV download)
outcomes_url = 'https://data.austintexas.gov/api/views/9t4d-g238/rows.csv?accessType=DOWNLOAD'
# Downloading the data and loading it into a pandas dataframe
animal_outcomes = pd.read_csv(outcomes_url)
# Counting the rows per animal type to see which types are present in the data
animal_outcomes['Animal Type'].value_counts()

Dog          60806
Cat          40215
Other         5607
Bird           484
Livestock       16
Name: Animal Type, dtype: int64

In [5]:
# Pulling out the 'Animal Type' column once so that every filter below can reuse it
animal_type = animal_outcomes['Animal Type']
# Building one dataframe per Animal Type by keeping only the rows whose type matches
Dog_df = animal_outcomes.loc[animal_type.eq('Dog')]
Cat_df = animal_outcomes.loc[animal_type.eq('Cat')]
Other_df = animal_outcomes.loc[animal_type.eq('Other')]
Bird_df = animal_outcomes.loc[animal_type.eq('Bird')]
Livestock_df = animal_outcomes.loc[animal_type.eq('Livestock')]

In [7]:
# Creating a separate excel file for each dataframe and saving it to a single directory
output_dir = 'Files_to_combine'
# Making sure the output directory exists, so that .to_excel() has a valid path to save to
# even when the folder has not been created manually beforehand.
# exist_ok=True leaves an already existing folder (and the files in it) untouched.
os.makedirs(output_dir, exist_ok=True)
# Mapping each output file name to the dataframe that is written to it
frames_by_file = {
    'Austin_Dogs.xlsx': Dog_df,
    'Austin_Cats.xlsx': Cat_df,
    'Austin_Others.xlsx': Other_df,
    'Austin_Birds.xlsx': Bird_df,
    'Austin_Livestock.xlsx': Livestock_df,
}
# Writing each dataframe to its own single-sheet excel file; index=False leaves the row index out of the file
for file_name, frame in frames_by_file.items():
    frame.to_excel(os.path.join(output_dir, file_name), index=False)


Below is the code for looping through a file directory and reading all of the files, which in this case must be Excel files, and then writing them as a separate sheet within a multi-sheet Excel file.

This code performs the read and the write in a single loop. If you wanted to manipulate the data from the Excel files before writing them to the multi-sheet Excel file, you could create separate loops for the read and write processes, with the desired manipulation occurring in between.

In [25]:
# Creating a path variable that points to the directory that will be looped through
path = 'Files_to_combine'
# Collecting only the excel files in the directory, so that stray files (for example hidden system files
# or the temporary '~$' lock files Excel creates while a workbook is open) are not passed to pd.read_excel().
# Sorting the names gives the same sheet order on every run, because os.listdir() returns names in arbitrary order.
excel_files = sorted(
    name for name in os.listdir(path)
    if name.endswith('.xlsx') and not name.startswith('~$')
)
# creating the writer that the pandas.to_excel() function will write to.
# The pandas.to_excel() function will automatically save your file if creating a single-sheet excel file with a provided path,
# but when writing to a writer the workbook has to be saved explicitly. Using the writer in a with statement does this:
# the workbook is saved and closed when the block ends, and the file is also closed if a read or write fails.
# (writer.save() is deprecated since pandas 1.5 and was removed in pandas 2.0.)
with pd.ExcelWriter('Combined_Austin_Files.xlsx') as writer:
    # The for loop that loops through the list of excel files in the directory at the path
    for file in excel_files:
        # reading a given excel file as a dataframe
        current = pd.read_excel(os.path.join(path, file))
        # writing a given dataframe to a sheet in the output excel workbook;
        # the sheet is named after the file, with only the trailing extension removed
        current.to_excel(excel_writer=writer, sheet_name=os.path.splitext(file)[0], index=False)

In [17]:
# Building a dictionary with one key per file in the directory;
# each key starts out with an empty dataframe as a placeholder value
file_dict = dict()
for file in os.listdir("Files_to_combine"):
    file_dict[file] = pd.DataFrame()
# Displaying the dictionary to check that every file in the directory received a key
file_dict

{'Austin_Birds.xlsx': Empty DataFrame
 Columns: []
 Index: [], 'Austin_Cats.xlsx': Empty DataFrame
 Columns: []
 Index: [], 'Austin_Dogs.xlsx': Empty DataFrame
 Columns: []
 Index: [], 'Austin_Livestock.xlsx': Empty DataFrame
 Columns: []
 Index: [], 'Austin_Others.xlsx': Empty DataFrame
 Columns: []
 Index: []}

In [21]:
# Looking up the dataframe stored under the 'Austin_Birds.xlsx' key and previewing its first rows.
# NOTE: file_dict only holds empty placeholder dataframes until the cell that reads the excel files
# into it has been run, so this preview shows data only after that cell has been executed.
birds_df = file_dict['Austin_Birds.xlsx']
birds_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A803148,,08/28/2019 12:19:00 PM,08/28/2019 12:19:00 PM,08/28/2014,Euthanasia,Suffering,Bird,Unknown,5 years,Grackle,Black
1,A802697,Ocean,08/21/2019 07:00:00 PM,08/21/2019 07:00:00 PM,08/21/2018,Adoption,,Bird,Intact Female,1 year,Budgerigar,Blue
2,A802698,Sunshine,08/21/2019 07:00:00 PM,08/21/2019 07:00:00 PM,08/21/2018,Adoption,,Bird,Intact Female,1 year,Budgerigar,Yellow
3,A800866,,07/30/2019 01:11:00 PM,07/30/2019 01:11:00 PM,07/28/2018,Return to Owner,,Bird,Unknown,1 year,Parrot,Blue
4,A800449,,07/26/2019 05:45:00 PM,07/26/2019 05:45:00 PM,07/21/2017,Adoption,,Bird,Unknown,2 years,Conure,Yellow/Green


In [20]:
# Creating a path variable that points to the directory that will be looped through
path = "Files_to_combine"
# Reading every file in the directory as a dataframe and storing it under the file's name,
# replacing whatever value the dictionary held for that key before.
# This cell only reads: the writer for the combined workbook is created in the cell that does the writing,
# so that no writer is left open on the output file without being saved or closed.
for file in os.listdir(path):
    file_dict[file] = pd.read_excel(os.path.join(path, file))
# Displaying the dictionary to check that the dataframes were read in
file_dict

{'Austin_Birds.xlsx':     Animal ID      Name                DateTime               MonthYear  \
 0     A803148       NaN  08/28/2019 12:19:00 PM  08/28/2019 12:19:00 PM   
 1     A802697     Ocean  08/21/2019 07:00:00 PM  08/21/2019 07:00:00 PM   
 2     A802698  Sunshine  08/21/2019 07:00:00 PM  08/21/2019 07:00:00 PM   
 3     A800866       NaN  07/30/2019 01:11:00 PM  07/30/2019 01:11:00 PM   
 4     A800449       NaN  07/26/2019 05:45:00 PM  07/26/2019 05:45:00 PM   
 ..        ...       ...                     ...                     ...   
 479   A668105       NaN  12/01/2013 03:15:00 AM  12/01/2013 03:15:00 AM   
 480   A668106       NaN  12/01/2013 03:15:00 AM  12/01/2013 03:15:00 AM   
 481   A666594       NaN  11/10/2013 04:36:00 PM  11/10/2013 04:36:00 PM   
 482   A666593       NaN  11/10/2013 04:36:00 PM  11/10/2013 04:36:00 PM   
 483   A666232       NaN  11/03/2013 07:02:00 PM  11/03/2013 07:02:00 PM   
 
     Date of Birth     Outcome Type Outcome Subtype Animal Type  

In [24]:
# Writing every dataframe in file_dict to its own sheet of a single multi-sheet excel workbook.
# The writer is used in a with statement so that the workbook is saved and closed when the block ends,
# and the file is also closed if one of the writes fails.
# (writer.save() is deprecated since pandas 1.5 and was removed in pandas 2.0.)
with pd.ExcelWriter('Combined_Austin_Files.xlsx') as writer:
    for key in file_dict:
        # naming each sheet after its dictionary key (the source file name) with '.xlsx' removed
        file_dict[key].to_excel(excel_writer=writer, sheet_name=key.replace('.xlsx', ''), index=False)