In [1]:
import pandas as pd
import numpy as np
import os

import warnings
# Silence every warning for the rest of the session; no category is given,
# so this also hides warnings raised by pandas in later cells.
warnings.filterwarnings(action='ignore')

In [2]:
# Every entry of the working directory whose extension is exactly '.txt',
# sorted by name so the positions used below are deterministic.
DATA_FILES = sorted(
    entry
    for entry in os.listdir()
    if os.path.splitext(entry)[-1] == '.txt'
)
DATA_FILES

['서울시-수의사현황-통계-2009-2018.txt',
 '서울시-유기동물보호-현황-통계-2009-2018.txt',
 '서울시-주민등록인구-구별-통계.txt']

In [3]:
# Pick the two inputs by their position in the sorted listing
# (first entry: veterinarian export, second entry: abandoned-animal export).
veterinarian_file, abandoned_animal_file = DATA_FILES[0], DATA_FILES[1]

## Veterinarian Status

In [4]:
# Tab-separated export; the column labels come from row index 2 of the file.
# '기간' (period) is read as text so it is not parsed as a number.
vet_df = pd.read_csv(
    veterinarian_file,
    sep='\t',
    header=2,
    dtype={'기간': str},
)

In [5]:
# Preview the first rows of the raw veterinarian table.
vet_df.head()

Unnamed: 0,기간,자치구별,합계,남자,여자,행정,연구,공수의,개업수의,학교,기타,단체
0,2009,합계,1100,901,199,25,2,25,768,81,199,-
1,2009,종로구,15,11,4,-,-,1,14,-,-,-
2,2009,중구,12,9,3,1,-,1,6,-,4,-
3,2009,용산구,32,28,4,2,-,1,22,-,7,-
4,2009,성동구,20,20,-,1,-,1,17,1,-,-


In [6]:
# Drop the leading row (index label 0, the 2009 city-wide '합계' total).
# Dropping by label instead of by position keeps the cell idempotent:
# re-running it is a no-op rather than discarding another district row.
vet_df = vet_df.drop(index=0, errors='ignore')
vet_df.head()

Unnamed: 0,기간,자치구별,합계,남자,여자,행정,연구,공수의,개업수의,학교,기타,단체
1,2009,종로구,15,11,4,-,-,1,14,-,-,-
2,2009,중구,12,9,3,1,-,1,6,-,4,-
3,2009,용산구,32,28,4,2,-,1,22,-,7,-
4,2009,성동구,20,20,-,1,-,1,17,1,-,-
5,2009,광진구,90,63,27,1,-,1,28,44,16,-


In [7]:
# Keep only rows whose district label does not contain '합계' (total).
vet_df = vet_df.loc[~vet_df['자치구별'].str.contains('합계')]
vet_df.head()

Unnamed: 0,기간,자치구별,합계,남자,여자,행정,연구,공수의,개업수의,학교,기타,단체
1,2009,종로구,15,11,4,-,-,1,14,-,-,-
2,2009,중구,12,9,3,1,-,1,6,-,4,-
3,2009,용산구,32,28,4,2,-,1,22,-,7,-
4,2009,성동구,20,20,-,1,-,1,17,1,-,-
5,2009,광진구,90,63,27,1,-,1,28,44,16,-


In [8]:
# Only the period, the district and the overall head-count are needed downstream.
vet_df = vet_df.loc[:, ['기간', '자치구별', '합계']]
vet_df.head()

Unnamed: 0,기간,자치구별,합계
1,2009,종로구,15
2,2009,중구,12
3,2009,용산구,32
4,2009,성동구,20
5,2009,광진구,90


In [9]:
# Align the district column name with the other tables ('자치구') and give the
# total an unambiguous name before merging.
vet_df = vet_df.rename(columns={'자치구별': '자치구', '합계': '수의사합계'})

## Abandoned Animal Status

In [10]:
# Tab-separated export with column labels on row index 1.
# '-' cells are parsed as missing and then replaced by 0.
abd_df = pd.read_csv(
    abandoned_animal_file,
    sep='\t',
    header=1,
    na_values='-',
).fillna(0)
abd_df.shape

(287, 18)

In [11]:
# Remove the rows whose district label contains '합계' (total).
abd_df = abd_df.loc[~abd_df['자치구'].str.contains('합계')]

In [12]:
# Inspect the column labels; repeated labels carry pandas' '.1' … '.4' suffixes.
abd_df.columns

Index(['기간', '자치구', '합계', '개', '개.1', '개.2', '개.3', '개.4', '고양이', '고양이.1',
       '고양이.2', '고양이.3', '고양이.4', '기타', '기타.1', '기타.2', '기타.3', '기타.4'],
      dtype='object')

In [13]:
def _animal_outcomes(frame, prefix):
    """Extract the five outcome columns of one animal group from ``frame``.

    The selected columns are ``기간``, ``자치구``, ``prefix`` and
    ``prefix.1`` … ``prefix.4``. The first row of the selection is used as
    the new column labels and is then removed; remaining missing values are
    replaced by 0.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table whose first row holds the labels to promote to column names.
    prefix : str
        Column label of the animal group (e.g. ``'개'``).

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified.
    """
    wanted = ['기간', '자치구', prefix] + ['{}.{}'.format(prefix, i) for i in range(1, 5)]
    subset = frame[wanted]
    # Promote the first row to column labels, then drop that row from the data.
    return subset.rename(columns=subset.iloc[0]).drop(subset.index[0]).fillna(0)


dog_df = _animal_outcomes(abd_df, '개')
cat_df = _animal_outcomes(abd_df, '고양이')
etc_df = _animal_outcomes(abd_df, '기타')

In [14]:
# Tag each per-animal table so the rows stay distinguishable after concatenation.
dog_df = dog_df.assign(animal='개')
cat_df = cat_df.assign(animal='고양이')
etc_df = etc_df.assign(animal='기타 동물')
dog_df.head()

Unnamed: 0,기간,자치구,계,인도(주인),입양분양,폐사안락사,계류기증,animal
2,2009,종로구,227,69,119,30,9,개
3,2009,중구,115,11,17,87,0,개
4,2009,용산구,541,62,298,181,0,개
5,2009,성동구,265,46,26,193,0,개
6,2009,광진구,357,46,57,251,3,개


In [15]:
# Stack the three per-animal tables and order the rows by period, then district.
all_df = pd.concat([dog_df, cat_df, etc_df]).sort_values(['기간', '자치구'])

In [16]:
# Preview the stacked table; index labels repeat because each source kept its own.
all_df.head()

Unnamed: 0,기간,자치구,계,인도(주인),입양분양,폐사안락사,계류기증,animal
24,2009,강남구,314,69,58,184,3,개
24,2009,강남구,251,4,13,215,19,고양이
24,2009,강남구,13,0,7,4,2,기타 동물
26,2009,강동구,754,189,404,106,55,개
26,2009,강동구,268,4,149,94,21,고양이


In [17]:
# List the column labels before reordering them.
all_df.columns

Index(['기간', '자치구', '계', '인도(주인)', '입양분양', '폐사안락사', '계류기증', 'animal'], dtype='object')

In [18]:
# Move 'animal' next to the period and renumber the rows from 0.
all_df = (
    all_df
    .loc[:, ['기간', 'animal', '자치구', '계', '인도(주인)', '입양분양', '폐사안락사', '계류기증']]
    .reset_index(drop=True)
)
all_df

Unnamed: 0,기간,animal,자치구,계,인도(주인),입양분양,폐사안락사,계류기증
0,2009,개,강남구,314,69,58,184,3
1,2009,고양이,강남구,251,4,13,215,19
2,2009,기타 동물,강남구,13,0,7,4,2
3,2009,개,강동구,754,189,404,106,55
4,2009,고양이,강동구,268,4,149,94,21
...,...,...,...,...,...,...,...,...
820,2019,고양이,중구,60,0,13,47,0
821,2019,기타 동물,중구,9,0,7,2,0
822,2019,개,중랑구,247,108,50,79,10
823,2019,고양이,중랑구,140,2,17,121,0


In [19]:
# Show the working directory; the relative file names used in this notebook
# (os.listdir() and the read_csv calls) are resolved against it.
os.getcwd()

'/Users/eunbinpark/workspace/git/Tableau_visualisation/Abandoned-Animal/data'

## Merge DataFrames

In [20]:
# Left-hand side of the upcoming merge.
all_df.head()

Unnamed: 0,기간,animal,자치구,계,인도(주인),입양분양,폐사안락사,계류기증
0,2009,개,강남구,314,69,58,184,3
1,2009,고양이,강남구,251,4,13,215,19
2,2009,기타 동물,강남구,13,0,7,4,2
3,2009,개,강동구,754,189,404,106,55
4,2009,고양이,강동구,268,4,149,94,21


In [21]:
# Right-hand side of the upcoming merge.
vet_df.head()

Unnamed: 0,기간,자치구,수의사합계
1,2009,종로구,15
2,2009,중구,12
3,2009,용산구,32
4,2009,성동구,20
5,2009,광진구,90


In [22]:
# Preview of the join on (period, district); the result is displayed, not stored.
all_df.merge(vet_df, on=['기간', '자치구'])

Unnamed: 0,기간,animal,자치구,계,인도(주인),입양분양,폐사안락사,계류기증,수의사합계
0,2009,개,강남구,314,69,58,184,3,116
1,2009,고양이,강남구,251,4,13,215,19,116
2,2009,기타 동물,강남구,13,0,7,4,2,116
3,2009,개,강동구,754,189,404,106,55,41
4,2009,고양이,강동구,268,4,149,94,21,41
...,...,...,...,...,...,...,...,...,...
745,2018,고양이,중구,59,0,5,54,0,37
746,2018,기타 동물,중구,12,2,6,4,0,37
747,2018,개,중랑구,259,84,72,100,3,61
748,2018,고양이,중랑구,137,3,23,111,0,61


In [23]:
# Check that the merge key '기간' has the same dtype in both frames.
all_df['기간'].dtype, vet_df['기간'].dtype

(dtype('O'), dtype('O'))

In [24]:
# Inner join on (period, district): rows of all_df whose key has no match in
# vet_df are dropped from the result.
final = all_df.merge(vet_df, how='inner', on=['기간', '자치구'])
final

Unnamed: 0,기간,animal,자치구,계,인도(주인),입양분양,폐사안락사,계류기증,수의사합계
0,2009,개,강남구,314,69,58,184,3,116
1,2009,고양이,강남구,251,4,13,215,19,116
2,2009,기타 동물,강남구,13,0,7,4,2,116
3,2009,개,강동구,754,189,404,106,55,41
4,2009,고양이,강동구,268,4,149,94,21,41
...,...,...,...,...,...,...,...,...,...
745,2018,고양이,중구,59,0,5,54,0,37
746,2018,기타 동물,중구,12,2,6,4,0,37
747,2018,개,중랑구,259,84,72,100,3,61
748,2018,고양이,중랑구,137,3,23,111,0,61


## 서울시 인구 

In [25]:
# Population export, referenced by its literal name rather than via DATA_FILES.
filename = '서울시-주민등록인구-구별-통계.txt'

In [26]:
# Tab-separated export with column labels on row index 2; keep '기간' as text
# so it can be used as a merge key alongside the other tables.
df = pd.read_csv(
    filename,
    sep='\t',
    header=2,
    dtype={'기간': str},
)
df.head()

Unnamed: 0,기간,자치구,세대,계,남자,여자,계.1,남자.1,여자.1,계.2,남자.2,여자.2,인구밀도(명/㎢),면적(㎢),세대당인구,65세이상고령자
0,2009,합계,4116660,10464051,5181359,5282692,10208302,5059269,5149033,255749,122090,133659,17289,605.25,2.48,942946
1,2009,종로구,74315,177543,88432,89111,168603,84257,84346,8940,4175,4765,7426,23.91,2.27,20753
2,2009,중구,58249,137861,69046,68815,129465,65025,64440,8396,4021,4375,13841,9.96,2.22,16008
3,2009,용산구,107456,251200,123528,127672,238708,116954,121754,12492,6574,5918,11487,21.87,2.22,28167
4,2009,성동구,126670,316064,158203,157861,308058,154333,153725,8006,3870,4136,18759,16.85,2.43,30300


In [27]:
# Row and column count of the population table.
df.shape

(286, 16)

In [28]:
# Distinct periods present in the population table.
df['기간'].unique()

array(['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
       '2017', '2018', '2019'], dtype=object)

In [29]:
# Column labels; repeated labels carry pandas' '.1' / '.2' suffixes.
df.columns

Index(['기간', '자치구', '세대', '계', '남자', '여자', '계.1', '남자.1', '여자.1', '계.2',
       '남자.2', '여자.2', '인구밀도(명/㎢)', '면적(㎢)', '세대당인구', '65세이상고령자'],
      dtype='object')

In [30]:
# Keep the merge keys and the first '계' (total) column only.
df = df[['기간', '자치구', '계']]
# Drop the city-wide '합계' rows and rename the total in one chained expression.
# The chain returns a new frame, so no filtered slice is mutated in place
# (the in-place rename is what SettingWithCopyWarning is raised for).
pop = df[df['자치구'] != '합계'].rename(columns={'계': '인구총계'})

In [31]:
# Attach the population total to every (period, district) row.
# Any '인구총계' column left by a previous run is dropped first, so re-running
# the cell replaces the column instead of producing '인구총계_x' / '인구총계_y'.
final = pd.merge(
    final.drop(columns='인구총계', errors='ignore'),
    pop,
    on=['기간', '자치구'],
)

In [32]:
# Column dtypes before the numeric conversion.
final.dtypes

기간        object
animal    object
자치구       object
계         object
인도(주인)    object
입양분양      object
폐사안락사     object
계류기증      object
수의사합계     object
인구총계      object
dtype: object

In [34]:
# Convert every column after the first three (기간, animal, 자치구) to float.
# Values are cast to str first: `.str.replace` returns NaN for cells that are
# not strings (e.g. a numeric 0 written by an earlier fillna), and float
# columns have no `.str` accessor, so without the cast a second run would fail.
for col in final.columns[3:]:
    final[col] = (
        final[col]
        .astype(str)
        .str.replace(',', '', regex=False)  # remove ',' literally (presumably thousands separators)
        .astype(float)
    )

In [35]:
# Column dtypes after the numeric conversion.
final.dtypes

기간         object
animal     object
자치구        object
계         float64
인도(주인)    float64
입양분양      float64
폐사안락사     float64
계류기증      float64
수의사합계     float64
인구총계      float64
dtype: object

In [44]:
# Replace any remaining missing values with 0.
final = final.fillna(0)

In [45]:
# Row-wise sum of the '인도(주인)' and '입양분양' columns; displayed only, not stored.
final['인도(주인)'] + final['입양분양']

0      127.0
1       17.0
2        7.0
3      593.0
4      153.0
       ...  
745      5.0
746      8.0
747    156.0
748     26.0
749      7.0
Length: 750, dtype: float64

In [46]:
# Row-wise sum of the '폐사안락사' and '계류기증' columns; displayed only, not stored.
final['폐사안락사'] + final['계류기증']

0      187.0
1      234.0
2        6.0
3      161.0
4      115.0
       ...  
745     54.0
746      4.0
747    103.0
748    111.0
749      3.0
Length: 750, dtype: float64

In [39]:
# final.to_csv('abandoned_animal_data_with_pop.csv', index = False)