## Excel 자동화

In [12]:
import pandas as pd

# Load the three CSV inputs that the later cells work with.
df1 = pd.read_csv(filepath_or_buffer='./data/customers.csv')
print(df1.head(5))

# If a file contains Korean text, pass encoding='utf-8-sig' or
# encoding='cp949' to read_csv.
df2 = pd.read_csv(filepath_or_buffer='./data/seongnam-population.csv', encoding='cp949')
print(df2.head(5))

df3 = pd.read_csv(filepath_or_buffer='./data/seoul_avg_age.csv')
df3.head(n=3)

# Caution! read_excel loads workbooks through openpyxl, so DRM-protected files fail to open.
# Template for reading an Excel sheet:
# df1 = pd.read_excel('{path/filename}', sheet_name='{sheet_name}',
# header=2, index_col='id', thousands=',',
# )

   CustomerID                        CustomerName         ContactName  \
0           1                 Alfreds Futterkiste        Maria Anders   
1           2  Ana Trujillo Emparedados y helados        Ana Trujillo   
2           3             Antonio Moreno Taquería      Antonio Moreno   
3           4                     Around the Horn        Thomas Hardy   
4           5                  Berglunds snabbköp  Christina Berglund   

                         Address         City PostalCode  Country  
0                  Obere Str. 57       Berlin      12209  Germany  
1  Avda. de la Constitución 2222  México D.F.      05021   Mexico  
2                 Mataderos 2312  México D.F.      05023   Mexico  
3                120 Hanover Sq.       London    WA1 1DP       UK  
4                 Berguvsvägen 8        Luleå   S-958 22   Sweden  
    구별              동  인구수_계  인구수_남  인구수_여  19세 이상_계  19세 이상_남  19세 이상_여  \
0  수정구  신흥1동           13511   7107   6404     12381      6519      5862   
1

Unnamed: 0,동별(1),동별(2),2019,2020,2021,2022
0,동별(1),동별(2),전체평균연령,전체평균연령,전체평균연령,전체평균연령
1,합계,종로구,44.7,45.2,45.4,45.6
2,합계,중구,45.0,45.6,45.8,46.1


In [13]:
# Drop the row labelled df3.index[0] (the first row), mutating df3 in place.
# NOTE: the label is looked up at run time, so re-running this cell removes
# one more row each time.
df3.drop(index=df3.index[0], inplace=True)
df3.head(n=3)

Unnamed: 0,동별(1),동별(2),2019,2020,2021,2022
1,합계,종로구,44.7,45.2,45.4,45.6
2,합계,중구,45.0,45.6,45.8,46.1
3,합계,용산구,43.9,44.3,44.3,44.4


In [15]:
# Drop whichever column is currently first, mutating df3 in place.
# NOTE: re-running this cell removes one more column each time.
df3.drop(columns=df3.columns[0], inplace=True)

In [16]:
# Preview the first three rows of df3.
df3.head(n=3)

Unnamed: 0,동별(2),2019,2020,2021,2022
1,종로구,44.7,45.2,45.4,45.6
2,중구,45.0,45.6,45.8,46.1
3,용산구,43.9,44.3,44.3,44.4


In [17]:
# Rename whichever column is currently first to 'district', in place.
df3.rename({df3.columns[0]: 'district'}, axis='columns', inplace=True)
df3.head(5)

Unnamed: 0,district,2019,2020,2021,2022
1,종로구,44.7,45.2,45.4,45.6
2,중구,45.0,45.6,45.8,46.1
3,용산구,43.9,44.3,44.3,44.4
4,성동구,42.4,43.0,43.4,43.8
5,광진구,41.9,42.4,42.9,43.3


In [19]:
# Promote the first column (expected to be 'district' at this point) to the
# row index, in place. After this it is an index, no longer a data column.
# NOTE: re-running this cell would move the next column into the index.
df3.set_index(keys=df3.columns[0], inplace=True)
df3.head(5)

Unnamed: 0_level_0,2019,2020,2021,2022
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
종로구,44.7,45.2,45.4,45.6
중구,45.0,45.6,45.8,46.1
용산구,43.9,44.3,44.3,44.4
성동구,42.4,43.0,43.4,43.8
광진구,41.9,42.4,42.9,43.3


In [20]:
# Cast each yearly column to float; astype returns a new frame, so rebind df3.
df3 = df3.astype(dict.fromkeys(('2019', '2020', '2021', '2022'), 'float'))

In [21]:
# When a list of columns is given, rows that tie on the earlier column are
# ordered by the later one. The sorted frame is only displayed; df3 itself
# keeps its original order because the result is not assigned.
df3.sort_values(['2020', '2021'], ascending=True)

Unnamed: 0_level_0,2019,2020,2021,2022
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
서초구,40.8,41.3,41.7,42.1
강남구,40.9,41.4,41.9,42.3
송파구,41.2,41.7,42.2,42.6
마포구,41.6,42.0,42.2,42.6
양천구,41.6,42.2,42.8,43.4
광진구,41.9,42.4,42.9,43.3
강동구,42.5,42.6,43.0,43.4
관악구,42.1,42.6,42.9,43.1
강서구,42.0,42.6,43.1,43.5
성동구,42.4,43.0,43.4,43.8


In [26]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's values, one entry per user. None marks a missing value.
# NOTE(review): 'gdhond@example.com' does not match the username 'gdhong' --
# possibly a typo in the sample data; left unchanged.
user_dict = {
    'id': ['A001', 'B002', 'C003', 'D004'],
    'name': ['John Doe', 'Jane Doe', 'Gildong Hong', 'Dongeun Moon'],
    'age': [32, None, 19, 47],
    'email': [
        'jd1234@example.com',
        'janeland@example.com',
        'gdhond@example.com',
        'Welcomemygym@example.com',
    ],
    'username': ['jd1234', None, 'gdhong', 'bravoyj'],
}

# Build the frame directly from the dict of columns.
df4 = pd.DataFrame(user_dict)
df4

Unnamed: 0,id,name,age,email,username
0,A001,John Doe,32.0,jd1234@example.com,jd1234
1,B002,Jane Doe,,janeland@example.com,
2,C003,Gildong Hong,19.0,gdhond@example.com,gdhong
3,D004,Dongeun Moon,47.0,Welcomemygym@example.com,bravoyj


In [27]:
# Per-column replacements for missing entries: empty string for the text
# columns and 0 for age. Columns not listed here are left untouched.
values = dict(name='', age=0, email='', username='')

# Fill the missing entries of df4 in place.
df4.fillna(values, inplace=True)

Unnamed: 0,id,name,age,email,username
0,A001,John Doe,32.0,jd1234@example.com,jd1234
1,B002,Jane Doe,0.0,janeland@example.com,
2,C003,Gildong Hong,19.0,gdhond@example.com,gdhong
3,D004,Dongeun Moon,47.0,Welcomemygym@example.com,bravoyj


In [28]:
# assign returns a new frame with the extra isSocial column. The result is
# only displayed here, not stored, so df4 itself does not gain the column.
df4.assign(
    isSocial=[False, True, False, False],
)

Unnamed: 0,id,name,age,email,username,isSocial
0,A001,John Doe,32.0,jd1234@example.com,jd1234,False
1,B002,Jane Doe,,janeland@example.com,,True
2,C003,Gildong Hong,19.0,gdhond@example.com,gdhong,False
3,D004,Dongeun Moon,47.0,Welcomemygym@example.com,bravoyj,False


In [33]:
# De-duplicate on the Country column.
#   keep='last' -> of each group of duplicates, keep only the last row
#   keep=False  -> drop every row that has a duplicate
# The result is a new frame that is only displayed; df1 is not modified.
df1.drop_duplicates(['Country'], keep='last')

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
23,24,Folk och fä HB,Maria Larsson,Åkergatan 24,Bräcke,S-844 67,Sweden
36,37,Hungry Owl All-Night Grocers,Patricia McKenna,8 Johnstown Road,Cork,,Ireland
46,47,LINO-Delicateses,Felipe Izquierdo,Ave. 5 de Mayo Porlamar,I. de Margarita,4980,Venezuela
50,51,Mère Paillarde,Jean Fresnière,43 rue St. Laurent,Montréal,H1J 1C3,Canada
58,59,Piccolo und mehr,Georg Pipps,Geislweg 14,Salzburg,5020,Austria
59,60,Princesa Isabel Vinhoss,Isabel de Castro,Estrada da saúde n. 58,Lisboa,1756,Portugal
63,64,Rancho grande,Sergio Gutiérrez,Av. del Libertador 900,Buenos Aires,1010,Argentina
65,66,Reggiani Caseifici,Maurizio Moroni,Strada Provinciale 124,Reggio Emilia,42100,Italy
67,68,Richter Supermarkt,Michael Holz,Grenzacherweg 237,Genève,1203,Switzerland
68,69,Romero y tomillo,Alejandra Camino,Gran Vía 1,Madrid,28001,Spain


In [34]:
# Write df1 to a single-sheet workbook, leaving out the row index.
df1.to_excel(
    excel_writer='./data/고객명단.xlsx',
    index=False,
    sheet_name='해외고객명단',
)

In [35]:
# Write all four frames into one workbook, one sheet per frame.
# df3 keeps its index because the district names live there; the other
# frames are written without their row index.
with pd.ExcelWriter('./data/all_dfs.xlsx') as writer:
    for frame, sheet, keep_index in (
        (df1, '해외고객명단', False),
        (df2, '성남시 동별 인구통계', False),
        (df3, '서울시 구별 평균연령', True),
        (df4, '유저명단', False),
    ):
        frame.to_excel(writer, sheet_name=sheet, index=keep_index)