In [33]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [34]:
# Row labels (cities) and the left-to-right column order of the frame.
index = ['서울', '부산', '대구', '인천']
columns = ['면적순위', '2021', '2020', '2019', '2018']

# One list per column; values are positionally aligned with `index`.
data = {
    '2021': [1116, 12259, 16699, 65119],
    '2020': [628, 11075, 15695, 56013],
    '2019': [678, 12700, 15148, 55443],
    '2018': [712, 13597, 16052, 56024],
    '면적순위': [4, 3, 2, 1],
}

# `columns` picks and orders the dict keys, so '면적순위' is placed first.
df = pd.DataFrame(data=data, columns=columns, index=index)
df

Unnamed: 0,면적순위,2021,2020,2019,2018
서울,4,1116,628,678,712
부산,3,12259,11075,12700,13597
대구,2,16699,15695,15148,16052
인천,1,65119,56013,55443,56024


In [35]:
df.columns  # column labels, returned as an Index of strings

Index(['면적순위', '2021', '2020', '2019', '2018'], dtype='object')

In [36]:
df.index  # row labels (the city names), returned as an Index

Index(['서울', '부산', '대구', '인천'], dtype='object')

In [37]:
df.values  # the cell values as a 2-D NumPy array, without row/column labels

array([[    4,  1116,   628,   678,   712],
       [    3, 12259, 11075, 12700, 13597],
       [    2, 16699, 15695, 15148, 16052],
       [    1, 65119, 56013, 55443, 56024]], dtype=int64)

In [38]:
df.info()  # prints index range, per-column non-null counts and dtypes, and memory usage

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 서울 to 인천
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   면적순위    4 non-null      int64
 1   2021    4 non-null      int64
 2   2020    4 non-null      int64
 3   2019    4 non-null      int64
 4   2018    4 non-null      int64
dtypes: int64(5)
memory usage: 192.0+ bytes


In [39]:
df.index.name = '지역'  # name the row axis ('지역' = region)

In [40]:
df

Unnamed: 0_level_0,면적순위,2021,2020,2019,2018
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
서울,4,1116,628,678,712
부산,3,12259,11075,12700,13597
대구,2,16699,15695,15148,16052
인천,1,65119,56013,55443,56024


In [41]:
df.columns.name = '특성'  # name the column axis ('특성' = feature)
df

특성,면적순위,2021,2020,2019,2018
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
서울,4,1116,628,678,712
부산,3,12259,11075,12700,13597
대구,2,16699,15695,15148,16052
인천,1,65119,56013,55443,56024


In [42]:
# Percentage change from 2018 to 2021, relative to the 2018 value,
# rounded to two decimals and stored as a new column.
df['3년 변화율'] = (
    df['2021'].sub(df['2018'])
    .div(df['2018'])
    .mul(100)
    .round(2)
)
df

특성,면적순위,2021,2020,2019,2018,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서울,4,1116,628,678,712,56.74
부산,3,12259,11075,12700,13597,-9.84
대구,2,16699,15695,15148,16052,4.03
인천,1,65119,56013,55443,56024,16.23


In [43]:
# Reorder the columns: years ascending, then the rank and the change rate.
df = df[['2018','2019','2020','2021','면적순위','3년 변화율']]
df

# Side note on transposing with df.T:
# the frame mixes integer columns with the float '3년 변화율' column, so the
# transposed frame has every value converted to float.
# To keep integers, cast that one column first (this drops the decimals):
# df['3년 변화율'] = df['3년 변화율'].astype(int)
# df.T

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서울,712,678,628,1116,4,56.74
부산,13597,12700,11075,12259,3,-9.84
대구,16052,15148,15695,16699,2,4.03
인천,56024,55443,56013,65119,1,16.23


In [44]:
df['2018'] # <- a single label returns a Series; several columns need a list, df[['2018', '2019']], not df['2018', '2019']

지역
서울      712
부산    13597
대구    16052
인천    56024
Name: 2018, dtype: int64

In [45]:
df[['2018']] # <- a list of labels returns a DataFrame; add labels to the list for more columns: df[['2018', '2019']]

특성,2018
지역,Unnamed: 1_level_1
서울,712
부산,13597
대구,16052
인천,56024


In [46]:
'''
A Series value can be looked up by position or by index label, ex) s.iloc[0] or s['서울']
But! A DataFrame value must be looked up by column name, ex) df['2018'] (df['서울'] is a row label, not a column)

'''

"\nA Series value can be looked up by position or by index label, ex) s.iloc[0] or s['서울']\nBut! A DataFrame value must be looked up by column name, ex) df['2018'] (df['서울'] is a row label, not a column)\n\n"

In [47]:
df

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서울,712,678,628,1116,4,56.74
부산,13597,12700,11075,12259,3,-9.84
대구,16052,15148,15695,16699,2,4.03
인천,56024,55443,56013,65119,1,16.23


In [48]:
df.sort_values(by='면적순위')  # returns a new frame sorted ascending by area rank; df itself keeps its order

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
인천,56024,55443,56013,65119,1,16.23
대구,16052,15148,15695,16699,2,4.03
부산,13597,12700,11075,12259,3,-9.84
서울,712,678,628,1116,4,56.74


In [49]:
df[:2]  # positional row slice: rows 0 and 1 (the end position is excluded)

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서울,712,678,628,1116,4,56.74
부산,13597,12700,11075,12259,3,-9.84


In [50]:
df[1:3]  # positional row slice: rows 1 and 2 (the end position is excluded)

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
부산,13597,12700,11075,12259,3,-9.84
대구,16052,15148,15695,16699,2,4.03


In [51]:
df['서울':'부산']  # label row slice: unlike a positional slice, the end label is included

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
서울,712,678,628,1116,4,56.74
부산,13597,12700,11075,12259,3,-9.84


In [52]:
df['부산':'대구']  # label row slice from '부산' through '대구', both ends included

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
부산,13597,12700,11075,12259,3,-9.84
대구,16052,15148,15695,16699,2,4.03


In [53]:
# Fetching a single row
# with loc / iloc
df.loc[['부산']]  # a list of labels returns a one-row DataFrame

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
부산,13597,12700,11075,12259,3,-9.84


In [55]:
df.loc['부산'] # a single label returns the row as a Series; its values become float64 because the row mixes int and float columns

특성
2018      13597.00
2019      12700.00
2020      11075.00
2021      12259.00
면적순위          3.00
3년 변화율       -9.84
Name: 부산, dtype: float64

In [60]:
df.iloc[0]  # first row by position, returned as a Series

특성
2018       712.00
2019       678.00
2020       628.00
2021      1116.00
면적순위         4.00
3년 변화율      56.74
Name: 서울, dtype: float64

In [68]:
df.iloc[0,0] # same value as df['2018']['서울']
# df.iloc[0][0] also works, but is not recommended: df.iloc[0] is a float64
# Series, so the value is converted to float along the way.

712

In [67]:
df.iloc[[1]]  # a list of positions returns a one-row DataFrame, keeping each column's dtype

특성,2018,2019,2020,2021,면적순위,3년 변화율
지역,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
부산,13597,12700,11075,12259,3,-9.84
