# 시리즈 함수 다루기

In [3]:
import pandas as pd

# Read the tab-separated gapminder file into a DataFrame, then display it.
df = pd.read_csv("gapminder.tsv", sep = '\t')
df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.853030
2,Afghanistan,Asia,1962,31.997,10267083,853.100710
3,Afghanistan,Asia,1967,34.020,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623


In [4]:
# Pull the year and continent columns out of the DataFrame as two Series.
year, continent = df["year"], df["continent"]

In [5]:
# Display the year Series.
year

0       1952
1       1957
2       1962
3       1967
4       1972
        ... 
1699    1987
1700    1992
1701    1997
1702    2002
1703    2007
Name: year, Length: 1704, dtype: int64

In [6]:
# Display the continent Series.
continent

0         Asia
1         Asia
2         Asia
3         Asia
4         Asia
         ...  
1699    Africa
1700    Africa
1701    Africa
1702    Africa
1703    Africa
Name: continent, Length: 1704, dtype: object

# 평균

In [7]:
# Mean of the year values.
year.mean()

1979.5

# 중앙값

In [8]:
# Median of the year values.
year.median()

1979.5

# 최댓값

In [9]:
# Largest year value.
year.max()

2007

# 최솟값

In [10]:
# Smallest year value.
year.min()

1952

# 표준편차

In [11]:
year.std()    # standard deviation

17.265329508973615

# 시리즈 연결하기

In [12]:
# Stack the two Series end to end; each keeps its own index labels, so the
# result has both sets of 0..1703 labels and a mixed (object) dtype.
# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# pd.concat is used instead.
pd.concat([year, continent])

0         1952
1         1957
2         1962
3         1967
4         1972
         ...  
1699    Africa
1700    Africa
1701    Africa
1702    Africa
1703    Africa
Length: 3408, dtype: object

# 시리즈 요약 통계량 확인하기

In [13]:
# Summary statistics for the year Series: count, mean, std, min, quartiles, max.
year.describe()

count    1704.00000
mean     1979.50000
std        17.26533
min      1952.00000
25%      1965.75000
50%      1979.50000
75%      1993.25000
max      2007.00000
Name: year, dtype: float64

In [14]:
df.describe()      # summary statistics for the numeric columns

Unnamed: 0,year,lifeExp,pop,gdpPercap
count,1704.0,1704.0,1704.0,1704.0
mean,1979.5,59.474439,29601210.0,7215.327081
std,17.26533,12.917107,106157900.0,9857.454543
min,1952.0,23.599,60011.0,241.165876
25%,1965.75,48.198,2793664.0,1202.060309
50%,1979.5,60.7125,7023596.0,3531.846988
75%,1993.25,70.8455,19585220.0,9325.462346
max,2007.0,82.603,1318683000.0,113523.1329


# 중복값 삭제하기

In [16]:
# Keep only the first occurrence of each distinct year value.
year.drop_duplicates()

0     1952
1     1957
2     1962
3     1967
4     1972
5     1977
6     1982
7     1987
8     1992
9     1997
10    2002
11    2007
Name: year, dtype: int64

In [17]:
df.drop_duplicates()    # drop rows that exactly match another row

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.853030
2,Afghanistan,Asia,1962,31.997,10267083,853.100710
3,Afghanistan,Asia,1967,34.020,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623


# 특정 값 바꾸기

In [18]:
# Build a small Series of weekday abbreviations, then show a copy in which
# 'Mon' is spelled out. The result is not assigned, so b itself is unchanged.
b = pd.Series(['Mon', 'Fri', 'Thu', 'Sun'])
b.replace(to_replace='Mon', value='Monday')

0    Monday
1       Fri
2       Thu
3       Sun
dtype: object

# 랜덤으로 샘플 추출하기

In [20]:
year.sample(10)    # .sample(n): draws n values at random from the Series

548     1992
1044    1952
1544    1992
991     1987
730     2002
223     1987
883     1987
1237    1957
529     1957
1400    1992
Name: year, dtype: int64

# 정렬하기

In [21]:
year.sort_values()   # sort in ascending order

0       1952
528     1952
540     1952
1656    1952
552     1952
        ... 
1127    2007
1139    2007
1151    2007
1175    2007
1703    2007
Name: year, Length: 1704, dtype: int64

In [22]:
year.sort_values(ascending = False)    # sort in descending order

1703    2007
491     2007
515     2007
527     2007
539     2007
        ... 
1116    1952
1128    1952
1140    1952
1152    1952
852     1952
Name: year, Length: 1704, dtype: int64

# 시리즈를 데이터 프레임으로 변환하기

In [23]:
# Convert the year Series into a single-column DataFrame.
year.to_frame()

Unnamed: 0,year
0,1952
1,1957
2,1962
3,1967
4,1972
...,...
1699,1987
1700,1992
1701,1997
1702,2002
1703,2007


In [24]:
# Sort the rows by lifeExp, smallest first.
df.sort_values(by = "lifeExp")

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
1292,Rwanda,Africa,1992,23.599,7290203,737.068595
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
552,Gambia,Africa,1952,30.000,284320,485.230659
36,Angola,Africa,1952,30.015,4232095,3520.610273
1344,Sierra Leone,Africa,1952,30.331,2143249,879.787736
...,...,...,...,...,...,...
1487,Switzerland,Europe,2007,81.701,7554661,37506.419070
695,Iceland,Europe,2007,81.757,301931,36180.789190
802,Japan,Asia,2002,82.000,127065841,28604.591900
671,"Hong Kong, China",Asia,2007,82.208,6980412,39724.978670
803,Japan,Asia,2007,82.603,127467972,31656.068060


In [25]:
# Sort the rows by lifeExp, largest first.
df.sort_values(by = "lifeExp", ascending = False)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
803,Japan,Asia,2007,82.603,127467972,31656.068060
671,"Hong Kong, China",Asia,2007,82.208,6980412,39724.978670
802,Japan,Asia,2002,82.000,127065841,28604.591900
695,Iceland,Europe,2007,81.757,301931,36180.789190
1487,Switzerland,Europe,2007,81.701,7554661,37506.419070
...,...,...,...,...,...,...
1344,Sierra Leone,Africa,1952,30.331,2143249,879.787736
36,Angola,Africa,1952,30.015,4232095,3520.610273
552,Gambia,Africa,1952,30.000,284320,485.230659
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1292,Rwanda,Africa,1992,23.599,7290203,737.068595


# gapminder 데이터 프레임에서 gdpPercap열을 내림차순으로 정렬하고 인증하기

In [26]:
# Sort the rows by gdpPercap, largest first.
df.sort_values(by = "gdpPercap", ascending = False)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
853,Kuwait,Asia,1957,58.033,212846,113523.132900
856,Kuwait,Asia,1972,67.712,841934,109347.867000
852,Kuwait,Asia,1952,55.565,160000,108382.352900
854,Kuwait,Asia,1962,60.470,358266,95458.111760
855,Kuwait,Asia,1967,64.624,575003,80894.883260
...,...,...,...,...,...,...
333,"Congo, Dem. Rep.",Africa,1997,42.587,47798986,312.188423
624,Guinea-Bissau,Africa,1952,32.500,580653,299.850319
876,Lesotho,Africa,1952,42.138,748747,298.846212
335,"Congo, Dem. Rep.",Africa,2007,46.462,64606759,277.551859
