# column 선택하기
  - 기본적으로 [ ]는 column을 추출 
  - 여러 컬럼을 선택할 때는 컬럼명의 리스트 사용 가능
    - 리스트를 전달할 경우 결과는 Dataframe
    - 하나의 컬럼명을 전달할 경우 결과는 Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Titanic passenger table from a CSV path relative to the working directory.
filepath = 'data/titanic.csv'
df = pd.read_csv(filepath)
# Preview the first five rows.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## 하나의 컬럼 선택하기

> df[0] 처럼 [] 에 값 하나를 넣으면 row index 가 아니라 column 이름으로 해석된다. (이 데이터에는 0 이라는 컬럼이 없으므로 KeyError 가 발생한다.)

In [3]:
# To fetch one specific column...
df['Pclass'] # the result is returned as a Series

0      3
1      1
2      3
3      1
4      3
      ..
886    2
887    1
888    3
889    1
890    3
Name: Pclass, Length: 891, dtype: int64

In [4]:
# Another way: attribute-style access
df.Pclass

0      3
1      1
2      3
3      1
4      3
      ..
886    2
887    1
888    3
889    1
890    3
Name: Pclass, Length: 891, dtype: int64

## 복수의 컬럼 선택하기
- list 형태로 컬럼명을 넣으면 결과는 DataFrame 으로 반환된다

In [5]:
# Passing the column name inside a list...
df[['Pclass']] # the result is returned as a DataFrame

Unnamed: 0,Pclass
0,3
1,1
2,3
3,1
4,3
...,...
886,2
887,1
888,3
889,1


In [7]:
# Columns come out in the order listed here, not in their original order.
df[['Pclass', 'Age', 'Name']] 

Unnamed: 0,Pclass,Age,Name
0,3,22.0,"Braund, Mr. Owen Harris"
1,1,38.0,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,3,26.0,"Heikkinen, Miss. Laina"
3,1,35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,3,35.0,"Allen, Mr. William Henry"
...,...,...,...
886,2,27.0,"Montvila, Rev. Juozas"
887,1,19.0,"Graham, Miss. Margaret Edith"
888,3,,"Johnston, Miss. Catherine Helen ""Carrie"""
889,1,26.0,"Behr, Mr. Karl Howell"


In [9]:
# Order is free, and the same column may be requested more than once.
df[['Name', 'Age', 'Survived', 'Survived']]

Unnamed: 0,Name,Age,Survived,Survived.1
0,"Braund, Mr. Owen Harris",22.0,0,0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,1
2,"Heikkinen, Miss. Laina",26.0,1,1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,1
4,"Allen, Mr. William Henry",35.0,0,0
...,...,...,...,...
886,"Montvila, Rev. Juozas",27.0,0,0
887,"Graham, Miss. Margaret Edith",19.0,1,1
888,"Johnston, Miss. Catherine Helen ""Carrie""",,0,0
889,"Behr, Mr. Karl Howell",26.0,1,1


---
# Row 선택하기

## DataFrame slicing
  - dataframe의 경우 기본적으로 [] 연산은 **column 선택**에 사용  `(row 인덱싱 용이 아니다!!)`
  - 하지만, slicing은 row 레벨로 지원

In [10]:
# Slicing works at the row level.
df[:10]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [11]:
df[7:10]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [12]:
df[-5:]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [13]:
df[::10]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7000,G6,S
20,21,0,2,"Fynney, Mr. Joseph J",male,35.0,0,0,239865,26.0000,,S
30,31,0,1,"Uruchurtu, Don. Manuel E",male,40.0,0,0,PC 17601,27.7208,,C
40,41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40.0,1,0,7546,9.4750,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
850,851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4.0,4,2,347082,31.2750,,S
860,861,0,3,"Hansen, Mr. Claus Peter",male,41.0,2,0,350026,14.1083,,S
870,871,0,3,"Balkic, Mr. Cerin",male,26.0,0,0,349248,7.8958,,S
880,881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25.0,0,1,230433,26.0000,,S


In [14]:
df[::-1]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.7500,,Q
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [15]:
df['Survived']

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [16]:
df['Survived'][:10]

0    0
1    1
2    1
3    1
4    0
5    0
6    0
7    0
8    1
9    1
Name: Survived, dtype: int64

> [] 는 기본적으로 column 을 선택하지만, 슬라이스(`a:b`)를 넣으면 예외적으로 row 를 위치 기준으로 잘라 온다.   
> 이때 컬럼은 따로 지정하지 않았으므로 선택된 row 의 전체 컬럼이 함께 반환된다.

In [17]:
df[['Survived', 'Pclass', 'Age']][:10]

Unnamed: 0,Survived,Pclass,Age
0,0,3,22.0
1,1,1,38.0
2,1,3,26.0
3,1,1,35.0
4,0,3,35.0
5,0,3,
6,0,1,54.0
7,0,3,2.0
8,1,3,27.0
9,1,2,14.0


## row 선택하기
  - Series의 경우 []로 row 선택이 가능하나, **DataFrame의 경우는 기본적으로 column을 선택하도록 설계**
  - **.loc[], .iloc[]**로 row 선택 가능
    - loc - 인덱스 자체를 사용
    - iloc - 0 based index로 사용
    - 이 두 함수는 ,를 사용하여 column 선택도 가능

In [18]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [21]:
# Replace the default 0-based index with labels starting at 100, so that
# label-based (loc) and position-based (iloc) lookups are visibly different.
# The length is taken from the frame itself instead of a hard-coded 891,
# so the assignment still works if the CSV has a different number of rows.
df.index = np.arange(len(df)) + 100
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
986,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
987,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
988,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
989,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


#### loc[index]

In [22]:
# Select a single row with loc[index label]
df.loc[100] # the result is returned as a Series
# A DataFrame has both columns and an index;
# a Series has only an index.

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                               22.0
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 100, dtype: object

In [24]:
# Select several rows by label
df.loc[[100,101,102]]  # the result is returned as a DataFrame

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [25]:
df.loc[np.arange(100,105)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [26]:
# Plain [] slicing is positional and row-wise: this returns the rows at
# positions 100..104 (index labels 200..204 here) with all columns.
df[100:105]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
200,101,0,3,"Petranec, Miss. Matilda",female,28.0,0,0,349245,7.8958,,S
201,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
202,103,0,1,"White, Mr. Richard Frasar",male,21.0,0,1,35281,77.2875,D26,S
203,104,0,3,"Johansson, Mr. Gustaf Joel",male,33.0,0,0,7540,8.6542,,S
204,105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37.0,2,0,3101276,7.925,,S


##### iloc[0-base index]

In [27]:
df.iloc[0] # the result is returned as a Series
# The index label of this row is 100.

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                               22.0
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 100, dtype: object

In [28]:
df.iloc[[0]] # the result is returned as a DataFrame

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [30]:
df.iloc[np.arange(0,5)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [32]:
df.iloc[[0, 100, 200, 4, 7]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
200,101,0,3,"Petranec, Miss. Matilda",female,28.0,0,0,349245,7.8958,,S
300,201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28.0,0,0,345770,9.5,,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
107,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S


In [33]:
# Rows at positions 0 through 4
df.iloc[:5]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [34]:
# Positional row slice: rows at positions 0..4 with all columns.
df[:5]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## row, column 동시에 선택하기
loc[], iloc[] 속성을 이용할때, 콤마를 이용하여 둘다 명시 가능

In [37]:
# loc selects by index label and column name.
df.loc[100, 'Pclass']

3

##### iloc 사용하면 row 뿐 아니라 column 도 0-base 로 접근해야 한다

In [38]:
# iloc is position-based, so it takes positions rather than names.
# Passing a column name, as in iloc[position, column name], therefore raises an error
# (intentional demonstration).
df.iloc[0, 'Survived']

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

In [39]:
df.iloc[0, 1]

0

In [40]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
103,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
104,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [41]:
df.loc[104, 'Name']

'Allen, Mr. William Henry'

In [43]:
df.iloc[4, 3]

'Allen, Mr. William Henry'

In [44]:
# How to fetch several rows and several columns at once
df.loc[[100,102,104], ['Pclass', 'Survived', 'Age']]

Unnamed: 0,Pclass,Survived,Age
100,3,0,22.0
102,3,1,26.0
104,3,0,35.0


In [45]:
df.iloc[[0,5,8], [2,4,6]]

Unnamed: 0,Pclass,Sex,SibSp
100,3,male,1
105,3,male,0
108,3,female,0


In [47]:
df[['Pclass', 'Survived', 'Age']].loc[[100, 400, 200]]

Unnamed: 0,Pclass,Survived,Age
100,3,0,22.0
400,3,1,
200,3,0,28.0


In [48]:
# The columns are picked by name with [] first and iloc is used only for
# row positions, so this combination works as well.
df[['Pclass', 'Survived', 'Age']].iloc[np.arange(1,5)]

Unnamed: 0,Pclass,Survived,Age
101,1,1,38.0
102,3,1,26.0
103,1,1,35.0
104,3,0,35.0


- 값을 가져올 때 어떤 인덱서([], loc, iloc)를 쓰느냐에 따라 전달해야 하는 값(이름 또는 위치)이 달라진다.

In [49]:
# loc looks rows and columns up by label, so integer column positions
# that are not column names raise KeyError (intentional demonstration).
df.loc[[100,200,300], [0, 1, 3]]

KeyError: "None of [Index([0, 1, 3], dtype='int32')] are in the [columns]"

In [55]:
df[:10].loc[[100,101,102]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
101,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
102,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


##### 특정값 하나 뽑기

In [56]:
# Most recommended form: one loc call with the row label and the column name.
df.loc[102, 'Name']

'Heikkinen, Miss. Laina'

In [57]:
df['Name'][102]

'Heikkinen, Miss. Laina'

In [59]:
df.Name[102]

'Heikkinen, Miss. Laina'

In [10]:
import pandas as pd
# Country-level sample table, later keyed by country name.
# NOTE(review): 'Brusseis' looks like a misspelling of 'Brussels'; the value is
# kept unchanged here because it is runtime data.
data = {
    'country': ['Belgium', 'France', 'Germany', 'Netherlands', 'United Kingdom'],
    'population': [11.3, 64.3, 81.3, 16.9, 64.9],
    'area': [30510, 671308, 357050, 41526, 244820],
    'capital': ['Brusseis', 'Paris', 'Berlin', 'Amsterdam', 'London'],
}

# Build the frame and promote the 'country' column to the index in one step.
countries = pd.DataFrame(data).set_index('country')

countries

Unnamed: 0_level_0,population,area,capital
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Belgium,11.3,30510,Brusseis
France,64.3,671308,Paris
Germany,81.3,357050,Berlin
Netherlands,16.9,41526,Amsterdam
United Kingdom,64.9,244820,London


> 문제1. 각 국가의 인구를 출력하세요

In [2]:
countries['population']

country
Belgium           11.3
France            64.3
Germany           81.3
Netherlands       16.9
United Kingdom    64.9
Name: population, dtype: float64

> 문제2. 벨기에의 인구를 출력하세요

In [5]:
countries.loc['Belgium', 'population']

11.3

In [14]:
countries.loc['Belgium']['population']

11.3

In [11]:
countries.iloc[0, 0]

11.3

In [12]:
countries['population'].loc['Belgium']

11.3

In [13]:
countries['population'].iloc[0]

11.3

In [15]:
# Positional access on a label-indexed Series goes through .iloc.
# A bare integer key (population[0]) on a non-integer index is a deprecated
# positional fallback in pandas and emits a FutureWarning.
countries.population.iloc[0]

  countries.population[0]


11.3

In [16]:
countries.population['Belgium']

11.3

> 문제3. 벨기에 인구 대비 각 국가의 인구가 몇 배인지 출력하세요

In [9]:
countries['population'] / countries.loc['Belgium', 'population']

country
Belgium           1.000000
France            5.690265
Germany           7.194690
Netherlands       1.495575
United Kingdom    5.743363
Name: population, dtype: float64

In [17]:
countries.loc[:, 'population']  / countries.loc['Belgium', 'population']

country
Belgium           1.000000
France            5.690265
Germany           7.194690
Netherlands       1.495575
United Kingdom    5.743363
Name: population, dtype: float64

In [18]:
countries.iloc[:, 0] / countries.iloc[0,0]

country
Belgium           1.000000
France            5.690265
Germany           7.194690
Netherlands       1.495575
United Kingdom    5.743363
Name: population, dtype: float64