# 데이터프레임과 시리즈
### 3-1 나만의 데이터 만들기

In [96]:
import pandas as pd

# Since Python 3.7 a plain dict keeps insertion order; OrderedDict is only needed to guarantee key order on older interpreters.
from collections import OrderedDict

In [4]:
# A Series built from a plain list gets a default integer index (0, 1, ...).
s = pd.Series(data=['banana', 42])

In [5]:
# Print the Series: index labels on the left, values on the right, dtype last.
print(s)

0    banana
1        42
dtype: object


In [6]:
# Passing `index` replaces the default integer labels with string labels.
s = pd.Series(
    data=['Wes Mckinney', 'Creator of Pandas'],
    index=['Person', 'Who'],
)

In [7]:
# Print the Series; the string labels passed as `index` appear on the left.
print(s)

Person         Wes Mckinney
Who       Creator of Pandas
dtype: object


In [94]:
# Build a two-row DataFrame from a dict: each key becomes a column and each
# list supplies that column's values; `index` labels the rows 'a' and 'b'.
scientists = pd.DataFrame(
    data={
        'Name': ['Rosaline Franklin', 'William Gosset'],
        'Occupation': ['Chemist', 'Statistician'],
        'Born': ['1920-07-25', '1876-06-13'],
        'Died': ['1958-04-16', '1937-10-16'],
        'Age': [37, 61],
    },
    index=['a', 'b'],
)

# A bare trailing expression makes the notebook render the frame.
scientists

Unnamed: 0,Name,Occupation,Born,Died,Age
a,Rosaline Franklin,Chemist,1920-07-25,1958-04-16,37
b,William Gosset,Statistician,1876-06-13,1937-10-16,61


In [99]:
# To guarantee column order on interpreters where a plain dict is unordered,
# give OrderedDict a list of (column, values) pairs. Wrapping a dict literal
# (OrderedDict({...})) adds nothing: the literal is built as a regular dict
# first, so its order is already fixed (or already lost) before OrderedDict
# ever sees it.
scientists = pd.DataFrame(
    OrderedDict([
        ('Name', ['Rosaline Franklin', 'William Gosset']),
        ('Occupation', ['Chemist', 'Statistician']),
        ('Born', ['1920-07-25', '1876-06-13']),
        ('Died', ['1958-04-16', '1937-10-16']),
        ('Age', [37, 61]),
    ]),
    index=['a', 'b'],
)
# A bare trailing expression makes the notebook render the frame.
scientists

Unnamed: 0,Name,Occupation,Born,Died,Age
a,Rosaline Franklin,Chemist,1920-07-25,1958-04-16,37
b,William Gosset,Statistician,1876-06-13,1937-10-16,61


#### 시리즈 다루기 - 기초

In [101]:
# Select the first row by position with iloc and check its type; the recorded
# output shows that a single row comes back as a pandas Series.
type(scientists.iloc[0])

pandas.core.series.Series

In [102]:
# Index of the row Series; the recorded output lists the DataFrame's column names.
scientists.iloc[0].index

Index(['Name', 'Occupation', 'Born', 'Died', 'Age'], dtype='object')

In [103]:
# The index supports positional access: take the first label of the row's index.
scientists.iloc[0].index[0]

'Name'

In [104]:
# Keep the first row (selected by position) as a Series for the examples below.
series1 = scientists.iloc[0]

In [105]:
# Keep the second row (selected by position) as a Series.
series2 = scientists.iloc[1]

In [106]:
# Display the first-row Series; the recorded output shows the row label as its Name.
series1

Name          Rosaline Franklin
Occupation              Chemist
Born                 1920-07-25
Died                 1958-04-16
Age                          37
Name: a, dtype: object

In [107]:
# Display the second-row Series.
series2

Name          William Gosset
Occupation      Statistician
Born              1876-06-13
Died              1937-10-16
Age                       61
Name: b, dtype: object

In [108]:
# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# use pd.concat to stack the two Series end to end. Both sets of index labels
# are kept, so labels such as 'Name' and 'Age' appear twice in the result.
pd.concat([series1, series2])

Name          Rosaline Franklin
Occupation              Chemist
Born                 1920-07-25
Died                 1958-04-16
Age                          37
Name             William Gosset
Occupation         Statistician
Born                 1876-06-13
Died                 1937-10-16
Age                          61
dtype: object

In [109]:
# Check the type of series1; the recorded output shows a pandas Series.
type(series1)

pandas.core.series.Series

### 시리즈 다루기 - 응용

불린 추출은 특정 조건을 만족하는 값만 추출한다.

In [110]:
# Load the scientists data set from CSV. The Windows path is written as a raw
# string so its backslashes are taken literally: in a normal string '\m',
# '\e', '\p', '\d' and '\s' are invalid escape sequences, which newer Python
# versions warn about. The resulting path value is unchanged.
scientists = pd.read_csv(r'C:\myPyCode\easyspub\pandas\data\scientists.csv')

In [111]:
# Display the loaded DataFrame.
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [112]:
# Boolean extraction: the comparison yields one True/False per row, and only
# the rows whose Age is greater than the mean Age are kept.
scientists.loc[scientists['Age'] > scientists['Age'].mean()]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


### 실습! 과학자들이 살아있던 일수 구하기

In [121]:
# Date data is better handled as the datetime type, so parse the 'Born'
# strings with an explicit year-month-day format.
born_datetime = pd.to_datetime(scientists['Born'], format='%Y-%m-%d')
# Display the parsed Series; the recorded output shows dtype datetime64[ns].
born_datetime

0   1920-07-25
1   1876-06-13
2   1820-05-12
3   1867-11-07
4   1907-05-27
5   1813-03-15
6   1912-06-23
7   1777-04-30
Name: Born, dtype: datetime64[ns]

In [119]:
# Parse the 'Died' strings into datetime values using the same
# year-month-day format.
died_datetime = pd.to_datetime(scientists['Died'], format='%Y-%m-%d')
# Display the parsed Series; the recorded output shows dtype datetime64[ns].
died_datetime

0   1958-04-16
1   1937-10-16
2   1910-08-13
3   1934-07-04
4   1964-04-14
5   1858-06-16
6   1954-06-07
7   1855-02-23
Name: Died, dtype: datetime64[ns]

In [127]:
# Store the parsed datetime Series in the 'Born' and 'Died' columns.
# NOTE(review): the recorded output below also lists born_dt/died_dt columns
# that this cell does not create; presumably left over from an earlier run.
scientists['Born'] = born_datetime
scientists['Died'] = died_datetime
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation,born_dt,died_dt
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16
1,William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14
5,John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23


In [128]:
# Subtract the birth date from the death date row by row; the recorded output
# shows the new column as a number of days.
scientists['lived_day'] = scientists['Died'] - scientists['Born']

In [129]:
# Display the DataFrame, now including the 'lived_day' column.
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation,born_dt,died_dt,lived_day
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1920-07-25,1958-04-16,13779 days
1,William Gosset,1876-06-13,1937-10-16,61,Statistician,1876-06-13,1937-10-16,22404 days
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,1820-05-12,1910-08-13,32964 days
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,1867-11-07,1934-07-04,24345 days
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,1907-05-27,1964-04-14,20777 days
5,John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16,16529 days
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1912-06-23,1954-06-07,15324 days
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,1777-04-30,1855-02-23,28422 days
