In [1]:
import pandas as pd

In [8]:
# Method 1 (array-like): pass the values and the index labels separately.
data1 = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),   # string labels
)
data2 = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=[2, 3, 5, 7],   # integer labels that are not contiguous positions
)
data2

2    0.25
3    0.50
5    0.75
7    1.00
dtype: float64

In [11]:
# Method 2 (dictionary): each key becomes an index label and each value the
# entry stored under that label.
price_dict = dict(
    apple=20000,
    banana=10000,
    pineapple=9000,
    strawberry=9000,
    cherry=17000,
)
price = pd.Series(data=price_dict)
price

apple         20000
banana        10000
pineapple      9000
strawberry     9000
cherry        17000
dtype: int64

In [14]:
# Date strings keyed by fruit name. They are kept as plain strings, so the
# resulting Series has object dtype rather than a datetime dtype.
validity_dict = dict(
    apple='2021-11-30',
    banana='2021-10-30',
    pineapple='2021-10-20',
    strawberry='2021-10-21',
    cherry='2021-10-22',
)
validity = pd.Series(data=validity_dict)
validity

apple         2021-11-30
banana        2021-10-30
pineapple     2021-10-20
strawberry    2021-10-21
cherry        2021-10-22
dtype: object

In [18]:
# Combine the two Series into one DataFrame: the keyword names become the
# column names and the rows are aligned on the Series' index labels.
fruit = pd.DataFrame(
    dict(
        price=price,
        validity=validity,
    )
)
fruit

Unnamed: 0,price,validity
apple,20000,2021-11-30
banana,10000,2021-10-30
pineapple,9000,2021-10-20
strawberry,9000,2021-10-21
cherry,17000,2021-10-22


In [25]:
# A list of dicts is read as a list of records: every dict becomes one row
# and its keys become the column names.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [26]:
# Records with different key sets: the columns are the union of all keys and
# a key missing from a record shows up as NaN in that row.
data = [
    dict(a=1, b=2),
    dict(b=3, c=4),
]
pd.DataFrame(data)

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [28]:
import numpy as np

# Build a DataFrame from a 3x2 NumPy array with explicit row and column labels.
# No seed is set in this cell, so the values differ between runs unless the
# generator is seeded elsewhere.
pd.DataFrame(
    data=np.random.rand(3, 2),
    index=list('abc'),
    columns=['foo', 'bar'],
)

Unnamed: 0,foo,bar
a,0.697025,0.260833
b,0.171793,0.411688
c,0.687489,0.293112


In [33]:
# A pandas Index supports array-style access but cannot be modified in place.
ind = pd.Index([2, 3, 5, 7, 11])
ind[1]     # indexing
ind[::2]   # slicing
# ind[1] = 0  # raises TypeError (elements cannot be changed)

# Set operations between two Index objects.
# Use the named methods rather than `&` / `|`: the operators were deprecated
# as set operations and act element-wise in pandas 2.x, so they no longer
# return the intersection / union.
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])
ind_common = indA.intersection(indB)   # labels present in both
ind_all = indA.union(indB)             # labels present in either one
ind_all

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [51]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index = ['a', 'b', 'c', 'd'])

# A Series can be used like a dictionary keyed by its index labels.
# The cell ends with assignments, so none of the expressions below is displayed.
'a' in data # check whether 'a' is one of the keys (index labels) of data
data.keys() # get the keys (index) of data
list(data.items()) # get the key-value pairs of data (wrap in list(); not a bare data.items())
data['e'] = 1.25 # add a value under a new label
data['d'] = 9.9999 # replace the value stored under an existing label

In [42]:
# Display the whole Series.
# NOTE(review): the recorded output shows d = 1.00 and no 'e' entry, so it was
# produced before the In [51] cell assigned data['e'] and data['d']; a fresh
# top-to-bottom run would presumably show the modified values instead.
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [43]:
# Slice by explicit index labels; the stop label 'c' is included in the result.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [46]:
# Slice by integer position; the stop position 3 is excluded (positions 0, 1, 2).
# NOTE(review): data.iloc[0:3] would state the positional intent explicitly.
data[0:3]

a    0.25
b    0.50
c    0.75
dtype: float64

In [48]:
# Boolean masking: keep the entries strictly greater than 0.3 and strictly
# less than 0.8.
data.loc[data.gt(0.3) & data.lt(0.8)]

b    0.50
c    0.75
dtype: float64

In [49]:
# Fancy indexing by integer position (4th entry, then 3rd entry).
# Use .iloc explicitly: `data` has string labels, so data[[3, 2]] only works
# through the positional fallback of Series.__getitem__, which is deprecated
# in recent pandas (integer keys are to be treated as labels).
data.iloc[[3, 2]]

d    1.00
c    0.75
dtype: float64

In [52]:
data[['a', 'e']] # fancy indexing: a list of labels selects exactly those entries

a    0.25
e    1.25
dtype: float64

In [53]:
# Display the fruit DataFrame again.
fruit

Unnamed: 0,price,validity
apple,20000,2021-11-30
banana,10000,2021-10-30
pineapple,9000,2021-10-20
strawberry,9000,2021-10-21
cherry,17000,2021-10-22


In [61]:
# Per-state area and population, each keyed by the state name.
area = pd.Series({
    'California': 423967,
    'Florida': 170312,
    'Illinois': 149995,
    'New York': 141297,
    'Texas': 695662,
})
pop = pd.Series({
    'California': 38332521,
    'Florida': 19552860,
    'Illinois': 12882135,
    'New York': 19651127,
    'Texas': 26448193,
})

# Assemble both Series into one frame and derive density = pop / area.
df = pd.DataFrame({'area': area, 'pop': pop}).assign(
    density=lambda frame: frame['pop'] / frame['area']
)
df

Unnamed: 0,area,pop,density
California,423967,38332521,90.413926
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763
New York,141297,19651127,139.076746
Texas,695662,26448193,38.01874


In [63]:
# Select one row by its index label with .loc.
#df['California'] # wrong way: df[...] with a single label looks up a column, not a row
df.loc['California']

area       4.239670e+05
pop        3.833252e+07
density    9.041393e+01
Name: California, dtype: float64

In [67]:
# Select one row by integer position with .iloc; position 0 is the 'California' row.
df.iloc[0]

area       4.239670e+05
pop        3.833252e+07
density    9.041393e+01
Name: California, dtype: float64

In [72]:
# Fancy indexing: select several rows at once.
# Both expressions pick the Florida and Texas rows; only the last one is displayed.
ind1 = ['Florida', 'Texas']
df.loc[ind1]  # by index labels
df.iloc[[1,4]]  # by integer positions

Unnamed: 0,area,pop,density
Florida,170312,19552860,114.806121
Texas,695662,26448193,38.01874


In [77]:
# Column-wise indexing uses the column (feature) name directly instead of an indexer.
# df.loc['pop'] # wrong way
df['pop']
type(df['pop']) # the result is a Series

pandas.core.series.Series

In [79]:
# A list of column names selects several columns, returned in the order of the list.
ind2 = ['pop', 'area']
df[ind2]

Unnamed: 0,pop,area
California,38332521,423967
Florida,19552860,170312
Illinois,12882135,149995
New York,19651127,141297
Texas,26448193,695662


In [82]:
# Slice rows by index label; the stop label 'Texas' is included in the result.
df.loc['Florida':'Texas']

Unnamed: 0,area,pop,density
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763
New York,141297,19651127,139.076746
Texas,695662,26448193,38.01874


In [89]:
# Use the DataFrame's column Index object: df.columns[1:] is every column name except the first.
df[df.columns[1:]]

# Use a list of column (feature) names directly; this last expression is the one displayed.
df[['pop', 'density']]

Unnamed: 0,pop,density
California,38332521,90.413926
Florida,19552860,114.806121
Illinois,12882135,85.883763
New York,19651127,139.076746
Texas,26448193,38.01874


In [91]:
# Use the DataFrame's implicit (positional) index: all rows, columns from position 1 onward.
df.iloc[:,1:]

Unnamed: 0,pop,density
California,38332521,90.413926
Florida,19552860,114.806121
Illinois,12882135,85.883763
New York,19651127,139.076746
Texas,26448193,38.01874


In [92]:
# Boolean mask that is True for rows whose density is at least 100;
# indexing with it keeps only those rows.
ind4 = df['density'].ge(100)
df.loc[ind4]

Unnamed: 0,area,pop,density
Florida,170312,19552860,114.806121
New York,141297,19651127,139.076746


In [95]:
# Second mask: rows whose area is at least 150000. Combining it with the
# density mask `ind4` via `&` keeps the rows where both conditions hold.
ind5 = df['area'].ge(150000)
df.loc[ind4 & ind5]

Unnamed: 0,area,pop,density
Florida,170312,19552860,114.806121
