In [3]:
import pandas as pd

#### Series 기본: 1차원 배열 자료형으로 인덱스와 값의 쌍으로 구성된다. ndarray에 인덱스가 부여된 형태의 데이터를 가지고 있다. 때문에 유니버설 함수와 브로드캐스팅 등이 적용된다. 

In [2]:
S = pd.Series({"a": 1, "b": 2, "c":3, "d":4}) # 사전의 형태 가능
S

a    1
b    2
c    3
d    4
dtype: int64

In [6]:
S = pd.Series([1, 2, 3, 4], index = ['a', 'b', 'c', 'd']) # 데이터를 리스트로 만든 뒤, 인덱스 부여 가능. 
S

a    1
b    2
c    3
d    4
dtype: int64

In [4]:
S = pd.Series([1, 2, 3, 4]) # 인덱스 부여하지 않으면 자동 인덱스 부여됨. 0부터 시작 함. 
S

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
S.values  # dictionary.values()와 유사함. ndarray값이 부여됨. 

array([1, 2, 3, 4], dtype=int64)

In [5]:
type(S.values)

numpy.ndarray

In [10]:
S.index


RangeIndex(start=0, stop=4, step=1)

In [11]:
S ** 2 # 유니버설 함수와 브로드캐스팅이 적용됨

0     1
1     4
2     9
3    16
dtype: int64

#### DataFrame 기본

In [14]:
# 사전 이용 정의
df = pd.DataFrame({"col1": [1, 2, 3, 4],
                  "col2": [5, 6, 7, 8]}, 
                  index = ['a', 'b', 'c', 'd'])

In [16]:
df

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [15]:
df.values

array([[1, 5],
       [2, 6],
       [3, 7],
       [4, 8]], dtype=int64)

In [10]:
type(df.values)

numpy.ndarray

In [18]:
df.index # 행 인덱스 출력

Index(['a', 'b', 'c', 'd'], dtype='object')

In [19]:
df.columns # 컬럼들을 출력

Index(['col1', 'col2'], dtype='object')

In [20]:
type(df['col1'])

pandas.core.series.Series

#### 인덱싱과 슬라이싱
![image.png](attachment:b2187d0f-155a-4693-b3b6-17bbc621b37a.png)
시작과 끝자리에 별다른 숫자를 부여하지 않으면, 기본값으로 맨처음, 맨마지막 값이 부여된다. 

In [21]:
S

0    1
1    2
2    3
3    4
dtype: int64

In [22]:
S.loc['a'] # 사전에서 키를 가지고 값을 찾는 것과 완벽히 동일

KeyError: 'a'

In [23]:
S.iloc[2]

3

In [31]:
S.loc['a':'c']

Series([], dtype: int64)

In [25]:
S.iloc[1:3]

1    2
2    3
dtype: int64

In [11]:
df

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [19]:
df[['col1', 'col2']]

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [12]:
df['col1'] # column name => Series

a    1
b    2
c    3
d    4
Name: col1, dtype: int64

In [33]:
df[['col1', 'col2']] # column name list => Data Frame. 

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [34]:
df.loc['a', 'col2']

5

In [35]:
df.loc['a':'c', 'col1']

a    1
b    2
c    3
Name: col1, dtype: int64

In [37]:
df.iloc[1:3, 1]  # 명시적 인덱스가 부여되어 있더라도 암묵적으로는 숫자가 부여되어 있다. 그래서 iloc을 통해서 숫자로 인덱싱이 가능한 것이다. 

b    6
c    7
Name: col2, dtype: int64

#### 값 조회하기

In [18]:
# 모든 행과 모든 열을 보여주기
pd.set_option('display.max_rows', None) # none 자리에는 출력하고 싶은 행과 열의 개수를 지정하는 자리이다. 
pd.set_option('display.max_columns', None)

In [38]:
import numpy as np
df = pd.DataFrame(np.random.random(size = (500, 5)),
                 columns = ['X1', 'X2', 'X3', 'X4', 'X5'])
df

Unnamed: 0,X1,X2,X3,X4,X5
0,0.632122,0.704195,0.005282,0.439054,0.796572
1,0.434126,0.543274,0.319591,0.291103,0.459640
2,0.997208,0.744636,0.635731,0.516120,0.004685
3,0.526803,0.912069,0.835927,0.706832,0.343625
4,0.054502,0.202613,0.668503,0.267899,0.860569
...,...,...,...,...,...
495,0.784389,0.779756,0.079226,0.485198,0.947452
496,0.071345,0.182829,0.420931,0.539671,0.432224
497,0.405620,0.262082,0.768992,0.718252,0.076568
498,0.742086,0.819284,0.198197,0.090925,0.964067


In [39]:
df

Unnamed: 0,X1,X2,X3,X4,X5
0,0.632122,0.704195,0.005282,0.439054,0.796572
1,0.434126,0.543274,0.319591,0.291103,0.459640
2,0.997208,0.744636,0.635731,0.516120,0.004685
3,0.526803,0.912069,0.835927,0.706832,0.343625
4,0.054502,0.202613,0.668503,0.267899,0.860569
...,...,...,...,...,...
495,0.784389,0.779756,0.079226,0.485198,0.947452
496,0.071345,0.182829,0.420931,0.539671,0.432224
497,0.405620,0.262082,0.768992,0.718252,0.076568
498,0.742086,0.819284,0.198197,0.090925,0.964067


In [20]:
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,0.360255,0.860208,0.310894,0.210855,0.366435
1,0.265531,0.592843,0.459606,0.610412,0.854391
2,0.862768,0.064806,0.94739,0.359989,0.103019
3,0.996828,0.405544,0.136605,0.877391,0.540334
4,0.623951,0.643929,0.194511,0.924403,0.268549


In [21]:
df.head(10)

Unnamed: 0,X1,X2,X3,X4,X5
0,0.360255,0.860208,0.310894,0.210855,0.366435
1,0.265531,0.592843,0.459606,0.610412,0.854391
2,0.862768,0.064806,0.94739,0.359989,0.103019
3,0.996828,0.405544,0.136605,0.877391,0.540334
4,0.623951,0.643929,0.194511,0.924403,0.268549
5,0.283158,0.456452,0.368212,0.093543,0.502361
6,0.482873,0.218449,0.983257,0.097496,0.28079
7,0.252569,0.916347,0.47807,0.811132,0.634281
8,0.489403,0.75429,0.055011,0.263123,0.057868
9,0.100635,0.258108,0.267563,0.225763,0.310467


In [22]:
df.tail()

Unnamed: 0,X1,X2,X3,X4,X5
495,0.6667,0.489944,0.163396,0.208741,0.657764
496,0.228033,0.25081,0.445692,0.969766,0.427578
497,0.292839,0.612684,0.451712,0.461109,0.914361
498,0.237353,0.470598,0.537053,0.859075,0.281607
499,0.999763,0.937872,0.794299,0.781174,0.398558


In [25]:
df.dtypes # 칼럼별 dtype이 나오게 된다. 그러면 만약 섞여 있으면???

X1    float64
X2    float64
X3    float64
X4    float64
X5    float64
dtype: object

In [44]:
df.iloc[3, 4] = 'Changed'
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,1.0,0.704195,0.005282,0.439054,0.796572
1,0.434126,0.543274,0.319591,0.291103,0.45964
2,0.997208,0.744636,0.635731,0.51612,0.0046849
3,0.526803,0.912069,0.835927,0.706832,Changed
4,0.054502,0.202613,0.668503,0.267899,0.860569


In [25]:
df['X5']

0         0.366435
1         0.854391
2         0.103019
3          Changed
4         0.268549
5         0.502361
6          0.28079
7         0.634281
8        0.0578677
9         0.310467
10        0.409891
11        0.634554
12       0.0245229
13        0.774059
14        0.668186
15        0.193628
16      0.00627848
17        0.847597
18     0.000829548
19       0.0166729
20        0.891963
21        0.670684
22        0.521387
23        0.251626
24        0.785845
25        0.744182
26        0.145455
27       0.0510661
28         0.93496
29        0.966671
30        0.557408
31        0.192739
32        0.413848
33         0.96402
34        0.408195
35        0.943198
36        0.057859
37        0.318983
38         0.90067
39        0.427059
40        0.861314
41       0.0996072
42        0.563183
43        0.759948
44        0.945428
45        0.428043
46        0.602346
47        0.621307
48        0.875001
49        0.633638
50         0.56328
51        0.608123
52        0.

In [46]:
df.iloc[3:20, 2:4] = 'Changed2'
df.iloc[:25]

Unnamed: 0,X1,X2,X3,X4,X5
0,1.0,0.704195,0.00528249,0.439054,0.796572
1,0.434126,0.543274,0.319591,0.291103,0.45964
2,0.997208,0.744636,0.635731,0.51612,0.0046849
3,0.526803,0.912069,Changed2,Changed2,Changed
4,0.054502,0.202613,Changed2,Changed2,0.860569
5,0.61267,0.044698,Changed2,Changed2,0.98034
6,0.445289,0.265116,Changed2,Changed2,0.730726
7,0.705709,0.372972,Changed2,Changed2,0.857213
8,0.886013,0.147595,Changed2,Changed2,0.600863
9,0.622294,0.357988,Changed2,Changed2,0.14586
