In [1]:
import pandas as pd

## Series
- Series는 1차원 배열 자료형으로 인덱스와 값의 쌍으로 구성
![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [2]:
S = pd.Series({"a": 1, "b": 2, "c":3, "d":4})
S

a    1
b    2
c    3
d    4
dtype: int64

In [3]:
S = pd.Series([1, 2, 3, 4], index = ['a', 'b', 'c', 'd'])
S

a    1
b    2
c    3
d    4
dtype: int64

In [5]:
S = pd.Series([1, 2, 3, 4])
S

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
S.values

array([1, 2, 3, 4], dtype=int64)

In [7]:
type(S.values)

numpy.ndarray

In [8]:
S.index

RangeIndex(start=0, stop=4, step=1)

In [9]:
S ** 2 # 유니버설 함수와 브로드캐스팅이 적용됨

0     1
1     4
2     9
3    16
dtype: int64

## DataFrame
- DataFrame은 2차원 배열 자료형으로 값, 행 인덱스, 열 인덱스로 구성
![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [10]:
df = pd.DataFrame({"col1": [1, 2, 3, 4],
                  "col2": [5, 6, 7, 8]}, 
                  index = ['a', 'b', 'c', 'd'])

In [11]:
df

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [12]:
df.values

array([[1, 5],
       [2, 6],
       [3, 7],
       [4, 8]], dtype=int64)

In [13]:
type(df.values)

numpy.ndarray

In [14]:
df.index # 행 인덱스 출력

Index(['a', 'b', 'c', 'd'], dtype='object')

In [15]:
df.columns # 컬럼들을 출력

Index(['col1', 'col2'], dtype='object')

In [16]:
type(df['col1'])

pandas.core.series.Series

## 인덱싱과 슬라이싱
- 판다스의 객체는 암묵적인 인덱스(위치 인덱스)와 명시적인 인덱스라는 두 종류의 인덱스가 있어, 명시적인 인덱스를 참조하는 loc인덱서와 암묵적인 인덱스를 참조하는 iloc 인덱서가 존재한다.
![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [18]:
S = pd.Series([1, 2, 3, 4], index = ['a', 'b', 'c', 'd'])
S

a    1
b    2
c    3
d    4
dtype: int64

In [19]:
S.loc['a'] # 사전에서 키를 가지고 값을 찾는 것과 완벽히 동일

1

In [20]:
S.iloc[2]

3

In [21]:
S.loc['a':'c']

a    1
b    2
c    3
dtype: int64

In [22]:
S.iloc[1:3]

b    2
c    3
dtype: int64

In [23]:
df

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [24]:
df[['col1', 'col2']]

Unnamed: 0,col1,col2
a,1,5
b,2,6
c,3,7
d,4,8


In [25]:
df['col1'] # column name => Series

a    1
b    2
c    3
d    4
Name: col1, dtype: int64

In [26]:
df[['col1']] # column name list => Data Frame

Unnamed: 0,col1
a,1
b,2
c,3
d,4


In [27]:
df.loc['a', 'col2']

5

In [28]:
df.loc['a':'c', 'col1']

a    1
b    2
c    3
Name: col1, dtype: int64

In [29]:
df.loc[['a':'c', 'col1']]

SyntaxError: invalid syntax (<ipython-input-29-0c8ac5f1f83a>, line 1)

In [30]:
df.iloc[1:3, 1]

b    6
c    7
Name: col2, dtype: int64

## 값 조회하기

In [31]:
# 모든 행과 모든 열을 보여주기
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [32]:
import numpy as np
df = pd.DataFrame(np.random.random(size = (500,5)),
                 columns = ['X1','X2','X3','X4','X5'])
df

Unnamed: 0,X1,X2,X3,X4,X5
0,0.745747,0.398612,0.881344,0.665372,0.696246
1,0.579885,0.947979,0.548387,0.209516,0.809903
2,0.404495,0.106716,0.62836,0.946552,0.345433
3,0.304027,0.991132,0.80064,0.864326,0.546306
4,0.963996,0.085706,0.066296,0.649772,0.128781
5,0.401862,0.596736,0.242905,0.31858,0.957553
6,0.071065,0.012293,0.681411,0.394598,0.174657
7,0.77832,0.032002,0.313808,0.179239,0.62671
8,0.777305,0.862534,0.026716,0.236054,0.892115
9,0.48252,0.40044,0.228818,0.802488,0.859642


In [33]:
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,0.745747,0.398612,0.881344,0.665372,0.696246
1,0.579885,0.947979,0.548387,0.209516,0.809903
2,0.404495,0.106716,0.62836,0.946552,0.345433
3,0.304027,0.991132,0.80064,0.864326,0.546306
4,0.963996,0.085706,0.066296,0.649772,0.128781


In [34]:
df.head(10)

Unnamed: 0,X1,X2,X3,X4,X5
0,0.745747,0.398612,0.881344,0.665372,0.696246
1,0.579885,0.947979,0.548387,0.209516,0.809903
2,0.404495,0.106716,0.62836,0.946552,0.345433
3,0.304027,0.991132,0.80064,0.864326,0.546306
4,0.963996,0.085706,0.066296,0.649772,0.128781
5,0.401862,0.596736,0.242905,0.31858,0.957553
6,0.071065,0.012293,0.681411,0.394598,0.174657
7,0.77832,0.032002,0.313808,0.179239,0.62671
8,0.777305,0.862534,0.026716,0.236054,0.892115
9,0.48252,0.40044,0.228818,0.802488,0.859642


In [35]:
df.tail()

Unnamed: 0,X1,X2,X3,X4,X5
495,0.50764,0.561489,0.204627,0.666875,0.639367
496,0.575576,0.814209,0.603913,0.534158,0.898709
497,0.965169,0.004119,0.084385,0.546236,0.890803
498,0.830181,0.192357,0.234541,0.713151,0.772996
499,0.536558,0.259368,0.054314,0.208785,0.426431


In [36]:
df.columns

Index(['X1', 'X2', 'X3', 'X4', 'X5'], dtype='object')

In [44]:
df.dtypes

X1    float64
X2    float64
X3     object
X4     object
X5     object
dtype: object

In [43]:
df.iloc[3, 4] = 'Changed'
df.iloc[3,3] = 100
df.head()

Unnamed: 0,X1,X2,X3,X4,X5
0,0.745747,0.398612,0.881344,0.665372,0.696246
1,0.579885,0.947979,0.548387,0.209516,0.809903
2,0.404495,0.106716,0.62836,0.946552,0.345433
3,0.304027,0.991132,Changed2,100,Changed
4,0.963996,0.085706,Changed2,Changed2,0.128781


In [39]:
df['X5']

0         0.696246
1         0.809903
2         0.345433
3          Changed
4         0.128781
5         0.957553
6         0.174657
7          0.62671
8         0.892115
9         0.859642
10         0.95642
11        0.494854
12        0.749082
13       0.0901997
14        0.466601
15        0.985872
16        0.142038
17        0.232034
18        0.417663
19        0.233748
20        0.507968
21       0.0333931
22        0.535617
23        0.375283
24        0.190448
25        0.728496
26        0.663303
27        0.765906
28        0.752499
29        0.900957
30        0.469877
31        0.120227
32        0.564593
33        0.659314
34        0.409919
35        0.130709
36        0.895944
37        0.335751
38         0.40874
39        0.488169
40        0.696411
41        0.606733
42        0.285341
43        0.650626
44        0.341999
45        0.318817
46        0.813337
47        0.909855
48         0.68609
49        0.815485
50        0.209807
51        0.274111
52        0.

In [40]:
df.iloc[3:20, 2:4] = 'Changed2'
df.iloc[:25]

Unnamed: 0,X1,X2,X3,X4,X5
0,0.745747,0.398612,0.881344,0.665372,0.696246
1,0.579885,0.947979,0.548387,0.209516,0.809903
2,0.404495,0.106716,0.62836,0.946552,0.345433
3,0.304027,0.991132,Changed2,Changed2,Changed
4,0.963996,0.085706,Changed2,Changed2,0.128781
5,0.401862,0.596736,Changed2,Changed2,0.957553
6,0.071065,0.012293,Changed2,Changed2,0.174657
7,0.77832,0.032002,Changed2,Changed2,0.62671
8,0.777305,0.862534,Changed2,Changed2,0.892115
9,0.48252,0.40044,Changed2,Changed2,0.859642
