# Pandas Tutorial
- Series data 생성
- DataFrame 생성
- DataFrame 원하는 행/열 선택

In [3]:
import pandas as pd
import numpy as np

## Series 생성
- index 기능을 추가한 1차원 배열

In [4]:
# Wrap a NumPy array in a Series; with no index given, pandas labels the
# values 0..n-1.
a = np.array([1, 2, 3, 4])
s1 = pd.Series(data=a)
s1

0    1
1    2
2    3
3    4
dtype: int64

In [5]:
# A plain Python list of strings also works as Series data.
b = list('abcd')
s2 = pd.Series(data=b)
s2

0    a
1    b
2    c
3    d
dtype: object

In [6]:
# Rebuild s2 with explicit labels: the values of `a` become the index in place
# of the default 0..n-1 labels.
s2 = pd.Series(index=a, data=b)
s2

1    a
2    b
3    c
4    d
dtype: object

In [7]:
# A dict maps straight onto a Series: keys become the index labels, values the data.
d1 = {
    "국어": 100,
    "수학": 95,
    "영어": 90,
}
s3 = pd.Series(data=d1)
s3

국어    100
수학     95
영어     90
dtype: int64

In [8]:
# Inspect the labels and the underlying values, then read one entry by label.
print(s3.index)
print(s3.values)
print(s3.loc["국어"])

# Label-based assignment: an existing label is overwritten, a new label is appended.
s3.loc["국어"] = 97
s3.loc["과학"] = 100
print(s3)

Index(['국어', '수학', '영어'], dtype='object')
[100  95  90]
100
국어     97
수학     95
영어     90
과학    100
dtype: int64


## DataFrame 생성
- Series가 1차원이면 DataFrame은 2차원
- 인덱스가 행(row)과 열(column)로 구성

In [9]:
# Load the player statistics from a CSV file; the path is relative to the
# current working directory.
basket_ball_csv = pd.read_csv('./basketball_stat.csv')
basket_ball_csv

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
1,Steven Adams,C,0.0,6.0,9.5,1.6,1.5,1.0
2,Bam Adebayo,C,0.0,3.4,7.3,2.2,0.9,0.8
3,DeVaughn Akoon-Purcell,SG,0.0,0.4,0.6,0.9,0.3,0.0
4,LaMarcus Aldridge,C,0.1,8.3,9.2,2.4,0.5,1.3
...,...,...,...,...,...,...,...,...
195,Nik Stauskas,SG,1.0,1.0,1.9,1.2,0.3,0.1
196,D.J. Stephens,SG,0.0,1.0,0.0,0.0,1.0,0.0
197,Lance Stephenson,SG,1.1,1.6,3.2,2.1,0.6,0.1
198,Garrett Temple,SG,1.2,1.6,2.9,1.4,1.0,0.4


In [10]:
# Load a table from an Excel workbook; the path is relative to the current
# working directory. The rows displayed below match the CSV version.
basket_ball_xlsx = pd.read_excel('./basketball_stat.xlsx')
basket_ball_xlsx

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
1,Steven Adams,C,0.0,6.0,9.5,1.6,1.5,1.0
2,Bam Adebayo,C,0.0,3.4,7.3,2.2,0.9,0.8
3,DeVaughn Akoon-Purcell,SG,0.0,0.4,0.6,0.9,0.3,0.0
4,LaMarcus Aldridge,C,0.1,8.3,9.2,2.4,0.5,1.3
...,...,...,...,...,...,...,...,...
195,Nik Stauskas,SG,1.0,1.0,1.9,1.2,0.3,0.1
196,D.J. Stephens,SG,0.0,1.0,0.0,0.0,1.0,0.0
197,Lance Stephenson,SG,1.1,1.6,3.2,2.1,0.6,0.1
198,Garrett Temple,SG,1.2,1.6,2.9,1.4,1.0,0.4


In [11]:
# Preview the first rows; n=5 is the default, spelled out here.
basket_ball_csv.head(n=5)

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
1,Steven Adams,C,0.0,6.0,9.5,1.6,1.5,1.0
2,Bam Adebayo,C,0.0,3.4,7.3,2.2,0.9,0.8
3,DeVaughn Akoon-Purcell,SG,0.0,0.4,0.6,0.9,0.3,0.0
4,LaMarcus Aldridge,C,0.1,8.3,9.2,2.4,0.5,1.3


In [12]:
# Preview the last rows; n=5 is the default, spelled out here.
basket_ball_csv.tail(n=5)

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
195,Nik Stauskas,SG,1.0,1.0,1.9,1.2,0.3,0.1
196,D.J. Stephens,SG,0.0,1.0,0.0,0.0,1.0,0.0
197,Lance Stephenson,SG,1.1,1.6,3.2,2.1,0.6,0.1
198,Garrett Temple,SG,1.2,1.6,2.9,1.4,1.0,0.4
199,Jared Terrell,SG,0.3,0.6,0.4,0.9,0.2,0.1


In [13]:
# Size of the frame as a (rows, columns) tuple.
basket_ball_csv.shape

(200, 8)

In [14]:
# Row labels of the frame.
basket_ball_csv.index

RangeIndex(start=0, stop=200, step=1)

In [16]:
# Column labels of the frame.
basket_ball_csv.columns

Index(['Player', 'Pos', '3P', '2P', 'TRB', 'AST', 'STL', 'BLK'], dtype='object')

In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) per column;
# the output below covers the numeric columns.
basket_ball_csv.describe()

Unnamed: 0,3P,2P,TRB,AST,STL,BLK
count,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.8265,2.445,3.8665,1.6495,0.592,0.454
std,0.84321,1.885591,2.853901,1.342988,0.383892,0.491316
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.1,1.9,0.875,0.3,0.1
50%,0.65,1.9,3.2,1.2,0.5,0.3
75%,1.325,3.225,5.0,2.1,0.8,0.6
max,4.0,8.6,15.6,7.7,1.8,2.4


In [18]:
# Print the column names, non-null counts, dtypes and memory usage.
basket_ball_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  200 non-null    object 
 1   Pos     200 non-null    object 
 2   3P      200 non-null    float64
 3   2P      200 non-null    float64
 4   TRB     200 non-null    float64
 5   AST     200 non-null    float64
 6   STL     200 non-null    float64
 7   BLK     200 non-null    float64
dtypes: float64(6), object(2)
memory usage: 12.6+ KB


In [19]:
# Select a single column by its label.
basket_ball_csv['3P']       # a single label returns a Series

0      1.3
1      0.0
2      0.0
3      0.0
4      0.1
      ... 
195    1.0
196    0.0
197    1.1
198    1.2
199    0.3
Name: 3P, Length: 200, dtype: float64

In [20]:
# Select a single column but keep it as a DataFrame: the double brackets
# pass a list of column labels instead of a single label.
basket_ball_csv[['3P']]

Unnamed: 0,3P
0,1.3
1,0.0
2,0.0
3,0.0
4,0.1
...,...
195,1.0
196,0.0
197,1.1
198,1.2


In [21]:
# Select several columns at once by passing a list of labels; the result is a DataFrame.
basket_ball_csv[['Player','AST','BLK']]

Unnamed: 0,Player,AST,BLK
0,Alex Abrines,0.6,0.2
1,Steven Adams,1.6,1.0
2,Bam Adebayo,2.2,0.8
3,DeVaughn Akoon-Purcell,0.9,0.0
4,LaMarcus Aldridge,2.4,1.3
...,...,...,...
195,Nik Stauskas,1.2,0.1
196,D.J. Stephens,0.0,0.0
197,Lance Stephenson,2.1,0.1
198,Garrett Temple,1.4,0.4


### DataFrame 조건으로 행(row) 선택하기
- 여러 조건은 &(and)와 |(or)로 결합하며, 각 조건은 괄호로 감싼다

In [22]:
# Boolean mask on 3P: only the rows where the comparison is True are kept.
basket_ball_csv.loc[basket_ball_csv['3P'] > 0.5]

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
6,Grayson Allen,SG,0.8,0.9,0.6,0.7,0.2,0.2
8,Kadeem Allen,SG,0.9,2.5,2.7,4.0,0.8,0.2
11,Dwayne Bacon,SG,0.9,2.0,2.1,1.1,0.3,0.1
14,Will Barton,SG,1.6,2.7,4.6,2.9,0.4,0.5
...,...,...,...,...,...,...,...,...
193,J.R. Smith,SG,1.1,1.4,1.6,1.9,1.0,0.3
194,Zhaire Smith,SG,1.0,1.3,2.2,1.7,0.3,0.3
195,Nik Stauskas,SG,1.0,1.0,1.9,1.2,0.3,0.1
197,Lance Stephenson,SG,1.1,1.6,3.2,2.1,0.6,0.1


In [23]:
# Two conditions joined with & (element-wise AND). Each comparison is wrapped
# in parentheses because & has higher precedence than >.
basket_ball_csv.loc[
    (basket_ball_csv['3P'] > 0.5)
    & (basket_ball_csv['2P'] > 2.0)
]

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
8,Kadeem Allen,SG,0.9,2.5,2.7,4.0,0.8,0.2
14,Will Barton,SG,1.6,2.7,4.6,2.9,0.4,0.5
16,Kent Bazemore,SG,1.4,2.7,3.9,2.3,1.3,0.6
17,Bradley Beal,SG,2.5,6.8,5.0,5.5,1.5,0.7
18,Malik Beasley,SG,2.0,2.3,2.5,1.2,0.7,0.1
21,DeAndre' Bembry,SG,0.6,2.7,4.4,2.5,1.3,0.5
25,Antonio Blakeney,SG,0.6,2.3,1.9,0.7,0.2,0.2
26,Bogdan Bogdanovi?,SG,1.9,3.2,3.5,3.8,1.0,0.2
28,Devin Booker,SG,2.1,7.0,4.1,6.8,0.9,0.2
29,Avery Bradley,SG,1.4,2.6,2.8,2.4,0.7,0.3


### 원하는 행 선택하기
- loc -> 레이블(label) 기반 인덱싱
- iloc -> 정수 위치(position) 기반 인덱싱

In [24]:
# Select the row whose index label is 0; a single row comes back as a Series.
basket_ball_csv.loc[0]

Player    Alex Abrines
Pos                 SG
3P                 1.3
2P                 0.5
TRB                1.5
AST                0.6
STL                0.5
BLK                0.2
Name: 0, dtype: object

In [35]:
# Positional slice of the first four rows (positions 0-3; the stop position is excluded).
basket_ball_csv.iloc[:4]

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
1,Steven Adams,C,0.0,6.0,9.5,1.6,1.5,1.0
2,Bam Adebayo,C,0.0,3.4,7.3,2.2,0.9,0.8
3,DeVaughn Akoon-Purcell,SG,0.0,0.4,0.6,0.9,0.3,0.0


In [36]:
# Select rows and columns in one call: row labels first, then column labels.
basket_ball_csv.loc[[0,3,6],['Player','TRB','BLK']]
# Positional equivalent with iloc (Player, TRB and BLK sit at column positions 0, 4 and 7):
# basket_ball_csv.iloc[[0,3,6],[0,4,7]]

Unnamed: 0,Player,TRB,BLK
0,Alex Abrines,1.5,0.2
3,DeVaughn Akoon-Purcell,0.6,0.0
6,Grayson Allen,0.6,0.2
