# Pandas

In [6]:
# 모듈 import
import pandas as pd
import numpy as np

## Series

In [2]:
# Four consecutive years stored as strings, wrapped in a Series with the default integer index.
data = [str(year) for year in range(2017, 2021)]
se = pd.Series(data=data)
se

0    2017
1    2018
2    2019
3    2020
dtype: object

### index, values

In [3]:
se.index

RangeIndex(start=0, stop=4, step=1)

In [4]:
se.values

array(['2017', '2018', '2019', '2020'], dtype=object)

### 특정값 추출 - 인덱싱, 슬라이싱

In [5]:
se[0]

'2017'

In [6]:
se[0:3]

0    2017
1    2018
2    2019
dtype: object

In [7]:
se[:]

0    2017
1    2018
2    2019
3    2020
dtype: object

### Series 이름과 index 이름 지정하기 : name

In [8]:
type(se)

pandas.core.series.Series

In [9]:
se.name = "Year"
se

0    2017
1    2018
2    2019
3    2020
Name: Year, dtype: object

In [11]:
se.index.name = "No."
se

No.
0    2017
1    2018
2    2019
3    2020
Name: Year, dtype: object

### Series 생성시 index 지정하기

In [12]:
se = pd.Series(data, index = ['a', 'b', 'c', 'd'])
se

a    2017
b    2018
c    2019
d    2020
dtype: object

In [13]:
se['a']

'2017'

In [14]:
se['a':'d']

a    2017
b    2018
c    2019
d    2020
dtype: object

In [15]:
se.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [16]:
se.values

array(['2017', '2018', '2019', '2020'], dtype=object)

In [17]:
se.index = [0, 1, 2, 3]
se

0    2017
1    2018
2    2019
3    2020
dtype: object

### masking

In [18]:
se > '2018'

0    False
1    False
2     True
3     True
dtype: bool

In [19]:
se[se > '2018']

2    2019
3    2020
dtype: object

In [20]:
# Invert the boolean mask with `~` (logical NOT) to keep the rows that do NOT
# satisfy the condition; unary minus is arithmetic negation, not a mask inverse.
se[~(se > '2018')]

0    2017
1    2018
dtype: object

### DataFrame 생성 및 index 이름과 columns 이름 지정하기

In [3]:
# Column-oriented input: every key becomes one DataFrame column.
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 80, 85],
    grade=['A', 'A', 'B', 'B'],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,80,B
3,Choi,85,B


In [10]:
df = pd.DataFrame(data, columns=['name', 'grade', 'score']) # 순서 변경하기
df

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,80
3,Choi,B,85


### index, columns, values

In [11]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [12]:
df.columns

Index(['name', 'grade', 'score'], dtype='object')

In [13]:
df.values

array([['Lee', 'A', 100],
       ['Hwang', 'A', 95],
       ['Kim', 'B', 80],
       ['Choi', 'B', 85]], dtype=object)

### DataFrame의 index 설정
1) DataFrame 생성시 index 설정

In [14]:
df = pd.DataFrame(data, index = ['Lee', 'Hwang', 'Kim', 'Choi'])
df

Unnamed: 0,name,score,grade
Lee,Lee,100,A
Hwang,Hwang,95,A
Kim,Kim,80,B
Choi,Choi,85,B


In [17]:
df.drop('name', axis=1)   # index와 columns이 중복되므로 삭제할 경우

Unnamed: 0,score,grade
Lee,100,A
Hwang,95,A
Kim,80,B
Choi,85,B


In [18]:
df = df.set_index('name')  # 칼럼을 인덱스로 지정
df

Unnamed: 0_level_0,score,grade
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Lee,100,A
Hwang,95,A
Kim,80,B
Choi,85,B


In [4]:
df.index.name = 'No'
df.columns.name = 'Info'
df

Info,name,score,grade
No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Lee,100,A
1,Hwang,95,A
2,Kim,80,B
3,Choi,85,B


### 특정 칼럼 값 추출

In [7]:
df['name']  # Series로 추출

No
0      Lee
1    Hwang
2      Kim
3     Choi
Name: name, dtype: object

In [8]:
df['name'].values  # 값만 들고 오기

array(['Lee', 'Hwang', 'Kim', 'Choi'], dtype=object)

In [9]:
df.loc[1, 'name']   # index가 1이고, column이 name인 값

'Hwang'

### DataFrame 생성과 상호 변환
1) DataFrame과 리스트, 딕셔너리, 넘파이 ndarray 간의 상호 변환

In [19]:
# Create a DataFrame from a list of rows; column labels are passed separately.
data = [
    ['Lee', 100, 'A'],
    ['Hwang', 95, 'A'],
    ['Kim', 80, 'B'],
    ['Choi', 85, 'B'],
]
df = pd.DataFrame(data=data, columns=['name', 'score', 'grade'])
df

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,80,B
3,Choi,85,B


In [21]:
df.values.tolist()

[['Lee', 100, 'A'], ['Hwang', 95, 'A'], ['Kim', 80, 'B'], ['Choi', 85, 'B']]

In [22]:
# Create a DataFrame from a dictionary of column lists.
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 90, 85],
    grade=['A', 'A', 'B', 'B'],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,90,B
3,Choi,85,B


In [23]:
df.to_dict()

{'name': {0: 'Lee', 1: 'Hwang', 2: 'Kim', 3: 'Choi'},
 'score': {0: 100, 1: 95, 2: 90, 3: 85},
 'grade': {0: 'A', 1: 'A', 2: 'B', 3: 'B'}}

In [4]:
# Create a DataFrame from a NumPy ndarray.
import numpy as np
# NOTE: the rows mix strings and integers; the df.values output further down
# shows the scores as strings ('100', '95', ...), i.e. they are no longer numeric.
array = np.array([['Lee', 100, 'A'],
                  ['Hwang', 95, 'A'],
                  ['Kim', 90, 'B'],
                  ['Choi', 85, 'B']])
df = pd.DataFrame(array, columns = ['name', 'score', 'grade'])
df

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,90,B
3,Choi,85,B


In [6]:
arr = df.values
type(arr)

numpy.ndarray

In [7]:
arr

array([['Lee', '100', 'A'],
       ['Hwang', '95', 'A'],
       ['Kim', '90', 'B'],
       ['Choi', '85', 'B']], dtype=object)

### 칼럼값이 없는 경우 -> NaN 처리

In [8]:
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 90, 85],
    grade=['A', 'A', 'B', 'B'],
)
# 'etc' has no matching key in `data`, so that column comes out empty (NaN).
c = ['name', 'grade', 'score', 'etc']
df2 = pd.DataFrame(data=data, columns=c)
df2

Unnamed: 0,name,grade,score,etc
0,Lee,A,100,
1,Hwang,A,95,
2,Kim,B,90,
3,Choi,B,85,


## 데이터 셀렉션 및 필터링

In [2]:
# Sample frame for the selection examples; `columns` fixes the column order.
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 90, 85],
    grade=['A', 'A', 'B', 'B'],
)
columns = ['name', 'grade', 'score']

df = pd.DataFrame(data=data, columns=columns)
df

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,90
3,Choi,B,85


1) [] - df.칼럼명, df['칼럼명']

 - 하나의 칼럼 검색

In [3]:
df.name

0      Lee
1    Hwang
2      Kim
3     Choi
Name: name, dtype: object

In [4]:
df['name']

0      Lee
1    Hwang
2      Kim
3     Choi
Name: name, dtype: object

 - 2개 이상의 컬럼 검색

In [5]:
df[['name', 'grade']]

Unnamed: 0,name,grade
0,Lee,A
1,Hwang,A
2,Kim,B
3,Choi,B


In [8]:
df[['name', 'score', 'grade']]   # 실제 값이 바뀌지는 않음

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,90,B
3,Choi,85,B


2) loc[인덱스명, 칼럼명] - 명칭(label) 기반 인덱싱

 - 하나의 행 검색

In [10]:
df.loc[0]

name     Lee
grade      A
score    100
Name: 0, dtype: object

 - 2개 이상의 행 검색

In [11]:
df.loc[0:2]

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,90


- 여러 행과 여러 열 검색

In [12]:
df.loc[[0, 2]]

Unnamed: 0,name,grade,score
0,Lee,A,100
2,Kim,B,90


In [14]:
df.loc[0:2, ['score', 'grade']]

Unnamed: 0,score,grade
0,100,A
1,95,A
2,90,B


3) iloc[인덱스, 컬럼인덱스] - 위치(position) 기반 인덱싱

- 하나의 행 검색

In [15]:
df.iloc[3]

name     Choi
grade       B
score      85
Name: 3, dtype: object

- 여러 행 검색

In [19]:
df.iloc[0:3]  # position-based slice: the end position 3 is excluded (rows 0-2)

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,90


In [18]:
df.loc[0:3]   # label-based slice: the end label 3 is included (rows 0-3)

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,90
3,Choi,B,85


- 여러 행과 여러 열 검색

In [20]:
df.iloc[0:3, 0:2]

Unnamed: 0,name,grade
0,Lee,A
1,Hwang,A
2,Kim,B


4) 불린(Boolean) 인덱싱

In [21]:
df['score'] > 90

0     True
1     True
2    False
3    False
Name: score, dtype: bool

In [22]:
df[df['score'] > 90]

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95


In [23]:
df.loc[df['score'] > 90]

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95


In [24]:
df['grade'] == 'B'

0    False
1    False
2     True
3     True
Name: grade, dtype: bool

5) filter() 함수를 이용한 검색

 - filter() 함수

In [25]:
df.filter?

Signature:
df.filter(
    items=None,
    like: 'str | None' = None,
    regex: 'str | None' = None,
    axis: 'Axis | None' = None,
) -> 'Self'
Docstring:
Subset the dataframe rows or columns according to the specified index labels.

Note that this routine does not filter a dataframe on its
contents. The filter is applied to the labels of the index.

Parameters
----------
items : list-like
    Keep labels from axis which are in items.
like : str
    Keep labels from axis for which "like in label == True".
regex : str (regular expression)
    Keep labels from axis for which re.search(regex, label) == True.

In [26]:
df.filter(items=['name', 'score'])

Unnamed: 0,name,score
0,Lee,100
1,Hwang,95
2,Kim,90
3,Choi,85


In [27]:
df.filter(like='g', axis=1)  # 칼럼명에 'g'가 포함된 칼럼 출력

Unnamed: 0,grade
0,A
1,A
2,B
3,B


 - 정규식을 이용한 filter() 함수

In [30]:
df.filter(regex = '^n', axis=1)  # n으로 시작하는 칼럼

Unnamed: 0,name
0,Lee
1,Hwang
2,Kim
3,Choi


In [31]:
df.filter(regex='e$', axis=1)   # e로 끝나는 칼럼

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,90
3,Choi,B,85


## DataFrame 데이터 추가
1) Series 객체를 이용하여 열 데이터 추가


In [33]:
# Index label -> value; the labels line up with the DataFrame's row index.
points = pd.Series({0: 1.5, 1: 1.7, 2: 2.4, 3: 3.0})
points

0    1.5
1    1.7
2    2.4
3    3.0
dtype: float64

In [34]:
df['points'] = points
df

Unnamed: 0,name,grade,score,points
0,Lee,A,100,1.5
1,Hwang,A,95,1.7
2,Kim,B,90,2.4
3,Choi,B,85,3.0


In [35]:
# Only three labels, deliberately out of order; label 2 is absent.
points2 = pd.Series({3: 1.5, 1: 1.7, 0: 2.4})
points2

3    1.5
1    1.7
0    2.4
dtype: float64

In [36]:
df['points2'] = points2
df

Unnamed: 0,name,grade,score,points,points2
0,Lee,A,100,1.5,2.4
1,Hwang,A,95,1.7,1.7
2,Kim,B,90,2.4,
3,Choi,B,85,3.0,1.5


2) numpy 함수를 이용하여 열 데이터 추가


In [38]:
import numpy as np
np.zeros(4).astype(int)

array([0, 0, 0, 0])

In [39]:
df['etc'] = np.zeros(4).astype(int)
df

Unnamed: 0,name,grade,score,points,points2,etc
0,Lee,A,100,1.5,2.4,0
1,Hwang,A,95,1.7,1.7,0
2,Kim,B,90,2.4,,0
3,Choi,B,85,3.0,1.5,0


3) 연산을 통해 열 데이터 추가


In [40]:
df['score'] > 90

0     True
1     True
2    False
3    False
Name: score, dtype: bool

In [41]:
df['pass'] = df['score'] > 90
df

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100,1.5,2.4,0,True
1,Hwang,A,95,1.7,1.7,0,True
2,Kim,B,90,2.4,,0,False
3,Choi,B,85,3.0,1.5,0,False


4) loc[] 인덱서를 이용하여 행 데이터 추가

In [43]:
# Append a row by assigning to an index label that does not exist yet (5).
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
df.loc[5, :] = ['Park', 'C', 70, 1.0, np.nan, 0, False]
df

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100.0,1.5,2.4,0.0,True
1,Hwang,A,95.0,1.7,1.7,0.0,True
2,Kim,B,90.0,2.4,,0.0,False
3,Choi,B,85.0,3.0,1.5,0.0,False
5,Park,C,70.0,1.0,,0.0,False


In [48]:
df = df.reset_index(drop=True)
df

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100.0,1.5,2.4,0.0,True
1,Hwang,A,95.0,1.7,1.7,0.0,True
2,Kim,B,90.0,2.4,,0.0,False
3,Choi,B,85.0,3.0,1.5,0.0,False
4,Park,C,70.0,1.0,,0.0,False


## DataFrame 데이터 삭제
1) drop
 - DataFrame.drop(labels = None, axis = 0, index = None, columns = None, inplace = False)

In [52]:
df.drop(index=4)   # 실제로 삭제 X

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100.0,1.5,2.4,0.0,True
1,Hwang,A,95.0,1.7,1.7,0.0,True
2,Kim,B,90.0,2.4,,0.0,False
3,Choi,B,85.0,3.0,1.5,0.0,False


In [51]:
df

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100.0,1.5,2.4,0.0,True
1,Hwang,A,95.0,1.7,1.7,0.0,True
2,Kim,B,90.0,2.4,,0.0,False
3,Choi,B,85.0,3.0,1.5,0.0,False
4,Park,C,70.0,1.0,,0.0,False


In [55]:
# Drops row 4 from df itself (inplace=True).
# NOTE(review): the KeyError recorded below presumably comes from running this
# cell a second time, after label 4 had already been removed.
df.drop(index=4, inplace = True)

KeyError: '[4] not found in axis'

In [56]:
df

Unnamed: 0,name,grade,score,points,points2,etc,pass
0,Lee,A,100.0,1.5,2.4,0.0,True
1,Hwang,A,95.0,1.7,1.7,0.0,True
2,Kim,B,90.0,2.4,,0.0,False
3,Choi,B,85.0,3.0,1.5,0.0,False


In [60]:
df.drop('etc', axis=1)

Unnamed: 0,name,grade,score,points,points2,pass
0,Lee,A,100.0,1.5,2.4,True
1,Hwang,A,95.0,1.7,1.7,True
2,Kim,B,90.0,2.4,,False
3,Choi,B,85.0,3.0,1.5,False


In [61]:
df.drop('etc', axis=1, inplace = True)

In [62]:
df

Unnamed: 0,name,grade,score,points,points2,pass
0,Lee,A,100.0,1.5,2.4,True
1,Hwang,A,95.0,1.7,1.7,True
2,Kim,B,90.0,2.4,,False
3,Choi,B,85.0,3.0,1.5,False


In [67]:
df2 = df.drop('points2', axis=1)

In [68]:
df2

Unnamed: 0,name,grade,score,points,pass
0,Lee,A,100.0,1.5,True
1,Hwang,A,95.0,1.7,True
2,Kim,B,90.0,2.4,False
3,Choi,B,85.0,3.0,False


In [69]:
df

Unnamed: 0,name,grade,score,points,points2,pass
0,Lee,A,100.0,1.5,2.4,True
1,Hwang,A,95.0,1.7,1.7,True
2,Kim,B,90.0,2.4,,False
3,Choi,B,85.0,3.0,1.5,False


2) del
   - del df["칼럼명"]

In [70]:
del df['points2']  # 바로 삭제
df

Unnamed: 0,name,grade,score,points,pass
0,Lee,A,100.0,1.5,True
1,Hwang,A,95.0,1.7,True
2,Kim,B,90.0,2.4,False
3,Choi,B,85.0,3.0,False


## 집합(Aggregation) 함수
- sum(), max(), min(), count() 등의 집합(aggregation) 함수로 연산 수행

In [71]:
df.sum()

name      LeeHwangKimChoi
grade                AABB
score               370.0
points                8.6
pass                    2
dtype: object

In [72]:
# NOTE: fails with the TypeError recorded below because the frame still holds
# string columns (name, grade); restrict the call to numeric data, e.g.
# df.mean(numeric_only=True), or select the numeric columns first.
df.mean()

TypeError: Could not convert ['LeeHwangKimChoi' 'AABB' 370.0] to numeric

In [73]:
df.max()

name        Lee
grade         B
score     100.0
points      3.0
pass       True
dtype: object

In [74]:
df.min()

name       Choi
grade         A
score      85.0
points      1.5
pass      False
dtype: object

In [75]:
df.describe()

Unnamed: 0,points
count,4.0
mean,2.15
std,0.685565
min,1.5
25%,1.65
50%,2.05
75%,2.55
max,3.0


### 하나의 칼럼에 집합 함수 적용

In [76]:
df.score

0    100.0
1     95.0
2     90.0
3     85.0
Name: score, dtype: object

In [77]:
df.score.mean()

92.5

In [78]:
df['score'].mean()

92.5

In [79]:
df.loc[:, 'score'].mean()

92.5

In [81]:
df.iloc[:, 2].mean()

92.5

### 여러 칼럼에 집합 함수 적용

In [83]:
df[['score', 'points']]

Unnamed: 0,score,points
0,100.0,1.5
1,95.0,1.7
2,90.0,2.4
3,85.0,3.0


In [84]:
df[['score', 'points']].sum(axis=0)

score     370.0
points      8.6
dtype: object

In [85]:
df[['score', 'points']].sum(axis=1)

0    101.5
1     96.7
2     92.4
3     88.0
dtype: object

## index 순으로 정렬: sort_index()

In [2]:
# Sample frame used by the sorting examples.
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 80, 85],
    grade=['A', 'A', 'B', 'B'],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,80,B
3,Choi,85,B


In [3]:
df.sort_index()

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
2,Kim,80,B
3,Choi,85,B


In [4]:
df.sort_index(axis=1)  # 칼럼순으로 정렬

Unnamed: 0,grade,name,score
0,A,Lee,100
1,A,Hwang,95
2,B,Kim,80
3,B,Choi,85


In [6]:
df.sort_index(axis=0, ascending = False)  # sort rows by index label, in descending order

Unnamed: 0,name,score,grade
3,Choi,85,B
2,Kim,80,B
1,Hwang,95,A
0,Lee,100,A


## 지정된 칼럼의 칼럼값 순으로 정렬: sort_values(by=['칼럼명'])

In [7]:
# 오름차순 정렬
df.sort_values(by=['score'])

Unnamed: 0,name,score,grade
2,Kim,80,B
3,Choi,85,B
1,Hwang,95,A
0,Lee,100,A


In [8]:
# 내림차순 정렬
df.sort_values(by=['score'], ascending=False)

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
3,Choi,85,B
2,Kim,80,B


In [10]:
# 2개 이상의 칼럼값으로 정렬
df.sort_values(by=['grade', 'score'])  # grade 먼저 정렬한 뒤 score 정렬

Unnamed: 0,name,score,grade
1,Hwang,95,A
0,Lee,100,A
2,Kim,80,B
3,Choi,85,B


In [12]:
# 2개 이상의 칼럼값으로 정렬
df.sort_values(by=['grade', 'score'], ascending =[True, False])  # grade 먼저 정렬한 뒤 score는 높은 순으로 정렬

Unnamed: 0,name,score,grade
0,Lee,100,A
1,Hwang,95,A
3,Choi,85,B
2,Kim,80,B


## 결손 데이터(Missing Data) 처리
1) isna() 또는 isnull(): 주어진 칼럼 값들이 NaN인지 확인
2) fillna(): NaN 데이터를 찾아 value로 지정된 값으로 변환
3) dropna(): 행 데이터를 기준으로 NaN 값이 포함된 데이터를 제거

In [4]:
# Sample frame used by the missing-data examples; `columns` fixes the column order.
data = dict(
    name=['Lee', 'Hwang', 'Kim', 'Choi'],
    score=[100, 95, 80, 85],
    grade=['A', 'A', 'B', 'B'],
)
columns = ['name', 'grade', 'score']

df = pd.DataFrame(data=data, columns=columns)
df

Unnamed: 0,name,grade,score
0,Lee,A,100
1,Hwang,A,95
2,Kim,B,80
3,Choi,B,85


In [7]:
# NaN 값을 가진 칼럼 추가
df['point'] = np.nan
df

Unnamed: 0,name,grade,score,point
0,Lee,A,100,
1,Hwang,A,95,
2,Kim,B,80,
3,Choi,B,85,


In [8]:
df.isna()

Unnamed: 0,name,grade,score,point
0,False,False,False,True
1,False,False,False,True
2,False,False,False,True
3,False,False,False,True


In [9]:
df.isnull()

Unnamed: 0,name,grade,score,point
0,False,False,False,True
1,False,False,False,True
2,False,False,False,True
3,False,False,False,True


In [10]:
df.fillna(value = 0, inplace=True)
df

Unnamed: 0,name,grade,score,point
0,Lee,A,100,0.0
1,Hwang,A,95,0.0
2,Kim,B,80,0.0
3,Choi,B,85,0.0


In [11]:
df['point2'] = [50, 100, 70, np.nan]
df

Unnamed: 0,name,grade,score,point,point2
0,Lee,A,100,0.0,50.0
1,Hwang,A,95,0.0,100.0
2,Kim,B,80,0.0,70.0
3,Choi,B,85,0.0,


In [13]:
# Fill only the 'point2' column with its own mean. A bare scalar would be
# written into the missing cells of every column, not just this one.
# Returns a new frame; df itself is left unchanged.
df.fillna(value={'point2': df['point2'].mean()})

Unnamed: 0,name,grade,score,point,point2
0,Lee,A,100,0.0,50.0
1,Hwang,A,95,0.0,100.0
2,Kim,B,80,0.0,70.0
3,Choi,B,85,0.0,73.333333


In [14]:
df

Unnamed: 0,name,grade,score,point,point2
0,Lee,A,100,0.0,50.0
1,Hwang,A,95,0.0,100.0
2,Kim,B,80,0.0,70.0
3,Choi,B,85,0.0,


In [15]:
df.dropna()

Unnamed: 0,name,grade,score,point,point2
0,Lee,A,100,0.0,50.0
1,Hwang,A,95,0.0,100.0
2,Kim,B,80,0.0,70.0


In [17]:
df.dropna(how='all')  # drop only the rows in which every value is NaN

Unnamed: 0,name,grade,score,point,point2
0,Lee,A,100,0.0,50.0
1,Hwang,A,95,0.0,100.0
2,Kim,B,80,0.0,70.0
3,Choi,B,85,0.0,
