# 데이터프레임 행(row) 삭제하기 - drop 함수 리턴값 사용

**판다스의 drop(...)함수는 일반적으로 데이터프레임의 행(Row)과 열(Column)을 삭제하기 위해 사용됨**

In [2]:
import pandas as pd

# Sample records used throughout: one list per column, aligned by position.
persons_dict = {
    'Name': ['John', 'Wei', 'Smith', 'Park', 'Lee'],
    'Country': ['USA', 'China', 'USA', 'Korea', 'Korea'],
    'Age': [31, 33, 28, 41, 55],
    'Job': ['Singer', 'Lawyer', 'Chef', 'Doctor', 'Chef'],
}

# Listing the columns explicitly fixes their order in the resulting frame.
df1 = pd.DataFrame(persons_dict, columns=['Name', 'Country', 'Age', 'Job'])

df1

Unnamed: 0,Name,Country,Age,Job
0,John,USA,31,Singer
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


In [3]:
# drop() hands back a new frame without the first row; the result is
# deliberately not stored here.
df1.drop(labels=df1.index[0], axis=0)
# df1 itself has not changed.
df1

Unnamed: 0,Name,Country,Age,Job
0,John,USA,31,Singer
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


drop(...) 함수는 원본 데이터프레임을 수정하지 않고, 인덱스 0인 행을 삭제한 새로운 (임시) 데이터프레임을 리턴함

In [4]:
# Rebinding df1 to the returned frame is what makes the removal of the
# row labelled 0 take effect.
df1 = df1.drop(labels=df1.index[0], axis=0)
df1

Unnamed: 0,Name,Country,Age,Job
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


# 데이터프레임 행(row) 삭제하기 - drop 함수 inplace=True 옵션 사용

**drop()함수 사용시 inplace=True 옵션을 사용**하면 원본 데이터프레임에 리턴값을 대입하지 않아도 됨

In [5]:
# Rebuild df1 from the original records so this example starts with all rows.
df1 = pd.DataFrame(persons_dict, columns=['Name', 'Country', 'Age', 'Job'])

df1

Unnamed: 0,Name,Country,Age,Job
0,John,USA,31,Singer
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


In [6]:
# Remove the single row at position 0, modifying df1 itself.
df1.drop(labels=df1.index[0], axis=0, inplace=True)
df1

Unnamed: 0,Name,Country,Age,Job
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


현재 df1의 0번째 행은 Wei, 1번째 행은 Smith, 2번째 행은 Park, 3번째 행은 Lee임 (행의 위치 기준)  
*행의 위치를 인덱스 레이블(1, 2, 3, 4)과 혼동하면 안 됨*

In [7]:
# Non-adjacent rows: index df1.index with a list of positions to get the
# labels to remove.
df1.drop(labels=df1.index[[1, 3]], axis=0, inplace=True)
df1

Unnamed: 0,Name,Country,Age,Job
1,Wei,China,33,Lawyer
3,Park,Korea,41,Doctor


# 데이터프레임 열(column) 삭제하기 - drop 함수 axis=1 옵션 사용

**데이터프레임의 열(Column)을 삭제하기 위해서는 drop(...)함수에서 axis=1 옵션을 사용함**

In [8]:
# Rebuild df1 from the original records before the column-removal examples.
df1 = pd.DataFrame(persons_dict, columns=['Name', 'Country', 'Age', 'Job'])

df1

Unnamed: 0,Name,Country,Age,Job
0,John,USA,31,Singer
1,Wei,China,33,Lawyer
2,Smith,USA,28,Chef
3,Park,Korea,41,Doctor
4,Lee,Korea,55,Chef


In [9]:
# Remove a column: axis=1 makes drop() look the label up among the columns.
df1.drop(labels='Job', axis=1, inplace=True)
df1

Unnamed: 0,Name,Country,Age
0,John,USA,31
1,Wei,China,33
2,Smith,USA,28
3,Park,Korea,41
4,Lee,Korea,55


In [10]:
# Several columns can be removed in one call by passing a list of labels.
df1.drop(labels=['Name', 'Country'], axis=1, inplace=True)

In [11]:
df1

Unnamed: 0,Age
0,31
1,33
2,28
3,41
4,55


In [14]:
# Start again from the full table.
df1 = pd.DataFrame(persons_dict, columns=['Name', 'Country', 'Age', 'Job'])

# Assigning a scalar to a new column name fills every row with that value.
df1['salary'] = 0
df1

Unnamed: 0,Name,Country,Age,Job,salary
0,John,USA,31,Singer,0
1,Wei,China,33,Lawyer,0
2,Smith,USA,28,Chef,0
3,Park,Korea,41,Doctor,0
4,Lee,Korea,55,Chef,0


In [16]:
import numpy as np

In [17]:
# Overwrite 'salary' row by row: 'no' where Job equals 'student', 'yes' otherwise.
df1['salary'] = np.where(df1['Job'].ne('student'), 'yes', 'no')
df1

Unnamed: 0,Name,Country,Age,Job,salary
0,John,USA,31,Singer,yes
1,Wei,China,33,Lawyer,yes
2,Smith,USA,28,Chef,yes
3,Park,Korea,41,Doctor,yes
4,Lee,Korea,55,Chef,yes


In [43]:
# Exam scores, one dict per student.
friend_dict_list = [
    {'name': 'Jone', 'midterm': 95, 'final': 85},
    {'name': 'Jenney', 'midterm': 85, 'final': 80},
    {'name': 'Nate', 'midterm': 30, 'final': 10},
    {'name': 'ChangSu', 'midterm': 100, 'final': 100},
]

# Each dict becomes one row; the columns argument fixes the column order.
df2 = pd.DataFrame(friend_dict_list, columns=['name', 'midterm', 'final'])
df2

Unnamed: 0,name,midterm,final
0,Jone,95,85
1,Jenney,85,80
2,Nate,30,10
3,ChangSu,100,100


In [44]:
# New column: element-wise sum of the two exam scores.
df2['total'] = df2['midterm'].add(df2['final'])

In [45]:
df2

Unnamed: 0,name,midterm,final,total
0,Jone,95,85,180
1,Jenney,85,80,165
2,Nate,30,10,40
3,ChangSu,100,100,200


In [46]:
# New column: mean of the two exams (true division, so the result is float).
df2['average'] = df2['total'].div(2)
df2

Unnamed: 0,name,midterm,final,total,average
0,Jone,95,85,180,90.0
1,Jenney,85,80,165,82.5
2,Nate,30,10,40,20.0
3,ChangSu,100,100,200,100.0


In [51]:
# Letter grade per student from the average score:
# 90 and above is 'A', 80 and above is 'B', everything else is 'F'.
grades = [
    'A' if score >= 90 else 'B' if score >= 80 else 'F'
    for score in df2['average']
]

# The list is in row order, so it can be assigned directly as a column.
df2['grade'] = grades
df2

Unnamed: 0,name,midterm,final,total,average,grade
0,Jone,95,85,180,90.0,A
1,Jenney,85,80,165,82.5,B
2,Nate,30,10,40,20.0,F
3,ChangSu,100,100,200,100.0,A


In [58]:
def pass_or_fail(row):
    """Return "Fail" when ``row`` equals 'F', and "Pass" for any other value."""
    return "Pass" if row != 'F' else "Fail"

In [59]:
# Replace each letter grade with the "Pass"/"Fail" label from pass_or_fail.
df2['grade'] = df2['grade'].apply(pass_or_fail)

In [61]:
df2.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,Jone,95,85,180,90.0,Pass
1,Jenney,85,80,165,82.5,Pass
2,Nate,30,10,40,20.0,Fail
3,ChangSu,100,100,200,100.0,Pass


In [65]:
# Two sample dates stored as 'yyyy-mm-dd' strings, one dict per row.
date_list = [
    {'yyyy-mm-dd': '2000-06-27'},
    {'yyyy-mm-dd': '2007-10-27'},
]

df = pd.DataFrame(data=date_list, columns=['yyyy-mm-dd'])

In [66]:
df.head()

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2007-10-27


In [67]:
def extract_year(row):
    """Return the part of ``row`` before its first '-' (the year in 'yyyy-mm-dd')."""
    year, *_ = row.split('-', 1)
    return year

In [68]:
# New column: run extract_year on every date string.
df['year'] = df['yyyy-mm-dd'].map(extract_year)

In [69]:
df.head()

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2007-10-27,2007


In [74]:
# Exam scores again, one dict per student, loaded into df for the append example.
friend_dict_list = [
    {'name': 'Jone', 'midterm': 95, 'final': 85},
    {'name': 'Jenney', 'midterm': 85, 'final': 80},
    {'name': 'Nate', 'midterm': 30, 'final': 10},
    {'name': 'ChangSu', 'midterm': 100, 'final': 100},
]

df = pd.DataFrame(friend_dict_list, columns=['name', 'midterm', 'final'])
df

Unnamed: 0,name,midterm,final
0,Jone,95,85
1,Jenney,85,80
2,Nate,30,10
3,ChangSu,100,100


In [75]:
# A one-row frame with the same columns as df, holding the student to add.
df2 = pd.DataFrame(
    [{'name': 'Ben', 'midterm': 50, 'final': 50}],
    columns=['name', 'midterm', 'final'],
)

In [76]:
df2.head()

Unnamed: 0,name,midterm,final
0,Ben,50,50


In [77]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows of
# df2 under df instead. ignore_index=True renumbers the result 0..n-1.
# A new frame is returned; df and df2 themselves are not modified.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,name,midterm,final
0,Jone,95,85
1,Jenney,85,80
2,Nate,30,10
3,ChangSu,100,100
4,Ben,50,50
