# Dataframe

- 2차원 행령(표)이자 Series를 묶어낸 자료형

In [218]:
import pandas as pd
import numpy as np

---

## Dataframe 생성

In [219]:
# 2차원 ndarray 활용
arr2d = np.random.randn(2,3)

# 방법 1
df1 = pd.DataFrame(arr2d, columns=["col1", "col2","col3"])
df1.index = ['ㄱ','ㄴ']
df1.columns = ['A','B','C']

# 방법 2: 키워드 인자로 주기
df2 = pd.DataFrame(arr2d,index=['a','b'] ,columns=["col1", "col2","col3"])

print(df1, '\n')
print(df2)

          A         B         C
ㄱ  0.123172 -1.445708  1.904291
ㄴ -0.044677 -2.125449 -0.929913 

       col1      col2      col3
a  0.123172 -1.445708  1.904291
b -0.044677 -2.125449 -0.929913


In [220]:
# Dict-list 활용
data = {
    'one':[1,2,3,4,5],
    'two':['가','나','다','라','마'],
    'three':[1.23, 2.34, 3.45, 4.56, 5.67],
    'four':True
}

df = pd.DataFrame(data)
df

Unnamed: 0,one,two,three,four
0,1,가,1.23,True
1,2,나,2.34,True
2,3,다,3.45,True
3,4,라,4.56,True
4,5,마,5.67,True


In [221]:
# list-dict 활6용
data = [
    {'a':1, 'b':2, 'c': 3},
    {'b':5, 'c':6},
    {'a':7, 'b':8, 'c':9},
]

df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1.0,2,3
1,,5,6
2,7.0,8,9


In [222]:
df.index = ['다람쥐', '고릴라', '개구리']
df.columns = ['협동심', '성실도', '인내심']

print(df.index)
print(df.columns)
df

Index(['다람쥐', '고릴라', '개구리'], dtype='object')
Index(['협동심', '성실도', '인내심'], dtype='object')


Unnamed: 0,협동심,성실도,인내심
다람쥐,1.0,2,3
고릴라,,5,6
개구리,7.0,8,9


In [223]:
# 전치행렬 (행-열 변환)
df.T

Unnamed: 0,다람쥐,고릴라,개구리
협동심,1.0,,7.0
성실도,2.0,5.0,8.0
인내심,3.0,6.0,9.0


## DataFrame 속성

In [224]:
# df의 index만 모아서 출력
print(df.index)
# df의 column 모아서 출력
print(df.columns)
# df의 value만 모아서 출력
print(df.values, type(df.values))

Index(['다람쥐', '고릴라', '개구리'], dtype='object')
Index(['협동심', '성실도', '인내심'], dtype='object')
[[ 1.  2.  3.]
 [nan  5.  6.]
 [ 7.  8.  9.]] <class 'numpy.ndarray'>


In [225]:
# df의 전치행렬 출력
df.T

Unnamed: 0,다람쥐,고릴라,개구리
협동심,1.0,,7.0
성실도,2.0,5.0,8.0
인내심,3.0,6.0,9.0


In [226]:
# df의 형태 출력
print(df.shape)
# df의 요소 개수 출력
print(df.size)
# df의 깊이 출력
print(df.ndim)

print()
# df의 요소의 자료형 출력
print(df.dtypes)

(3, 3)
9
2

협동심    float64
성실도      int64
인내심      int64
dtype: object


---

## DataFrame 메소드

In [227]:
bank_client_df = pd.DataFrame({
    'Client ID': [1, 2, 3, 4],
    'Client Name': ['Aly', 'Steve', 'Nicole', 'Morris'],
    'Net worth [$]': [35000, 3000, 100000, 2000],
    'Years with bank': [4, 7, 10, 15]
})

In [228]:
bank_client_df.head()

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
0,1,Aly,35000,4
1,2,Steve,3000,7
2,3,Nicole,100000,10
3,4,Morris,2000,15


In [229]:
bank_client_df.tail()

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
0,1,Aly,35000,4
1,2,Steve,3000,7
2,3,Nicole,100000,10
3,4,Morris,2000,15


In [230]:
bank_client_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Client ID        4 non-null      int64 
 1   Client Name      4 non-null      object
 2   Net worth [$]    4 non-null      int64 
 3   Years with bank  4 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 260.0+ bytes


In [231]:
bank_client_df.describe()

Unnamed: 0,Client ID,Net worth [$],Years with bank
count,4.0,4.0,4.0
mean,2.5,35000.0,9.0
std,1.290994,45963.753836,4.690416
min,1.0,2000.0,4.0
25%,1.75,2750.0,6.25
50%,2.5,19000.0,8.5
75%,3.25,51250.0,11.25
max,4.0,100000.0,15.0


In [232]:
# 인덱싱 & 슬라이딩
bank_client_df['Client Name']

0       Aly
1     Steve
2    Nicole
3    Morris
Name: Client Name, dtype: object

In [233]:
# 인덱싱 & 슬라이싱
# iloc -  행/열 순서로 조회
# loc - 행/열 순서로 조회

# 아래처럼 인덱싱 하면 내부적으로 메서드 호출해서 대괄호 안에 값으로 컬럼명을 찾음
#bank_client_df[0]

print(bank_client_df.iloc[0])
print(type(bank_client_df.iloc[0]))
print(bank_client_df.iloc[0].index)
print(bank_client_df.iloc[0].values)

Client ID              1
Client Name          Aly
Net worth [$]      35000
Years with bank        4
Name: 0, dtype: object
<class 'pandas.core.series.Series'>
Index(['Client ID', 'Client Name', 'Net worth [$]', 'Years with bank'], dtype='object')
[np.int64(1) 'Aly' np.int64(35000) np.int64(4)]


In [234]:
# 슬라이싱과 fancy indexing 비교

# 슬라이싱
print(bank_client_df.iloc[:2])
print(type(bank_client_df.iloc[:2]))

# Fancy indexing
# fancy indexing을 통한 조회는 Dataframe 타입으로 변환
print(bank_client_df.iloc[[0,1]])
print(type(bank_client_df.iloc[[0,1]]))

   Client ID Client Name  Net worth [$]  Years with bank
0          1         Aly          35000                4
1          2       Steve           3000                7
<class 'pandas.core.frame.DataFrame'>
   Client ID Client Name  Net worth [$]  Years with bank
0          1         Aly          35000                4
1          2       Steve           3000                7
<class 'pandas.core.frame.DataFrame'>


In [235]:
# fancy indexing을 통한 조회는 "결과가 1개여도" Dataframe 타입으로 변환
# -> Series를 반환한다는 것은 차원을 축소(제거) 하는 것
# -> Dataframe을 반환한다는 것은 차원 유지

print(bank_client_df.iloc[2].shape)
print(type(bank_client_df.iloc[2])) # series

print(bank_client_df.iloc[[2]].shape)
print(type(bank_client_df.iloc[[2]])) # df

(4,)
<class 'pandas.core.series.Series'>
(1, 4)
<class 'pandas.core.frame.DataFrame'>


In [236]:
# 2차원에 대한 indexing/slicing
bank_client_df.iloc[0,1] # (행 0, 열 1) 의 값 가져옴

'Aly'

In [237]:
print(bank_client_df.iloc[:2, 1])
print(type(bank_client_df.iloc[:2, 1]))

0      Aly
1    Steve
Name: Client Name, dtype: object
<class 'pandas.core.series.Series'>


In [238]:
print(bank_client_df.iloc[:2, 2:])
print(type(bank_client_df.iloc[:2, 2:]))

   Net worth [$]  Years with bank
0          35000                4
1           3000                7
<class 'pandas.core.frame.DataFrame'>


In [239]:
bank_client_df.index = ['client1', 'client2', 'client3', 'client4']

In [240]:
bank_client_df

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client1,1,Aly,35000,4
client2,2,Steve,3000,7
client3,3,Nicole,100000,10
client4,4,Morris,2000,15


In [241]:
# iloc/loc 둘 다 행으로 찾기 때문에 아래처럼 하면 에러
#bank_client_df.loc['Client ID']

bank_client_df.loc['client1']

Client ID              1
Client Name          Aly
Net worth [$]      35000
Years with bank        4
Name: client1, dtype: object

In [242]:
bank_client_df.loc['client2':'client4':2] # pandas는 end 포함

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client2,2,Steve,3000,7
client4,4,Morris,2000,15


In [243]:
bank_client_df.loc['client2':'client4':2, 'Client Name'] 

client2     Steve
client4    Morris
Name: Client Name, dtype: object

In [244]:
bank_client_df.loc['client2':'client4':2, ['Client Name', 'Years with bank']] 

Unnamed: 0,Client Name,Years with bank
client2,Steve,7
client4,Morris,15


In [245]:
bank_client_df.loc['client2':'client4':2, 'Client Name':'Years with bank'] 

Unnamed: 0,Client Name,Net worth [$],Years with bank
client2,Steve,3000,7
client4,Morris,2000,15


In [246]:
# 이름이 Steve인 고객 정보 출력

#bank_client_df.loc[bank_client_df['Client Name'] == 'Steve', :] 
bank_client_df[bank_client_df['Client Name'] == 'Steve'] # 위랑 같음

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client2,2,Steve,3000,7


In [247]:
# Clinent Name만 출력
bank_client_df[['Client Name']]

Unnamed: 0,Client Name
client1,Aly
client2,Steve
client3,Nicole
client4,Morris


In [248]:
# client Name과 Net Worth [$] 출력

#bank_client_df.loc[:, ['Client Name', 'Net worth [$]']]
bank_client_df[['Client Name', 'Net worth [$]']] # 위도 같음

Unnamed: 0,Client Name,Net worth [$]
client1,Aly,35000
client2,Steve,3000
client3,Nicole,100000
client4,Morris,2000


#### .filter()

In [249]:
# filter()
bank_client_df.filter(items=['Client Name', 'Net worth [$]'])

Unnamed: 0,Client Name,Net worth [$]
client1,Aly,35000
client2,Steve,3000
client3,Nicole,100000
client4,Morris,2000


In [250]:
bank_client_df.filter(like='$', axis=1) # 열 축에서 $ 들어가 있는 거 찾음

Unnamed: 0,Net worth [$]
client1,35000
client2,3000
client3,100000
client4,2000


In [251]:
bank_client_df.filter(like='4', axis=0) # 행에서 4 들어간 것 찾음

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client4,4,Morris,2000,15


In [252]:
# ID가 3인 고객 조회
bank_client_df.filter(like='3', axis=0)

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client3,3,Nicole,100000,10


In [253]:
# 순자산(예치 금액)이 5000달러 넘는 고객 조회
bank_client_df[bank_client_df['Net worth [$]'] > 5000]

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client1,1,Aly,35000,4
client3,3,Nicole,100000,10


In [254]:
# 거래 기간이 5년 이상인 고객
bank_client_df[bank_client_df['Years with bank'] > 5]

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client2,2,Steve,3000,7
client3,3,Nicole,100000,10
client4,4,Morris,2000,15


In [255]:
# 이름이 S로 시작하는 고객 조회 (이거 풀어야 함)
bank_client_df.loc[bank_client_df['Client Name'].str.startswith('S')]

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client2,2,Steve,3000,7


In [256]:
# 거래 기간이 5년 이상이면서 이름이 S로 시작하는 고객 (&, |, ~) (이거 풀어야함)
bank_client_df.loc[bank_client_df['Client Name'].str.startswith('S') & (bank_client_df['Years with bank'] >= 5)]

Unnamed: 0,Client ID,Client Name,Net worth [$],Years with bank
client2,2,Steve,3000,7


---

## 행 추가 및 삭제

In [257]:
students = [
    {'name': '호랑이', 'midterm': 95, 'final': 85},
    {'name': '늑대', 'midterm': 93, 'final': 90},
    {'name': '양', 'midterm': 100, 'final': 10}
]

df = pd.DataFrame(students)
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90
2,양,100,10


In [258]:
# 행 추가 1. loc 이용
df.loc[3] = ['다람쥐', 100, 100]
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90
2,양,100,10
3,다람쥐,100,100


In [259]:
# 행 추가 2. pd.concat(): 데이터프레임 병함
add_student_df = pd.DataFrame(
    [['곰', 99, 24]],
    columns=['name', 'midterm', 'final']
)


df = pd.concat([df,add_student_df], ignore_index=True) # ignore_index True = 새롭게 인덱스 부여
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90
2,양,100,10
3,다람쥐,100,100
4,곰,99,24


In [260]:
# 행 삭제: drop 이용 (index) -> inplace 연산 아님
#df = df.drop(df.index[[0]])
df.drop(df.index[[4]], inplace=True) # index=4를 지우는게 아니라 4번째 위치에 있는 놈을 치우는 것
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90
2,양,100,10
3,다람쥐,100,100


In [261]:
df.drop(df.index[2], inplace=True) # 이것도 되네...?
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90
3,다람쥐,100,100


In [262]:
df.drop(df.index[2], inplace=True) # ????
df

Unnamed: 0,name,midterm,final
0,호랑이,95,85
1,늑대,93,90


---

# 컬럼 추가 및 삭제

In [263]:
df = pd.DataFrame({
    '이름': ['다람쥐', '판다', '코알라'],
    '위치': ['독산', '종로', '하남'],
    '성별': ['M', 'F', 'F'],
    '키': [179, 165, 157],
    '체중': [50.1, 48.2, 51.3]
})

df

Unnamed: 0,이름,위치,성별,키,체중
0,다람쥐,독산,M,179,50.1
1,판다,종로,F,165,48.2
2,코알라,하남,F,157,51.3


In [264]:
# 컬럼 추가 1. 기본값 이용
df['취미'] = '인공지능 공부' #  원래 없던 거 넣기
df

Unnamed: 0,이름,위치,성별,키,체중,취미
0,다람쥐,독산,M,179,50.1,인공지능 공부
1,판다,종로,F,165,48.2,인공지능 공부
2,코알라,하남,F,157,51.3,인공지능 공부


In [265]:
# 컬럼 추가 2. np.where(조건, True일 때 값, False일 때 값) -> ndarray 이용
df['성별(한글)'] = np.where(df['성별']=='M', '남성', '여성') # 조건에 해당하는 좌표 돌려줌
df

Unnamed: 0,이름,위치,성별,키,체중,취미,성별(한글)
0,다람쥐,독산,M,179,50.1,인공지능 공부,남성
1,판다,종로,F,165,48.2,인공지능 공부,여성
2,코알라,하남,F,157,51.3,인공지능 공부,여성


In [266]:
# BMI = kg / m^2
# 기존 컬럼 이용해서 연산 -> Series 이용
# df['BMI'] = ?
df['BMI'] = df['체중'] / ((df['키'] * 0.01)**2)
df

Unnamed: 0,이름,위치,성별,키,체중,취미,성별(한글),BMI
0,다람쥐,독산,M,179,50.1,인공지능 공부,남성,15.636216
1,판다,종로,F,165,48.2,인공지능 공부,여성,17.704316
2,코알라,하남,F,157,51.3,인공지능 공부,여성,20.812203


In [267]:
# 컬럼 추가 3. apply() 이용
def get_type(value):
    return '저체중' if value < 18 else '표준'

# !!함수 뒤에 () 붙으면 '호출'하는 것, 안붙으면 함수를 '전달'하는 것!!
df['BMI type'] = df['BMI'].apply(get_type) # apply(): 고차함수 / get_type: 콜백함수
df

Unnamed: 0,이름,위치,성별,키,체중,취미,성별(한글),BMI,BMI type
0,다람쥐,독산,M,179,50.1,인공지능 공부,남성,15.636216,저체중
1,판다,종로,F,165,48.2,인공지능 공부,여성,17.704316,저체중
2,코알라,하남,F,157,51.3,인공지능 공부,여성,20.812203,표준


In [268]:
# 컬럼 삭제 drop() 이용
df.drop('BMI type', axis=1, inplace=True) # axis 기본 값 = 0
df

Unnamed: 0,이름,위치,성별,키,체중,취미,성별(한글),BMI
0,다람쥐,독산,M,179,50.1,인공지능 공부,남성,15.636216
1,판다,종로,F,165,48.2,인공지능 공부,여성,17.704316
2,코알라,하남,F,157,51.3,인공지능 공부,여성,20.812203


## 정렬


In [269]:
df = pd.read_csv("./data/bank_client_information.csv")
df

Unnamed: 0,First Name,Last Name,Email,Postal Code,Net Worth,Years with Bank
0,Bird,Steve,bird@gmail.com,N94 3M0,5000.0,5
1,Noah,Small,nsmall@hotmail.com,N8S 14K,10000.0,6
2,Nina,Keller,azikez@gahew.mr,S1T 4E6,9072.02,7
3,Chanel,Steve,chanel@gmail.com,N7T 3E6,11072.02,10
4,Kate,Noor,kate@hotmail.com,K8N 5H6,5000.0,22
5,Samer,Mo,samer@gmail.com,J7H 3HY,100000.0,26
6,Heba,Ismail,heba.ismail@hotmail.com,K8Y 3M8,50000.0,11
7,Laila,Ahmed,Laila.a@hotmail.com,J8Y 3M0,20000.0,3
8,Joseph,Patton,daafeja@boh.jm,M6U 5U7,2629.13,1
9,Noah,Moran,guutodi@bigwoc.kw,K2D 4M9,8626.96,13


In [270]:
desc_sorted_worth = df.sort_values('Net Worth', ascending=False) # Net Worth 기준 내림차순
desc_sorted_worth

Unnamed: 0,First Name,Last Name,Email,Postal Code,Net Worth,Years with Bank
5,Samer,Mo,samer@gmail.com,J7H 3HY,100000.0,26
6,Heba,Ismail,heba.ismail@hotmail.com,K8Y 3M8,50000.0,11
7,Laila,Ahmed,Laila.a@hotmail.com,J8Y 3M0,20000.0,3
3,Chanel,Steve,chanel@gmail.com,N7T 3E6,11072.02,10
1,Noah,Small,nsmall@hotmail.com,N8S 14K,10000.0,6
2,Nina,Keller,azikez@gahew.mr,S1T 4E6,9072.02,7
9,Noah,Moran,guutodi@bigwoc.kw,K2D 4M9,8626.96,13
0,Bird,Steve,bird@gmail.com,N94 3M0,5000.0,5
4,Kate,Noor,kate@hotmail.com,K8N 5H6,5000.0,22
8,Joseph,Patton,daafeja@boh.jm,M6U 5U7,2629.13,1


In [271]:
# 오래된 고객순으로 출력
df.sort_values('Years with Bank', ascending=False)

Unnamed: 0,First Name,Last Name,Email,Postal Code,Net Worth,Years with Bank
5,Samer,Mo,samer@gmail.com,J7H 3HY,100000.0,26
4,Kate,Noor,kate@hotmail.com,K8N 5H6,5000.0,22
9,Noah,Moran,guutodi@bigwoc.kw,K2D 4M9,8626.96,13
6,Heba,Ismail,heba.ismail@hotmail.com,K8Y 3M8,50000.0,11
3,Chanel,Steve,chanel@gmail.com,N7T 3E6,11072.02,10
2,Nina,Keller,azikez@gahew.mr,S1T 4E6,9072.02,7
1,Noah,Small,nsmall@hotmail.com,N8S 14K,10000.0,6
0,Bird,Steve,bird@gmail.com,N94 3M0,5000.0,5
7,Laila,Ahmed,Laila.a@hotmail.com,J8Y 3M0,20000.0,3
8,Joseph,Patton,daafeja@boh.jm,M6U 5U7,2629.13,1


In [272]:
# 순자산이 많고 오래된 고객순으로 출력
df.sort_values(['Net Worth','Years with Bank'], ascending=[False,False])

# 정렬 기준으로 n개의 컬럼을 사용할 수 있으며,
# 이 떄 ascending 속성 역시 짝을 맞추어 n 개를 전달해야 함

Unnamed: 0,First Name,Last Name,Email,Postal Code,Net Worth,Years with Bank
5,Samer,Mo,samer@gmail.com,J7H 3HY,100000.0,26
6,Heba,Ismail,heba.ismail@hotmail.com,K8Y 3M8,50000.0,11
7,Laila,Ahmed,Laila.a@hotmail.com,J8Y 3M0,20000.0,3
3,Chanel,Steve,chanel@gmail.com,N7T 3E6,11072.02,10
1,Noah,Small,nsmall@hotmail.com,N8S 14K,10000.0,6
2,Nina,Keller,azikez@gahew.mr,S1T 4E6,9072.02,7
9,Noah,Moran,guutodi@bigwoc.kw,K2D 4M9,8626.96,13
4,Kate,Noor,kate@hotmail.com,K8N 5H6,5000.0,22
0,Bird,Steve,bird@gmail.com,N94 3M0,5000.0,5
8,Joseph,Patton,daafeja@boh.jm,M6U 5U7,2629.13,1


In [273]:
df['Rank'] = df['Net Worth'].rank(ascending=False).astype(int)
df.sort_values('Rank')

Unnamed: 0,First Name,Last Name,Email,Postal Code,Net Worth,Years with Bank,Rank
5,Samer,Mo,samer@gmail.com,J7H 3HY,100000.0,26,1
6,Heba,Ismail,heba.ismail@hotmail.com,K8Y 3M8,50000.0,11,2
7,Laila,Ahmed,Laila.a@hotmail.com,J8Y 3M0,20000.0,3,3
3,Chanel,Steve,chanel@gmail.com,N7T 3E6,11072.02,10,4
1,Noah,Small,nsmall@hotmail.com,N8S 14K,10000.0,6,5
2,Nina,Keller,azikez@gahew.mr,S1T 4E6,9072.02,7,6
9,Noah,Moran,guutodi@bigwoc.kw,K2D 4M9,8626.96,13,7
0,Bird,Steve,bird@gmail.com,N94 3M0,5000.0,5,8
4,Kate,Noor,kate@hotmail.com,K8N 5H6,5000.0,22,8
8,Joseph,Patton,daafeja@boh.jm,M6U 5U7,2629.13,1,10


# [실습] 기본 실습

In [26]:
import pandas as pd
import numpy as np

data = {
    "Name": ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    "Age": [25, 30, 35, 40, 28],
    "City": ['New York', 'Los Angeles', 'Chicago', 'Houston', 'San Francisco'],
    "Salary": [70000, 80000, 120000, 90000, 75000]
}

info_df = pd.DataFrame(data)
info_df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,70000
1,Bob,30,Los Angeles,80000
2,Charlie,35,Chicago,120000
3,David,40,Houston,90000
4,Eve,28,San Francisco,75000


In [27]:
# Name과 City 열을 선택하여 출력하세요.
info_df[['Name', 'City']]

Unnamed: 0,Name,City
0,Alice,New York
1,Bob,Los Angeles
2,Charlie,Chicago
3,David,Houston
4,Eve,San Francisco


In [28]:
# 나이가 30 이상인 행을 선택하여 출력하세요.
info_df[info_df['Age'] >= 30]

Unnamed: 0,Name,Age,City,Salary
1,Bob,30,Los Angeles,80000
2,Charlie,35,Chicago,120000
3,David,40,Houston,90000


In [29]:
# Country 열을 추가하고, 모두 'USA'로 설정하세요.
info_df['Country'] = 'USA'
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25,New York,70000,USA
1,Bob,30,Los Angeles,80000,USA
2,Charlie,35,Chicago,120000,USA
3,David,40,Houston,90000,USA
4,Eve,28,San Francisco,75000,USA


In [30]:
# 새로운 행을 추가하세요. (Name: Frank, Age: 33, City: Seattle, Salary: 85000)
add_person_df = pd.DataFrame(
    [['Frank', 33, "Seatle", 85000]],
    columns=["Name", "Age", "City", "Salary"]
)

info_df = pd.concat([info_df, add_person_df], ignore_index=True)
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25,New York,70000,USA
1,Bob,30,Los Angeles,80000,USA
2,Charlie,35,Chicago,120000,USA
3,David,40,Houston,90000,USA
4,Eve,28,San Francisco,75000,USA
5,Frank,33,Seatle,85000,


In [31]:
# Salary 열을 삭제하세요.
info_no_salary_df = info_df.drop('Salary', axis=1)
info_no_salary_df

Unnamed: 0,Name,Age,City,Country
0,Alice,25,New York,USA
1,Bob,30,Los Angeles,USA
2,Charlie,35,Chicago,USA
3,David,40,Houston,USA
4,Eve,28,San Francisco,USA
5,Frank,33,Seatle,


In [32]:
# Age가 35 이상인 사람들의 Salary를 5% 증가시키세요.
""" 
def salary_increase(value):
    return value*1.05
"""

info_df.loc[info_df['Age']>=35, 'Salary'] = info_df['Salary']*1.05
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25,New York,70000,USA
1,Bob,30,Los Angeles,80000,USA
2,Charlie,35,Chicago,126000,USA
3,David,40,Houston,94500,USA
4,Eve,28,San Francisco,75000,USA
5,Frank,33,Seatle,85000,


In [33]:
# 'Alice'의 City를 'Boston'으로 수정하세요.
info_df['City'] = np.where(info_df['Name']=='Alice', 'Boston', info_df['City'])
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25,Boston,70000,USA
1,Bob,30,Los Angeles,80000,USA
2,Charlie,35,Chicago,126000,USA
3,David,40,Houston,94500,USA
4,Eve,28,San Francisco,75000,USA
5,Frank,33,Seatle,85000,


In [34]:
# Age와 Salary의 데이터 타입을 float로 변환하세요.
info_df[['Age', 'Salary']] = info_df.loc[:, ['Age','Salary']].astype(float)
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25.0,Boston,70000.0,USA
1,Bob,30.0,Los Angeles,80000.0,USA
2,Charlie,35.0,Chicago,126000.0,USA
3,David,40.0,Houston,94500.0,USA
4,Eve,28.0,San Francisco,75000.0,USA
5,Frank,33.0,Seatle,85000.0,


In [35]:
# 'David'가 있는 행을 삭제하세요.
info_df.drop(3, inplace=True)
info_df

Unnamed: 0,Name,Age,City,Salary,Country
0,Alice,25.0,Boston,70000.0,USA
1,Bob,30.0,Los Angeles,80000.0,USA
2,Charlie,35.0,Chicago,126000.0,USA
4,Eve,28.0,San Francisco,75000.0,USA
5,Frank,33.0,Seatle,85000.0,
