# 📍 데이터수정
### ✅ 엑셀로 열기
: ```pd.read_excel('파일명.xlsx', index_col='column')```

In [1]:
import pandas as pd

# Load the spreadsheet and use the '지원번호' column as the row index.
df = pd.read_excel('score.xlsx', index_col='지원번호') # set the index column
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,북산고,197,90,85,100,95,85,Python
2번,정대만,북산고,184,40,35,50,55,25,Java
3번,송태섭,북산고,168,80,75,70,80,75,Javascript
4번,서태웅,북산고,187,40,60,70,75,80,
5번,강백호,북산고,188,15,20,10,35,10,
6번,변덕규,능남고,202,80,100,95,85,80,C
7번,황태산,능남고,188,55,65,45,40,35,PYTHON
8번,윤대협,능남고,190,100,85,90,95,95,C#


### ✅ Column 데이터 수정
: ```df['column'].replace({'old_value' : 'new_value'})```

In [2]:
# Rename the school '북산고' to '상북고'.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df['학교']: the in-place form mutates an
# intermediate Series, emits a FutureWarning in pandas 2.x, and leaves df
# unchanged once Copy-on-Write is enabled.
df['학교'] = df['학교'].replace({'북산고': '상북고'})
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,상북고,197,90,85,100,95,85,Python
2번,정대만,상북고,184,40,35,50,55,25,Java
3번,송태섭,상북고,168,80,75,70,80,75,Javascript
4번,서태웅,상북고,187,40,60,70,75,80,
5번,강백호,상북고,188,15,20,10,35,10,
6번,변덕규,능남고,202,80,100,95,85,80,C
7번,황태산,능남고,188,55,65,45,40,35,PYTHON
8번,윤대협,능남고,190,100,85,90,95,95,C#


### ✅ 소문자 대체
: ```df['column'].str.lower()```

In [3]:
# Lower-case the text in 'SW특기' through the .str accessor and store it back.
df['SW특기'] = df['SW특기'].str.lower()
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,상북고,197,90,85,100,95,85,python
2번,정대만,상북고,184,40,35,50,55,25,java
3번,송태섭,상북고,168,80,75,70,80,75,javascript
4번,서태웅,상북고,187,40,60,70,75,80,
5번,강백호,상북고,188,15,20,10,35,10,
6번,변덕규,능남고,202,80,100,95,85,80,c
7번,황태산,능남고,188,55,65,45,40,35,python
8번,윤대협,능남고,190,100,85,90,95,95,c#


### ✅ str 합
: ```'str' + 'str'```

In [4]:
# Append the suffix '등학교' to the values in '학교' (element-wise str + str).
df['학교'] = df['학교'] + '등학교'
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,상북고등학교,197,90,85,100,95,85,python
2번,정대만,상북고등학교,184,40,35,50,55,25,java
3번,송태섭,상북고등학교,168,80,75,70,80,75,javascript
4번,서태웅,상북고등학교,187,40,60,70,75,80,
5번,강백호,상북고등학교,188,15,20,10,35,10,
6번,변덕규,능남고등학교,202,80,100,95,85,80,c
7번,황태산,능남고등학교,188,55,65,45,40,35,python
8번,윤대협,능남고등학교,190,100,85,90,95,95,c#


### ✅ 함수적용
: ```df['column'].apply(function)```

In [5]:
# Intentionally failing example: '키' holds integers, so adding the str 'cm'
# raises an error because of the int + str data type mismatch.
df['키'] = df['키'] + 'cm'
df

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('int64'), dtype('<U2')) -> None

In [6]:
def add_cm(height):
    """Return ``height`` converted to text with the unit suffix 'cm' appended."""
    return f"{height!s}cm"

df['키'] = df['키'].apply(add_cm) # call add_cm on each '키' value and store the results back in the column
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,상북고등학교,197cm,90,85,100,95,85,python
2번,정대만,상북고등학교,184cm,40,35,50,55,25,java
3번,송태섭,상북고등학교,168cm,80,75,70,80,75,javascript
4번,서태웅,상북고등학교,187cm,40,60,70,75,80,
5번,강백호,상북고등학교,188cm,15,20,10,35,10,
6번,변덕규,능남고등학교,202cm,80,100,95,85,80,c
7번,황태산,능남고등학교,188cm,55,65,45,40,35,python
8번,윤대협,능남고등학교,190cm,100,85,90,95,95,c#


In [7]:
def capitalize(lang):
    """Return ``lang`` with its first character upper-cased and the rest lower-cased.

    Only ``str`` values are transformed. Anything else -- missing values
    (``None``/``NaN``) as well as non-text cells such as numbers -- is
    returned unchanged, so the function can be passed to ``Series.apply``
    on a column holding mixed types without raising ``AttributeError``.
    """
    if isinstance(lang, str):
        return lang.capitalize()
    return lang

# Apply capitalize to each 'SW특기' value and store the result back.
df['SW특기'] = df['SW특기'].apply(capitalize)
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,상북고등학교,197cm,90,85,100,95,85,Python
2번,정대만,상북고등학교,184cm,40,35,50,55,25,Java
3번,송태섭,상북고등학교,168cm,80,75,70,80,75,Javascript
4번,서태웅,상북고등학교,187cm,40,60,70,75,80,
5번,강백호,상북고등학교,188cm,15,20,10,35,10,
6번,변덕규,능남고등학교,202cm,80,100,95,85,80,C
7번,황태산,능남고등학교,188cm,55,65,45,40,35,Python
8번,윤대협,능남고등학교,190cm,100,85,90,95,95,C#


### ✅ Cell 수정
: ```df.loc['index', 'column'] = data```

In [8]:
# Re-read the spreadsheet so df is rebuilt from the file contents.
df = pd.read_excel('score.xlsx', index_col='지원번호')
df.loc['4번', 'SW특기'] = 'Python' # set the 'SW특기' cell of row '4번' to 'Python'
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,북산고,197,90,85,100,95,85,Python
2번,정대만,북산고,184,40,35,50,55,25,Java
3번,송태섭,북산고,168,80,75,70,80,75,Javascript
4번,서태웅,북산고,187,40,60,70,75,80,Python
5번,강백호,북산고,188,15,20,10,35,10,
6번,변덕규,능남고,202,80,100,95,85,80,C
7번,황태산,능남고,188,55,65,45,40,35,PYTHON
8번,윤대협,능남고,190,100,85,90,95,95,C#


In [9]:
df.loc['5번', ['학교', 'SW특기']] = ['능남고', 'C'] # for row '5번', set '학교' to '능남고' and 'SW특기' to 'C'
df

Unnamed: 0_level_0,이름,학교,키,국어,영어,수학,과학,사회,SW특기
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,채치수,북산고,197,90,85,100,95,85,Python
2번,정대만,북산고,184,40,35,50,55,25,Java
3번,송태섭,북산고,168,80,75,70,80,75,Javascript
4번,서태웅,북산고,187,40,60,70,75,80,Python
5번,강백호,능남고,188,15,20,10,35,10,C
6번,변덕규,능남고,202,80,100,95,85,80,C
7번,황태산,능남고,188,55,65,45,40,35,PYTHON
8번,윤대협,능남고,190,100,85,90,95,95,C#


### ✅ Column 순서 변경
: ```df[new_column_list]```

In [10]:
# Snapshot the current column labels as a plain Python list.
cols = df.columns.tolist()
cols

['이름', '학교', '키', '국어', '영어', '수학', '과학', '사회', 'SW특기']

In [11]:
# Move the last column (SW특기) to the front and keep the remaining columns
# in their existing order.
df = df[[cols[-1], *cols[:-1]]]
df

Unnamed: 0_level_0,SW특기,이름,학교,키,국어,영어,수학,과학,사회
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1번,Python,채치수,북산고,197,90,85,100,95,85
2번,Java,정대만,북산고,184,40,35,50,55,25
3번,Javascript,송태섭,북산고,168,80,75,70,80,75
4번,Python,서태웅,북산고,187,40,60,70,75,80
5번,C,강백호,능남고,188,15,20,10,35,10
6번,C,변덕규,능남고,202,80,100,95,85,80
7번,PYTHON,황태산,능남고,188,55,65,45,40,35
8번,C#,윤대협,능남고,190,100,85,90,95,95


### ✅ Column 이름 변경
: ```df.columns = new_column_list```

In [12]:
# Keep only the '이름' and '학교' columns.
df = df[['이름', '학교']]
df

Unnamed: 0_level_0,이름,학교
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1
1번,채치수,북산고
2번,정대만,북산고
3번,송태섭,북산고
4번,서태웅,북산고
5번,강백호,능남고
6번,변덕규,능남고
7번,황태산,능남고
8번,윤대협,능남고


In [13]:
# Inspect the current column labels.
df.columns

Index(['이름', '학교'], dtype='object')

In [14]:
# Rename the columns by assigning a new list of labels, matched by position.
df.columns = ['Name', 'School']
df

Unnamed: 0_level_0,Name,School
지원번호,Unnamed: 1_level_1,Unnamed: 2_level_1
1번,채치수,북산고
2번,정대만,북산고
3번,송태섭,북산고
4번,서태웅,북산고
5번,강백호,능남고
6번,변덕규,능남고
7번,황태산,능남고
8번,윤대협,능남고
