## 판다스?

데이터프레임(엑셀과 비슷)과 시리즈라는 자료형  
데이터 분석을 위한 다양한 기능을 제공하는 라이브러리  
R의 데이터프레임에 영향  
내부적으로 numpy 를 사용하므로 함께 import  

http://pandas.pydata.org

기능요약  
https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf


# Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
x = [1, 2, 3, 4, 5]
pd.Series(x)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
x = np.array([1, 2, 3, 4, 5])
pd.Series(x)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
# Build the Series directly from the literal values.
x = pd.Series([1, 2, 3, 4, 5])

# Each entry is one demo, printed in order: single element, slice, reversed,
# boolean mask, scalar add, scalar multiply, element-wise add.
for result in (x[0], x[1:3], x[::-1], x[x > 3], x + 1, x * 10, x + x):
    print(result)

1
1    2
2    3
dtype: int64
4    5
3    4
2    3
1    2
0    1
dtype: int64
3    4
4    5
dtype: int64
0    2
1    3
2    4
3    5
4    6
dtype: int64
0    10
1    20
2    30
3    40
4    50
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64


# Series Index

In [5]:
import pandas as pd
import numpy as np

x = [1, 2, 3, 4, 5]
x = pd.Series(x)
print(x.index)
print(x.values)

RangeIndex(start=0, stop=5, step=1)
[1 2 3 4 5]


In [6]:
x = [1, 2, 3, 4, 5]
x = pd.Series(x, index=['a', 'b', 'c', 'd', 'e'])  # attach explicit string labels
print(x)

print(x['a'])  # explicit (label-based) access
# Positional access goes through .iloc: x[0] on a non-integer index relies on
# the deprecated integer-as-position fallback of Series.__getitem__.
print(x.iloc[0])  # implicit (position-based) access
print(x[['a', 'e']])  # fancy indexing: fetch several labels at once
print(x.a)  # attribute-style access to label 'a'

a    1
b    2
c    3
d    4
e    5
dtype: int64
1
1
a    1
e    5
dtype: int64
1


In [7]:
# Series whose explicit labels are the integers 1..5 (so label != position).
x = [1, 2, 3, 4, 5]
x = pd.Series(x, index=[1, 2, 3, 4, 5])

# x[0] is an error here: with an integer index the key is looked up as a label, and there is no label 0
print(x.iloc[0]) # .iloc: implicit (position-based) index only
print(x.loc[1]) # .loc: explicit (label-based) index only

1
1


## 딕셔너리 to Series

In [8]:
x = {"수학":90, "영어":80, "과학":95, "미술":80}
x = pd.Series(x)
x

수학    90
영어    80
과학    95
미술    80
dtype: int64

In [9]:
print(x['수학'])

90


In [10]:
print(x['영어':])

영어    80
과학    95
미술    80
dtype: int64


In [11]:
x = {"수학":90, "영어":80, "과학":95, "미술":80}
x = pd.Series(x, index=["수학", "영어", "과학"])
x

수학    90
영어    80
과학    95
dtype: int64

## Multi Index

In [12]:
student_1 = {"수학":90, "영어":80, "과학":95, "미술":80}
student_2 = {"수학":70, "영어":90, "과학":100, "미술":70}

# Outer level: the student name repeated once per score,
# i.e. ['홍길동', '홍길동', '홍길동', '홍길동', '이몽룡', '이몽룡', '이몽룡', '이몽룡'].
index_1 = ['홍길동'] * len(student_1) + ['이몽룡'] * len(student_2)

# Inner level: the subject names, taken from the dict keys in insertion order,
# i.e. ['수학', '영어', '과학', '미술', '수학', '영어', '과학', '미술'].
index_2 = list(student_1) + list(student_2)

# Scores in the same order as the labels above.
value_all = list(student_1.values()) + list(student_2.values())

# A list of two label arrays as the index produces a two-level MultiIndex.
students = pd.Series(value_all, index=[index_1, index_2])
students

홍길동  수학     90
     영어     80
     과학     95
     미술     80
이몽룡  수학     70
     영어     90
     과학    100
     미술     70
dtype: int64

In [13]:
students['홍길동']

수학    90
영어    80
과학    95
미술    80
dtype: int64

In [14]:
students['이몽룡']

수학     70
영어     90
과학    100
미술     70
dtype: int64

## 결측값 처리방법 (NaN, None)

In [64]:
# Missing-value demo: the None entries show up as NaN and the Series is float64
# (see the printed result below the cell).
x = [1, None, 2, None, 3, 4, None]
x = pd.Series(x)
print(x)

# The aggregations skip the missing entries (printed: 10.0, 4.0, 1.0).
print(x.sum())
print(x.max())
print(x.min())

# Count of missing values, then count of non-missing values
print(x.isnull().sum())
print(x.notnull().sum())

# Drop missing values; this returns a new Series and leaves x unchanged
print(x.dropna())

# Fill missing values with another value; the result is only displayed, x is not reassigned
x.fillna(0) 

0    1.0
1    NaN
2    2.0
3    NaN
4    3.0
5    4.0
6    NaN
dtype: float64
10.0
4.0
1.0
3
4
0    1.0
2    2.0
4    3.0
5    4.0
dtype: float64


0    1.0
1    0.0
2    2.0
3    0.0
4    3.0
5    4.0
6    0.0
dtype: float64

0    1
1    2
2    3
3    4
4    5
dtype: int64

## concat

In [16]:
x = pd.Series([1, 2, 3])
y = pd.Series([4, 5, 6])
z = pd.Series([7, 8, 9])

pd.concat([x, y, z])

0    1
1    2
2    3
0    4
1    5
2    6
0    7
1    8
2    9
dtype: int64

In [18]:
pd.concat([x, y, z], verify_integrity=True) #같은 값의 인덱스가 있을경우 오류

ValueError: Indexes have overlapping values: Int64Index([0, 1, 2], dtype='int64')

In [19]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

In [20]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1)

Unnamed: 0,0,1,2
0,1,4,7
1,2,5,8
2,3,6,9


In [21]:
x = pd.Series([1, 2, 3, 4])
y = pd.Series([4, 5, 6])
z = pd.Series([7, 8, 9])

In [22]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1, join='inner')

Unnamed: 0,0,1,2
0,1,4,7
1,2,5,8
2,3,6,9


In [23]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1, join='outer')

Unnamed: 0,0,1,2
0,1,4.0,7.0
1,2,5.0,8.0
2,3,6.0,9.0
3,4,,


## 연산과 집계함수

In [65]:
x = pd.Series([1, 2, 3, 4, 5])
y = pd.Series([6, 7, 8, 9, 0])

# Addition, element-wise between two Series (x.add(10) would add a scalar instead)
print(x.add(y))

# Subtraction
print(x.sub(y))

# Multiplication
print(x.mul(y))

# Division: floor division, true division, remainder
print(x.floordiv(2))
print(x.div(2))
print(x.mod(2))

# Power
print(x.pow(2))

# Basic statistics
print(x.count())
print(x.min())
print(x.max())
print(x.mean())
print(x.median())  # median
print(x.sum())
print(x.std())  # standard deviation
print(x.var())  # variance
# Mean absolute deviation. Series.mad() was removed in pandas 2.0, so it is
# computed explicitly as the mean of |x - mean(x)|.
print((x - x.mean()).abs().mean())
print(x.describe())  # all basic statistics at once

print(x.head(2))  # peek at the first rows
print(x.tail(2))  # peek at the last rows

5
1
5
3.0
3.0
15
1.5811388300841898
2.5
1.2
count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64
0    1
1    2
dtype: int64
3    4
4    5
dtype: int64


## 실습1

In [11]:
x = [np.nan, 1, 2, 3, 4, 5]
# Draw 20 values at random from x (NaN included). The upper bound follows
# len(x) instead of a hard-coded 6, so the pool can change safely.
y = pd.Series([x[np.random.randint(0, len(x))] for _ in range(20)])
# dropna() already returns a Series, so no second pd.Series(...) wrap is needed.
x = pd.Series(x).dropna()
# Replace the missing draws with the mean of the remaining values
# (mean() ignores NaN by default).
y = y.fillna(y.mean())
print(y)

0     5.000000
1     4.000000
2     1.000000
3     4.000000
4     3.000000
5     5.000000
6     2.000000
7     2.000000
8     3.000000
9     5.000000
10    1.000000
11    2.882353
12    1.000000
13    2.882353
14    4.000000
15    4.000000
16    3.000000
17    2.882353
18    1.000000
19    1.000000
dtype: float64


# DataFrame

In [25]:
# Yearly sales figures: one column per dict key, one row per list position.
sales_data = pd.DataFrame(
    {
        '연도': [2015, 2016, 2017, 2018, 2019, 2020],
        '판매량': [103, 70, 130, 160, 190, 230],
        '매출': [500000, 300000, 400000, 550000, 700000, 680000],
        '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
    }
)
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [26]:
sales_data['판매량']

0    103
1     70
2    130
3    160
4    190
5    230
Name: 판매량, dtype: int64

In [27]:
sales_data.iloc[1]

연도       2016
판매량        70
매출     300000
순이익    190000
Name: 1, dtype: int64

In [28]:
sales_data = {    
    '연도':[2015, 2016, 2017, 2018, 2019, 2020],
    '판매량':[103, 70, 130, 160, 190, 230],
    '매출':[500000, 300000, 400000, 550000, 700000, 680000],
    '순이익':[370000, 190000, 300000, 480000, 600000, 590000]
}

pd.DataFrame(sales_data, columns=['판매량','매출','순이익'], index=sales_data['연도'])

Unnamed: 0,판매량,매출,순이익
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,160,550000,480000
2019,190,700000,600000
2020,230,680000,590000


## 파일생성

In [29]:
%%writefile sales_data.csv
연도,판매량,매출,순이익
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,60,550000,480000
2019,190,700000,600000
2020,230,680000,590000

Writing sales_data.csv


## CSV 파일읽기

In [76]:
# Load sales_data.csv and use its '연도' column as the row index.
# header=0 takes the first line as the column names; sep=',' is the field delimiter.
sales_data = pd.read_csv('sales_data.csv', index_col='연도', header=0, sep=',')
sales_data

Unnamed: 0_level_0,판매량,매출,순이익
연도,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,60,550000,480000
2019,190,700000,600000
2020,230,680000,590000


## CSV 파일쓰기

In [31]:
sales_data.to_csv('sales_data_save.csv', encoding='utf-8-sig')

## 데이터 추가

In [71]:
sales_data = {    
    '연도':[2015, 2016, 2017, 2018, 2019, 2020],
    '판매량':[103, 70, 130, 160, 190, 230],
    '매출':[500000, 300000, 400000, 550000, 700000, 680000],
    '순이익':[370000, 190000, 300000, 480000, 600000, 590000]
}

sales_data = pd.DataFrame(sales_data)
sales_data


Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [73]:
sales_data['순이익율'] = (sales_data['순이익']/sales_data['매출']) * 100
sales_data

Unnamed: 0,연도,판매량,매출,순이익,순이익율
0,2015,103,500000,370000,74.0
1,2016,70,300000,190000,63.333333
2,2017,130,400000,300000,75.0
3,2018,160,550000,480000,87.272727
4,2019,190,700000,600000,85.714286
5,2020,230,680000,590000,86.764706


In [74]:
def check(n, threshold=80):
    """Label a value as high ('높음') or low ('낮음') against a cut-off.

    Args:
        n: Value to classify (used below on the 순이익율 percentages).
        threshold: Cut-off. Values strictly greater than it are '높음'.
            Defaults to 80, the limit that was previously hard-coded.

    Returns:
        '높음' if n > threshold, otherwise '낮음' (a value equal to the
        threshold is '낮음').
    """
    return '높음' if n > threshold else '낮음'

sales_data['순이익율_비교'] = sales_data['순이익율'].apply(check)
#sales_data['순이익율_비교'] = sales_data['순이익율'].apply(lambda n: '높음' if n > 80 else '낮음')
sales_data

Unnamed: 0,연도,판매량,매출,순이익,순이익율,순이익율_비교
0,2015,103,500000,370000,74.0,낮음
1,2016,70,300000,190000,63.333333,낮음
2,2017,130,400000,300000,75.0,낮음
3,2018,160,550000,480000,87.272727,높음
4,2019,190,700000,600000,85.714286,높음
5,2020,230,680000,590000,86.764706,높음


## 데이터 추가 2

In [35]:
sales_data = {    
    '연도':[2015, 2016, 2017, 2018, 2019, 2020],
    '판매량':[103, 70, 130, 160, 190, 230],
    '매출':[500000, 300000, 400000, 550000, 700000, 680000],
    '순이익':[370000, 190000, 300000, 480000, 600000, 590000]
}

sales_data = pd.DataFrame(sales_data)
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [36]:
sales_data[sales_data['매출'] > 300000]

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [82]:
sales_data['테스트1'] = np.where(sales_data['판매량'] > 200, 0, sales_data['판매량'])
sales_data

Unnamed: 0_level_0,판매량,매출,순이익,테스트1
연도,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,103,500000,370000,103
2016,70,300000,190000,70
2017,130,400000,300000,130
2018,60,550000,480000,60
2019,190,700000,600000,190
2020,230,680000,590000,0


In [38]:
# Pick the rows (판매량 < 100) and the column in one .loc call rather than
# chaining two [] lookups. Rows outside the mask are missing from the
# right-hand side, so index alignment leaves them as NaN in the new column.
low_volume = sales_data['판매량'] < 100
sales_data['테스트2'] = sales_data.loc[low_volume, '판매량'] + 50
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015,103,500000,370000,103,
1,2016,70,300000,190000,70,120.0
2,2017,130,400000,300000,130,
3,2018,160,550000,480000,160,
4,2019,190,700000,600000,190,
5,2020,230,680000,590000,0,


In [39]:
# Add a row labelled 6; the list is assigned positionally, in column order.
# NOTE(review): per the displayed result, 720000 lands in 판매량, 650000 in 매출
# and 360 in 순이익 — confirm whether 판매량=360, 매출=720000, 순이익=650000 was intended.
sales_data.loc[6] = [2021, 720000,650000, 360, 0, 0]
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015,103,500000,370000,103,
1,2016,70,300000,190000,70,120.0
2,2017,130,400000,300000,130,
3,2018,160,550000,480000,160,
4,2019,190,700000,600000,190,
5,2020,230,680000,590000,0,
6,2021,720000,650000,360,0,0.0


In [40]:
# Add row 7 as row 5 with 100 added to every column, 연도 included (2020 -> 2120).
# NOTE(review): the displayed result shows all columns as floats after this step.
sales_data.loc[7] = sales_data.loc[5] + 100
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015.0,103.0,500000.0,370000.0,103.0,
1,2016.0,70.0,300000.0,190000.0,70.0,120.0
2,2017.0,130.0,400000.0,300000.0,130.0,
3,2018.0,160.0,550000.0,480000.0,160.0,
4,2019.0,190.0,700000.0,600000.0,190.0,
5,2020.0,230.0,680000.0,590000.0,0.0,
6,2021.0,720000.0,650000.0,360.0,0.0,0.0
7,2120.0,330.0,680100.0,590100.0,100.0,


## 데이터 삭제

In [41]:
sales_data = {    
    '연도':[2015, 2016, 2017, 2018, 2019, 2020],
    '판매량':[103, 70, 130, 160, 190, 230],
    '매출':[500000, 300000, 400000, 550000, 700000, 680000],
    '순이익':[370000, 190000, 300000, 480000, 600000, 590000]
}

sales_data = pd.DataFrame(sales_data)
sales_data['테스트1'] = 'test1'
sales_data['테스트2'] = 'test2'
sales_data['테스트3'] = 'test3'

sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2,테스트3
0,2015,103,500000,370000,test1,test2,test3
1,2016,70,300000,190000,test1,test2,test3
2,2017,130,400000,300000,test1,test2,test3
3,2018,160,550000,480000,test1,test2,test3
4,2019,190,700000,600000,test1,test2,test3
5,2020,230,680000,590000,test1,test2,test3


In [42]:
del sales_data['테스트1']
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트2,테스트3
0,2015,103,500000,370000,test2,test3
1,2016,70,300000,190000,test2,test3
2,2017,130,400000,300000,test2,test3
3,2018,160,550000,480000,test2,test3
4,2019,190,700000,600000,test2,test3
5,2020,230,680000,590000,test2,test3


In [43]:
sales_data.drop(['테스트2'], axis='columns', inplace=True)
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트3
0,2015,103,500000,370000,test3
1,2016,70,300000,190000,test3
2,2017,130,400000,300000,test3
3,2018,160,550000,480000,test3
4,2019,190,700000,600000,test3
5,2020,230,680000,590000,test3


In [44]:
sales_data.drop(['테스트3'], axis='columns', inplace=True)
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [45]:
# Drop columns by position: columns[[0, 2]] selects the 1st and 3rd column labels
# (연도 and 매출 at this point), and inplace=True modifies sales_data itself.
sales_data.drop(sales_data.columns[[0, 2]], axis='columns', inplace=True)
sales_data

Unnamed: 0,판매량,순이익
0,103,370000
1,70,190000
2,130,300000
3,160,480000
4,190,600000
5,230,590000


In [46]:
sales_data.drop(0, inplace=True)
sales_data

Unnamed: 0,판매량,순이익
1,70,190000
2,130,300000
3,160,480000
4,190,600000
5,230,590000


In [47]:
sales_data.drop([3, 4, 5], inplace=True)
sales_data

Unnamed: 0,판매량,순이익
1,70,190000
2,130,300000


## Dataframe MultiIndex

In [4]:
# Two label arrays per axis (outer group, inner key) give a MultiIndex on both
# the rows and the columns; the cell values are random integers in [1, 100).
row_labels = [['A', 'A', 'B', 'B'], ['a', 'b', 'a', 'b']]
column_labels = [['가가', '가가', '나나', '나나'], ['가', '나', '가', '나']]
df = pd.DataFrame(
    np.random.randint(1, 100, size=(4, 4)),
    index=row_labels,
    columns=column_labels,
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,가가,가가,나나,나나
Unnamed: 0_level_1,Unnamed: 1_level_1,가,나,가,나
A,a,83,66,72,63
A,b,20,38,42,18
B,a,85,83,8,90
B,b,87,89,7,4


In [5]:
df['가가']

Unnamed: 0,Unnamed: 1,가,나
A,a,83,66
A,b,20,38
B,a,85,83
B,b,87,89


In [9]:
df.loc['A']

Unnamed: 0_level_0,가가,가가,나나,나나
Unnamed: 0_level_1,가,나,가,나
a,83,66,72,63
b,20,38,42,18


In [10]:
df.index

MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'a'),
            ('B', 'b')],
           )

In [52]:
df.columns

MultiIndex([('가가', '가'),
            ('가가', '나'),
            ('나나', '가'),
            ('나나', '나')],
           )

## GroupBy

In [25]:
# Outer level: warehouse (four rows each); inner level: product sold there.
warehouses = ['A창고'] * 4 + ['B창고'] * 4
products = ['사과', '배', '바나나', '사과', '사과', '배', '바나나', '배']
df = pd.DataFrame(
    np.random.randint(1, 100, size=(8, 2)),
    index=[warehouses, products],
    columns=['판매', '재고'],
)
# Name the index levels so groupby can refer to them by name.
df.index.names = ['창고명', '상품명']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,판매,재고
창고명,상품명,Unnamed: 2_level_1,Unnamed: 3_level_1
A창고,사과,99,96
A창고,배,79,17
A창고,바나나,74,6
A창고,사과,20,60
B창고,사과,96,20
B창고,배,77,2
B창고,바나나,40,8
B창고,배,15,66


In [26]:
df.groupby('창고명').sum()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,272,179
B창고,228,96


In [27]:
df.groupby('창고명').max()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,99,96
B창고,96,66


In [28]:
df.groupby('창고명').min()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,20,6
B창고,15,2


In [31]:
df.groupby('상품명').sum()

Unnamed: 0_level_0,판매,재고
상품명,Unnamed: 1_level_1,Unnamed: 2_level_1
바나나,114,14
배,171,85
사과,215,176


In [36]:
df.groupby(['창고명','상품명']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,판매,재고
창고명,상품명,Unnamed: 2_level_1,Unnamed: 3_level_1
A창고,바나나,74,6
A창고,배,79,17
A창고,사과,119,156
B창고,바나나,40,8
B창고,배,92,68
B창고,사과,96,20


## sort_values

In [17]:
df = pd.DataFrame(np.random.randint(1, 100, size=(8, 2)), index=[['A창고','A창고','A창고','A창고','B창고','B창고','B창고','B창고'],['사과','배','바나나','사과','사과','배','바나나','배']], columns=['판매','재고'])
df

Unnamed: 0,Unnamed: 1,판매,재고
A창고,사과,55,77
A창고,배,14,98
A창고,바나나,32,70
A창고,사과,55,35
B창고,사과,83,79
B창고,배,59,73
B창고,바나나,49,76
B창고,배,8,71


In [22]:
df.sort_values(by="판매", ascending=True)

Unnamed: 0,Unnamed: 1,판매,재고
B창고,배,8,71
A창고,배,14,98
A창고,바나나,32,70
B창고,바나나,49,76
A창고,사과,55,77
A창고,사과,55,35
B창고,배,59,73
B창고,사과,83,79


In [20]:
df.sort_values(by=["판매", "재고"], ascending=[True, False])

Unnamed: 0,Unnamed: 1,판매,재고
B창고,사과,83,79
B창고,배,59,73
A창고,사과,55,35
A창고,사과,55,77
B창고,바나나,49,76
A창고,바나나,32,70
A창고,배,14,98
B창고,배,8,71


In [44]:
# Assign (판매, 재고) = (81, 79) to the rows labelled ('A창고', '사과');
# the displayed result shows both matching rows updated.
# NOTE(review): the line echoed under the cell looks like a warning from this
# assignment, presumably about indexing an unsorted MultiIndex — confirm.
df.loc["A창고","사과"] = (81,79)
df

  df.loc["A창고","사과"] = (81,79)


Unnamed: 0_level_0,Unnamed: 1_level_0,판매,재고
창고명,상품명,Unnamed: 2_level_1,Unnamed: 3_level_1
A창고,사과,81,79
A창고,배,79,17
A창고,바나나,74,6
A창고,사과,81,79
B창고,사과,96,20
B창고,배,77,2
B창고,바나나,40,8
B창고,배,15,66


## 실습2

In [1]:
import pandas as pd
import numpy as np

# 3 grades x 5 classes: derive the two 15-element label arrays from the short
# lists. Each grade is repeated once per class, and the class list is repeated
# once per grade.
grades = ['1학년', '2학년', '3학년']
classes = ['1반', '2반', '3반', '4반', '5반']
grade_labels = [grade for grade in grades for _ in classes]
class_labels = classes * len(grades)

# Random scores in [1, 100) for the three subjects.
school = pd.DataFrame(
    np.random.randint(1, 100, size=(15, 3)),
    index=[grade_labels, class_labels],
    columns=['국어', '영어', '과학'],
)
school

Unnamed: 0,Unnamed: 1,국어,영어,과학
1학년,1반,58,12,53
1학년,2반,60,81,18
1학년,3반,23,56,47
1학년,4반,36,84,21
1학년,5반,24,89,58
2학년,1반,2,26,1
2학년,2반,12,92,2
2학년,3반,87,4,46
2학년,4반,97,30,52
2학년,5반,15,33,36


In [2]:
school['총점'] = school['국어']+school['영어']+school['과학']
school

Unnamed: 0,Unnamed: 1,국어,영어,과학,총점
1학년,1반,58,12,53,123
1학년,2반,60,81,18,159
1학년,3반,23,56,47,126
1학년,4반,36,84,21,141
1학년,5반,24,89,58,171
2학년,1반,2,26,1,29
2학년,2반,12,92,2,106
2학년,3반,87,4,46,137
2학년,4반,97,30,52,179
2학년,5반,15,33,36,84


In [3]:
school['평균']=(school['국어']+school['영어']+school['과학']) / 3
school

Unnamed: 0,Unnamed: 1,국어,영어,과학,총점,평균
1학년,1반,58,12,53,123,41.0
1학년,2반,60,81,18,159,53.0
1학년,3반,23,56,47,126,42.0
1학년,4반,36,84,21,141,47.0
1학년,5반,24,89,58,171,57.0
2학년,1반,2,26,1,29,9.666667
2학년,2반,12,92,2,106,35.333333
2학년,3반,87,4,46,137,45.666667
2학년,4반,97,30,52,179,59.666667
2학년,5반,15,33,36,84,28.0


In [5]:
# Name the outer index level '학년' so groupby can refer to it; the inner level gets an empty name.
# NOTE(review): sum() also adds up the 평균 column, so that column in the result is a
# sum of per-class averages rather than an average — confirm this is intended.
school.index.names=('학년','')
school.groupby('학년').sum()

Unnamed: 0_level_0,국어,영어,과학,총점,평균
학년,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1학년,201,322,197,720,240.0
2학년,213,185,137,535,178.333333
3학년,357,263,134,754,251.333333


## 실습3

ERROR: Invalid requirement: '–c'
Note: you may need to restart the kernel to use updated packages.
