# 2. DataFrame
> class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)

Two-dimensional, size-mutable, potentially heterogeneous tabular data.

Data structure also contains labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. The primary pandas data structure.

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html

## Prepare Data

In [107]:
# Sample records for eight students, stored column-wise: every key maps to a
# list of eight values, one per student.
data = {
    'Name': ['Tom', 'Noah', 'Lucas', 'Mia', 'Amelia', 'Sophia', 'James', 'Ava'],
    'Height': [175, 190, 185, 165, 170, 160, 183, 172],
    'Weight': [80, 90, 83, 50, 56, 60, 72, 55],
    'School': ['MIT', 'MIT', 'Harvard', 'MIT', 'Harvard', 'MIT', 'Harvard', 'MIT'],
    'Gender': ['M', 'M', 'M', 'F', 'F', 'F', 'M', 'F'],
    'Python': [80, 90, 30, 66, 72, 93, 100, 76],
    'Java': [75, 92, 25, 54, 77, 95, 97, 73],
    'Javascript': [77, 95, 15, 59, 62, 90, 100, 70],
    'C#': [97, 87, 23, 61, 74, 88, 95, 66],
    'C++': [92, 80, 13, 54, 72, 85, 91, 68],
    'HTML': [72, 93, 50, 54, 69, 82, 99, 73],
}
data

{'Name': ['Tom', 'Noah', 'Lucas', 'Mia', 'Amelia', 'Sophia', 'James', 'Ava'],
 'Height': [175, 190, 185, 165, 170, 160, 183, 172],
 'Weight': [80, 90, 83, 50, 56, 60, 72, 55],
 'School': ['MIT',
  'MIT',
  'Harvard',
  'MIT',
  'Harvard',
  'MIT',
  'Harvard',
  'MIT'],
 'Gender': ['M', 'M', 'M', 'F', 'F', 'F', 'M', 'F'],
 'Python': [80, 90, 30, 66, 72, 93, 100, 76],
 'Java': [75, 92, 25, 54, 77, 95, 97, 73],
 'Javascript': [77, 95, 15, 59, 62, 90, 100, 70],
 'C#': [97, 87, 23, 61, 74, 88, 95, 66],
 'C++': [92, 80, 13, 54, 72, 85, 91, 68],
 'HTML': [72, 93, 50, 54, 69, 82, 99, 73]}

In [108]:
# Plain dict lookup: returns the list stored under the 'Name' key.
data['Name']

['Tom', 'Noah', 'Lucas', 'Mia', 'Amelia', 'Sophia', 'James', 'Ava']

In [109]:
# Plain dict lookup: returns the list stored under the 'School' key.
data['School']

['MIT', 'MIT', 'Harvard', 'MIT', 'Harvard', 'MIT', 'Harvard', 'MIT']

## Constructing DataFrame from a dictionary.

In [110]:
import pandas as pd
# Build a DataFrame from the dict: each key becomes a column label and, since
# no index is given, the rows are numbered 0-7.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Height,Weight,School,Gender,Python,Java,Javascript,C#,C++,HTML
0,Tom,175,80,MIT,M,80,75,77,97,92,72
1,Noah,190,90,MIT,M,90,92,95,87,80,93
2,Lucas,185,83,Harvard,M,30,25,15,23,13,50
3,Mia,165,50,MIT,F,66,54,59,61,54,54
4,Amelia,170,56,Harvard,F,72,77,62,74,72,69
5,Sophia,160,60,MIT,F,93,95,90,88,85,82
6,James,183,72,Harvard,M,100,97,100,95,91,99
7,Ava,172,55,MIT,F,76,73,70,66,68,73


In [111]:
df.dtypes # Check the data type of each column

Name          object
Height         int64
Weight         int64
School        object
Gender        object
Python         int64
Java           int64
Javascript     int64
C#             int64
C++            int64
HTML           int64
dtype: object

## DataFrame 객체에 Index 지정

In [112]:
# Supply custom row labels through `index`, one label per row.
df = pd.DataFrame(data, index = ['No.1','No.2','No.3','No.4','No.5','No.6','No.7','No.8'])
df

Unnamed: 0,Name,Height,Weight,School,Gender,Python,Java,Javascript,C#,C++,HTML
No.1,Tom,175,80,MIT,M,80,75,77,97,92,72
No.2,Noah,190,90,MIT,M,90,92,95,87,80,93
No.3,Lucas,185,83,Harvard,M,30,25,15,23,13,50
No.4,Mia,165,50,MIT,F,66,54,59,61,54,54
No.5,Amelia,170,56,Harvard,F,72,77,62,74,72,69
No.6,Sophia,160,60,MIT,F,93,95,90,88,85,82
No.7,James,183,72,Harvard,M,100,97,100,95,91,99
No.8,Ava,172,55,MIT,F,76,73,70,66,68,73


In [113]:
# Row labels of the DataFrame.
df.index

Index(['No.1', 'No.2', 'No.3', 'No.4', 'No.5', 'No.6', 'No.7', 'No.8'], dtype='object')

In [114]:
# Column labels of the DataFrame.
df.columns

Index(['Name', 'Height', 'Weight', 'School', 'Gender', 'Python', 'Java',
       'Javascript', 'C#', 'C++', 'HTML'],
      dtype='object')

## 특정 데이터에 접근

In [115]:
# Selecting a single column by its label returns a Series.
df['Name']

No.1       Tom
No.2      Noah
No.3     Lucas
No.4       Mia
No.5    Amelia
No.6    Sophia
No.7     James
No.8       Ava
Name: Name, dtype: object

In [116]:
# Passing a list of labels selects several columns and returns a DataFrame.
df[['Name','School']]

Unnamed: 0,Name,School
No.1,Tom,MIT
No.2,Noah,MIT
No.3,Lucas,Harvard
No.4,Mia,MIT
No.5,Amelia,Harvard
No.6,Sophia,MIT
No.7,James,Harvard
No.8,Ava,MIT


In [117]:
# A DataFrame can be built from only the columns you want; the resulting
# columns follow the order given in `columns`.
tmp = pd.DataFrame(data, columns=['Name','Gender','School'])
tmp

Unnamed: 0,Name,Gender,School
0,Tom,M,MIT
1,Noah,M,MIT
2,Lucas,M,Harvard
3,Mia,F,MIT
4,Amelia,F,Harvard
5,Sophia,F,MIT
6,James,M,Harvard
7,Ava,F,MIT


# DataFrame 파일 저장 및 불러오기

## .csv 파일로 저장

In [118]:
df.to_csv('info.csv', index=False) # Save without the index that was set earlier
# df.to_csv('info.csv',encoding='utf-8-sig') # If the data contains Korean text, an encoding must be specified.

## .txt 파일로 저장

In [119]:
df.to_csv('info.txt', sep='\t', index=False) # Separator: writes a tab-delimited text file

## .xlsx 파일로 저장

In [120]:
# Write the DataFrame to an Excel workbook, leaving out the index.
df.to_excel('info.xlsx', index=False)

***
## .csv 파일 열기

In [121]:
# Read the CSV back. The custom index was not saved (index=False above), so
# the rows are numbered 0-7 again.
df = pd.read_csv('info.csv')
df

Unnamed: 0,Name,Height,Weight,School,Gender,Python,Java,Javascript,C#,C++,HTML
0,Tom,175,80,MIT,M,80,75,77,97,92,72
1,Noah,190,90,MIT,M,90,92,95,87,80,93
2,Lucas,185,83,Harvard,M,30,25,15,23,13,50
3,Mia,165,50,MIT,F,66,54,59,61,54,54
4,Amelia,170,56,Harvard,F,72,77,62,74,72,69
5,Sophia,160,60,MIT,F,93,95,90,88,85,82
6,James,183,72,Harvard,M,100,97,100,95,91,99
7,Ava,172,55,MIT,F,76,73,70,66,68,73


In [122]:
# Skip the first 3 lines of the file. The header line counts as one of them,
# so the next line (the 'Lucas' row) ends up being used as the column header.
df = pd.read_csv('info.csv', skiprows=3)
df

Unnamed: 0,Lucas,185,83,Harvard,M,30,25,15,23,13,50
0,Mia,165,50,MIT,F,66,54,59,61,54,54
1,Amelia,170,56,Harvard,F,72,77,62,74,72,69
2,Sophia,160,60,MIT,F,93,95,90,88,85,82
3,James,183,72,Harvard,M,100,97,100,95,91,99
4,Ava,172,55,MIT,F,76,73,70,66,68,73


In [123]:
# Skip only the listed file lines (0-based, with line 0 being the header):
# here the Tom, Noah and Sophia rows are dropped and the header is kept.
df = pd.read_csv('info.csv', skiprows=[1,2,6])
df

Unnamed: 0,Name,Height,Weight,School,Gender,Python,Java,Javascript,C#,C++,HTML
0,Lucas,185,83,Harvard,M,30,25,15,23,13,50
1,Mia,165,50,MIT,F,66,54,59,61,54,54
2,Amelia,170,56,Harvard,F,72,77,62,74,72,69
3,James,183,72,Harvard,M,100,97,100,95,91,99
4,Ava,172,55,MIT,F,76,73,70,66,68,73


In [124]:
# Skip the first 3 lines, then read 4 data rows. As above, the first
# remaining line (the 'Lucas' row) is consumed as the header.
df = pd.read_csv('info.csv', skiprows=3, nrows=4)
df

Unnamed: 0,Lucas,185,83,Harvard,M,30,25,15,23,13,50
0,Mia,165,50,MIT,F,66,54,59,61,54,54
1,Amelia,170,56,Harvard,F,72,77,62,74,72,69
2,Sophia,160,60,MIT,F,93,95,90,88,85,82
3,James,183,72,Harvard,M,100,97,100,95,91,99


## .txt 파일 열기

In [125]:
# The file was saved tab-separated, so pass the same `sep` when reading it.
df = pd.read_csv('info.txt', sep='\t')
df

Unnamed: 0,Name,Height,Weight,School,Gender,Python,Java,Javascript,C#,C++,HTML
0,Tom,175,80,MIT,M,80,75,77,97,92,72
1,Noah,190,90,MIT,M,90,92,95,87,80,93
2,Lucas,185,83,Harvard,M,30,25,15,23,13,50
3,Mia,165,50,MIT,F,66,54,59,61,54,54
4,Amelia,170,56,Harvard,F,72,77,62,74,72,69
5,Sophia,160,60,MIT,F,93,95,90,88,85,82
6,James,183,72,Harvard,M,100,97,100,95,91,99
7,Ava,172,55,MIT,F,76,73,70,66,68,73


## .xlsx 파일 열기

- xlrd 2.0부터 잠재적인 보안 이슈 때문에 .xlsx 형식 지원이 중단되었으므로, pandas에서 .xlsx 파일을 읽으려면 openpyxl 라이브러리를 설치해야 한다.

In [106]:
# Reading .xlsx files requires the openpyxl package; uncomment once it is installed.
# df = pd.read_excel('info.xlsx')