## Defining DataFrame

In [1]:
import pandas as pd
import numpy as np


In [2]:
dataset = np.array([['kor', 70], ['math', 80]])
df = pd.DataFrame(dataset, columns=['class', 'score'])
df

Unnamed: 0,class,score
0,kor,70
1,math,80


## other ways

In [3]:
df = pd.DataFrame([['kor', 70], ['math', 80]], columns=['class', 'score'])
df = pd.DataFrame({'class': ['kor', 'math'], 'score': [70, 80]})
df

Unnamed: 0,class,score
0,kor,70
1,math,80


##  Defining Series

In [4]:
SR = pd.Series({'idx 1': 10, 'idx 2': 20}, name = 'num')
SR

idx 1    10
idx 2    20
Name: num, dtype: int64

## Read & Save DataFrame

In [5]:
filepath = 'data/iris.csv'
data = pd.read_csv(filepath, na_values='NA', encoding='utf8')

In [6]:
data.to_csv('result.csv', header=True, index=True, encoding='utf8') 

## Print DataFrame

In [7]:
from sklearn.datasets import load_iris
iris = load_iris()
iris = pd.DataFrame(iris.data, columns=iris.feature_names)
iris

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [8]:
iris.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [9]:
iris.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8


## Print All cols & rows

In [10]:
# pd.set_option('display.max_columns', None)

# pd.set_option('display.max_rows', None)

## Basic stats of DataFrame 

In [11]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [12]:
iris.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


## DataFrame Index

In [13]:
df

Unnamed: 0,class,score
0,kor,70
1,math,80


In [14]:
df.index

RangeIndex(start=0, stop=2, step=1)

In [15]:
list(df.index)

[0, 1]

In [16]:
df.index = ['A', 'B']

In [17]:
df.index

Index(['A', 'B'], dtype='object')

In [18]:
df

Unnamed: 0,class,score
A,kor,70
B,math,80


## set_index()

**DataFrame.set_index(keys, drop=True, append=False, inplace=True)**

In [19]:
df.set_index('class', drop=True, append=False, inplace=True)
df

Unnamed: 0_level_0,score
class,Unnamed: 1_level_1
kor,70
math,80


## reset_index()

**DataFrame.reset_index(drop=False, inplace=False)**

In [20]:
df.reset_index(drop=False, inplace=True)
df

Unnamed: 0,class,score
0,kor,70
1,math,80


## DataFrame columns

In [21]:
iris.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')

In [22]:
iris.columns = ['sepal lenth', 'sepal width', 'petal length', 'petal width']
iris

Unnamed: 0,sepal lenth,sepal width,petal length,petal width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


## replace() 

**DataFrame.columns.str.replace('A', 'B')**

In [23]:
iris.columns = iris.columns.str.replace(' ', '_')
iris.head()

Unnamed: 0,sepal_lenth,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## Data type - cols

In [24]:
iris.dtypes

sepal_lenth     float64
sepal_width     float64
petal_length    float64
petal_width     float64
dtype: object

In [26]:
iris['sepal_length'] = iris['sepal_width'].astype('int')
iris[['sepal_width', 'petal_length']] = iris[['sepal_width', 'petal_length']].astype('int')
iris.head()

Unnamed: 0,sepal_lenth,sepal_width,petal_length,petal_width,sepal_length
0,5.1,3,1,0.2,3
1,4.9,3,1,0.2,3
2,4.7,3,1,0.2,3
3,4.6,3,1,0.2,3
4,5.0,3,1,0.2,3
