In [1]:
import pandas as pd

## Load Dataset

In [2]:
# Load the CSV and use its "Name" column as the row index.
pd.read_csv(
    "transaction.csv",
    index_col="Name",
)

Unnamed: 0_level_0,date,amount,result
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kang,2017-01-01,500,confirmed
Kim,2017-01-03,700,confirmed
Choi,2017-01-05,800,confirmed
Park,2017-01-07,500,canceled
Lee,2017-01-09,700,confirmed
Yoon,2017-01-10,200,canceled


In [3]:
# Same load as before, but stop after the first three data rows.
pd.read_csv(
    "transaction.csv",
    index_col="Name",
    nrows=3,
)

Unnamed: 0_level_0,date,amount,result
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kang,2017-01-01,500,confirmed
Kim,2017-01-03,700,confirmed
Choi,2017-01-05,800,confirmed


In [4]:
# Read only the first two file columns (selected by position) and inspect the
# resulting dtypes. The label-based spelling of the same selection would be
# usecols=["Name", "date"].
pd.read_csv(
    "transaction.csv",
    index_col="Name",
    usecols=[0, 1],
).dtypes

date    object
dtype: object

In [5]:
# Have pandas parse the "date" column while loading, then check the dtypes.
pd.read_csv(
    "transaction.csv",
    index_col="Name",
    parse_dates=["date"],
).dtypes

date      datetime64[ns]
amount             int64
result            object
dtype: object

In [6]:
# Keep the name-indexed, date-parsed frame around for the cells that follow.
transaction = pd.read_csv(
    "transaction.csv",
    index_col="Name",
    parse_dates=["date"],
)
transaction

Unnamed: 0_level_0,date,amount,result
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kang,2017-01-01,500,confirmed
Kim,2017-01-03,700,confirmed
Choi,2017-01-05,800,confirmed
Park,2017-01-07,500,canceled
Lee,2017-01-09,700,confirmed
Yoon,2017-01-10,200,canceled


In [7]:
# Write the frame loaded above back out to a new CSV file.
transaction.to_csv(path_or_buf="transaction(result).csv")

In [8]:
# Tab-separated input: same reader, with the separator set to a tab.
# Note that this rebinds `transaction` to the TSV contents.
transaction = pd.read_csv(
    "transaction.tsv",
    sep="\t",
)
transaction

Unnamed: 0,Name,date,amount
0,Kang,2017-01-01,500
1,Kim,2017-01-03,700
2,Choi,2017-01-05,800
3,Park,2017-01-07,500
4,Lee,2017-01-09,700
5,Yoon,2017-01-10,200


In [9]:
# pd.read_excel("transaction.xls")

In [10]:
# pd.read_hdf("transaction.h5")

In [11]:
# transaction_url = "https://goo.gl/WhZcFA"
# pd.read_csv(transaction_url)

In [12]:
# When text such as Korean comes out broken while reading, switching the
# parser with engine="python" can help.
# NOTE(review): for garbled characters, passing an explicit encoding= is
# usually the more direct fix - confirm which failure this was meant to address.
pd.read_csv(
    "transaction.tsv",
    sep="\t",
    engine="python",
)

Unnamed: 0,Name,date,amount
0,Kang,2017-01-01,500
1,Kim,2017-01-03,700
2,Choi,2017-01-05,800
3,Park,2017-01-07,500
4,Lee,2017-01-09,700
5,Yoon,2017-01-10,200


## 데이터베이스에서 바로 읽기

In [13]:
import sqlite3

In [14]:
# One-liner equivalent of this cell:
#   pd.read_sql("SELECT * FROM 'transaction'", sqlite3.connect("pandas-transaction.db"))
query = "SELECT * FROM 'transaction'"
connection = sqlite3.connect("pandas-transaction.db")

# Run the query over the open connection and load the rows into a DataFrame.
transaction = pd.read_sql(sql=query, con=connection)
transaction

Unnamed: 0,Name,date,amount,result
0,Kang,2017-01-01,500,confirmed
1,Kim,2017-01-03,700,confirmed
2,Choi,2017-01-05,800,confirmed
3,Park,2017-01-07,500,canceled
4,Lee,2017-01-09,700,confirmed
5,Yoon,2017-01-10,200,canceled


In [15]:
# Reuse the already-open connection: fetch only two columns and cap the
# result at three rows.
# NOTE(review): `connection` is never closed in the visible cells - call
# connection.close() once no further queries are needed.
query = "SELECT date, amount FROM 'transaction' LIMIT 3"

transaction = pd.read_sql(sql=query, con=connection)
transaction

Unnamed: 0,date,amount
0,2017-01-01,500
1,2017-01-03,700
2,2017-01-05,800


## 여러 개의 데이터를 하나로 합치기

In [16]:
# Load the 2017 transactions with the default integer index and preview them.
transaction2017 = pd.read_csv(filepath_or_buffer="transaction.csv")
transaction2017.head(n=5)

Unnamed: 0,Name,date,amount,result
0,Kang,2017-01-01,500,confirmed
1,Kim,2017-01-03,700,confirmed
2,Choi,2017-01-05,800,confirmed
3,Park,2017-01-07,500,canceled
4,Lee,2017-01-09,700,confirmed


In [17]:
# Load the 2016 transactions with the default integer index and preview them.
transaction2016 = pd.read_csv(filepath_or_buffer="transaction2016.csv")
transaction2016.head(n=5)

Unnamed: 0,Name,amount,date,result
0,Lee,400,2016-01-01,confirmed
1,Young,300,2016-01-05,confirmed
2,Kim,700,2016-01-06,confirmed
3,Choi,800,2016-01-07,canceled
4,Yoon,500,2016-01-11,confirmed


In [18]:
# Load the 2015 transactions with the default integer index and preview them.
transaction2015 = pd.read_csv(filepath_or_buffer="transaction2015.csv")
transaction2015.head(n=5)

Unnamed: 0,Name,amount,date,result
0,Park,400,2015-01-01,confirmed
1,Kim,800,2015-01-02,confirmed
2,Choi,500,2015-01-05,canceled


In [19]:
# concat == concatenate: stack the yearly frames on top of each other (axis=0).
# The frames are expected to share the same columns; any number of frames can
# be passed in the list.
pd.concat(
    [transaction2015, transaction2016, transaction2017],
    axis=0,
)

Unnamed: 0,Name,amount,date,result
0,Park,400,2015-01-01,confirmed
1,Kim,800,2015-01-02,confirmed
2,Choi,500,2015-01-05,canceled
0,Lee,400,2016-01-01,confirmed
1,Young,300,2016-01-05,confirmed
2,Kim,700,2016-01-06,confirmed
3,Choi,800,2016-01-07,canceled
4,Yoon,500,2016-01-11,confirmed
5,Jang,200,2016-01-13,confirmed
6,Park,300,2016-01-20,canceled


In [20]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# the method-style call raises AttributeError on current versions. pd.concat
# with the starting frame placed first in the list produces the same stacked
# result.
pd.concat([transaction2015, transaction2016, transaction2017])

Unnamed: 0,Name,amount,date,result
0,Park,400,2015-01-01,confirmed
1,Kim,800,2015-01-02,confirmed
2,Choi,500,2015-01-05,canceled
0,Lee,400,2016-01-01,confirmed
1,Young,300,2016-01-05,confirmed
2,Kim,700,2016-01-06,confirmed
3,Choi,800,2016-01-07,canceled
4,Yoon,500,2016-01-11,confirmed
5,Jang,200,2016-01-13,confirmed
6,Park,300,2016-01-20,canceled


In [21]:
# Move "Name" from a regular column into the row index.
# The guard keeps the cell safe to re-run: once "Name" is already the index,
# calling set_index("Name") again would raise KeyError because the column no
# longer exists.
if transaction2015.index.name != "Name":
    transaction2015 = transaction2015.set_index("Name")
transaction2015

Unnamed: 0_level_0,amount,date,result
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Park,400,2015-01-01,confirmed
Kim,800,2015-01-02,confirmed
Choi,500,2015-01-05,canceled


In [24]:
# Payment details for 2015, keyed by customer name. Only "Park" and "Kim" get
# a row; the optional third record and its index label are left disabled.
how2015 = pd.DataFrame(
    [
        {"how": "무통장 입금", "installment": "없음"},
        {"how": "카드 결제", "installment": "0개월"},
        # {"how": "카드 결제", "installment": "3개월"},
    ],
    index=["Park", "Kim"],  # with the third record enabled: ["Park", "Kim", "Choi"]
)
how2015

Unnamed: 0,how,installment
Park,무통장 입금,없음
Kim,카드 결제,0개월


In [25]:
# Join the two frames side by side (axis=1). Rows are matched on their index
# labels, so a label missing from one frame ("Choi" in how2015) ends up with
# NaN in that frame's columns.
pd.concat(
    [transaction2015, how2015],
    axis=1,
)

Unnamed: 0,amount,date,result,how,installment
Choi,500,2015-01-05,canceled,,
Kim,800,2015-01-02,confirmed,카드 결제,0개월
Park,400,2015-01-01,confirmed,무통장 입금,없음
