# `Pandas DataFrame - 資料處理與轉換的重要好夥伴`
### 作者：徐子皓
***

## 基本操作

### 導入pandas套件包、原始資料集

In [1]:
import pandas as pd
# Load the transaction records from CSV. The path is relative, so it is
# resolved against the kernel's current working directory.
transaction = pd.read_csv(filepath_or_buffer='CH2-10_transaction.csv')
# A bare name as the last expression of the cell renders the frame as output.
transaction

Unnamed: 0,tid,uid,product,quantity,price
0,T0001,1,lemon,5,12.4
1,T0002,5,banana,6,42.16
2,T0003,4,orange,4,6.2
3,T0003,3,cherry,3,74.4
4,T0003,3,guava,2,24.8
5,T0004,2,banana,10,42.16
6,T0004,2,orange,5,6.2
7,T0004,2,guava,2,24.8
8,T0005,5,orange,4,6.2
9,T0005,5,lemon,5,12.4


### 製作字典(dictionary)格式的資料，並轉換為pandas dataframe

In [2]:
# Column-oriented records: each key is a column name and each list holds
# that column's values, one entry per member (all lists have equal length).
data = dict(
    uid=[1, 2, 3, 4, 5],
    name=['Howard', 'Lily', 'Kai', 'Jojo', 'Ivan'],
    age=[25, 21, 35, 18, 15],
)
data

{'uid': [1, 2, 3, 4, 5],
 'name': ['Howard', 'Lily', 'Kai', 'Jojo', 'Ivan'],
 'age': [25, 21, 35, 18, 15]}

In [3]:
# Build a DataFrame from the dict: keys become column labels, lists become
# the column values, and the rows get a default 0..n-1 integer index.
member = pd.DataFrame(data=data)
member

Unnamed: 0,uid,name,age
0,1,Howard,25
1,2,Lily,21
2,3,Kai,35
3,4,Jojo,18
4,5,Ivan,15


### 查看資料集頭五筆資料

In [4]:
# Show the first five rows (n=5 is also the default when n is omitted).
member.head(n=5)

Unnamed: 0,uid,name,age
0,1,Howard,25
1,2,Lily,21
2,3,Kai,35
3,4,Jojo,18
4,5,Ivan,15


### 查看資料集特定欄位內容

In [5]:
# Select every row of a single column by label; the result is a Series.
member.loc[:, 'name']

0    Howard
1      Lily
2       Kai
3      Jojo
4      Ivan
Name: name, dtype: object

In [6]:
# Passing a list of labels selects several columns and yields a DataFrame.
member.loc[:, ['name', 'age']]

Unnamed: 0,name,age
0,Howard,25
1,Lily,21
2,Kai,35
3,Jojo,18
4,Ivan,15


### 基本統計指標

In [7]:
# Arithmetic mean of the age column.
member['age'].mean()

22.8

In [8]:
# Full set of summary statistics: count, mean, std, min, quartiles and max.
# The percentiles listed here are the ones describe() uses by default.
member['age'].describe(percentiles=[0.25, 0.5, 0.75])

count     5.000000
mean     22.800000
std       7.758866
min      15.000000
25%      18.000000
50%      21.000000
75%      25.000000
max      35.000000
Name: age, dtype: float64

## 資料排序

### 遞增排序

In [9]:
# Sort the ages from smallest to largest; original index labels are kept.
member['age'].sort_values(ascending=True)

4    15
3    18
1    21
0    25
2    35
Name: age, dtype: int64

### 遞減排序

In [10]:
# Sort the ages from largest to smallest; original index labels are kept.
member['age'].sort_values(ascending=False)

2    35
0    25
1    21
3    18
4    15
Name: age, dtype: int64

### 根據特定欄位進行排序

In [11]:
# Reorder whole rows by the age column (ascending); returns a new frame and
# leaves `member` itself unchanged.
member.sort_values(by='age')

Unnamed: 0,uid,name,age
4,5,Ivan,15
3,4,Jojo,18
1,2,Lily,21
0,1,Howard,25
2,3,Kai,35


## 進階應用

### 刪除欄位

In [12]:
# Drop the uid column. drop() returns a new frame, so `member` keeps all of
# its columns and the reduced copy is bound to `member2`.
member2 = member.drop(labels=['uid'], axis='columns')
member2

Unnamed: 0,name,age
0,Howard,25
1,Lily,21
2,Kai,35
3,Jojo,18
4,Ivan,15


### 判斷資料是否符合條件

In [13]:
# Element-wise equality test: a boolean Series that is True on the rows
# whose product is 'lemon'.
transaction['product'].eq('lemon')

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9     True
Name: product, dtype: bool

### 篩選符合條件的資料

In [14]:
# Boolean-mask selection: keep only the rows where the mask is True.
transaction.loc[transaction['product'] == 'lemon']

Unnamed: 0,tid,uid,product,quantity,price
0,T0001,1,lemon,5,12.4
9,T0005,5,lemon,5,12.4


### 將資料集轉換回串列型態

In [15]:
# Convert the frame into a list of row lists (one inner list per row, in
# column order). to_numpy() is the accessor pandas recommends over the
# older .values attribute; the resulting nested list is the same.
member.to_numpy().tolist()

[[1, 'Howard', 25],
 [2, 'Lily', 21],
 [3, 'Kai', 35],
 [4, 'Jojo', 18],
 [5, 'Ivan', 15]]