# 1122_DS_Lab4 Pandas 數據分析套件

In [1]:
import pandas as pd
import numpy as np
import requests

# Pandas 主要資料結構介紹 - Series

### Series 為附帶index的一維numpy array(ndarray)，index是可重複的。
##### Series API Reference: https://pandas.pydata.org/pandas-docs/stable/reference/series.html

#### Series 可儲存不同類別的資料

In [2]:
s = pd.Series(['value1', 1.0, 3])
s

0    value1
1       1.0
2         3
dtype: object

In [15]:
s = pd.Series([4, 7, -5, 3])
s

0    4
1    7
2   -5
3    3
dtype: int64

In [16]:
s.values

array([ 4,  7, -5,  3], dtype=int64)

In [19]:
s = pd.Series(['Value1', 1.0, 3])
s

0    Value1
1       1.0
2         3
dtype: object

In [20]:
s[0:2] # Get value with row index

0    Value1
1       1.0
dtype: object

## Operation

#### 兩Series的Operation(+,-,/,..)會根據相同的index計算，長度不需相同。
#### 計算時只處理index在兩Series index交集的值，若不在交集中則為NaN

In [21]:
a = pd.Series([1,2,3,4,5], index=[0,1,2,3,4])
a

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [22]:
b = pd.Series([1,2,3,4,5],index=[1,2,3,4,5])
b

1    1
2    2
3    3
4    4
5    5
dtype: int64

In [23]:
a+b

0    NaN
1    3.0
2    5.0
3    7.0
4    9.0
5    NaN
dtype: float64

#### 若是出現重複的index時則會將所有組合都計算。

#### 下面的例子中 Series a 中有兩個 index 0，因此0在結果(a+b)的index中也出現了兩次，分別是2(1+1)和3(1+2)

In [25]:
a = pd.Series([1,2,3], index=[0,0,1])
b = pd.Series([1,2,3], index=[0,1,2])
print(a)
print(b)
print(a+b)

0    1
0    2
1    3
dtype: int64
0    1
1    2
2    3
dtype: int64
0    2.0
0    3.0
1    5.0
2    NaN
dtype: float64


#### 若是inedx在Series a 和 b中都重複出現時，則會如下所示。
#### index 0在 a和b中各出現了兩次，因此結果便有4個index為0的值，分別為2(1+1)、4(1+3)、3(2+1)、5(2+3)

In [27]:
a = pd.Series([1,2,3], index=[0,0,1])
b = pd.Series([1,2,3], index=[0,1,0])
print(a+b)

0    2
0    4
0    3
0    5
1    5
dtype: int64


# Pandas 主要資料結構介紹 - DataFrame
### dataframe 為二維的、大小可更動的表格資料，可視為dictionary-like的Series。
##### DataFrame API  Reference: https://pandas.pydata.org/pandas-docs/stable/reference/frame.html

In [28]:
df = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[0,1,2,3]) 
df

Unnamed: 0,column1,column2
0,0,4
1,1,5
2,2,6
3,3,7


In [29]:
df['column2']

0    4
1    5
2    6
3    7
Name: column2, dtype: int64

In [32]:
df['column2'].isin([5])  # isin() 可以判斷DataFrame、Series 以及索引是否包含傳入的指定值，返回True表示有包含。

0    False
1     True
2    False
3    False
Name: column2, dtype: bool

In [33]:
df.column2.isin(['5'])

0    False
1    False
2    False
3    False
Name: column2, dtype: bool

In [36]:
df.drop? 

In [37]:
df.drop(['column2'],axis=1) # drop() 删除一行或一列

Unnamed: 0,column1
0,0
1,1
2,2
3,3


In [39]:
df=pd.DataFrame([[0,4],[1,5],[2,6],[3,7]], columns=['Column1','Column2'])
df

Unnamed: 0,Column1,Column2
0,0,4
1,1,5
2,2,6
3,3,7


In [40]:
df = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,2,3,4])#若是沒有指定index，則index從0開始
df

Unnamed: 0,column1,column2
1,0,4
2,1,5
3,2,6
4,3,7


### 如同Series，可在同一Data Frame中儲存不同型別的資料。

In [42]:
df = pd.DataFrame({'column1':[0, 1, 2, 3], 'column2':[4.0, 'value5', 6, 7]}, index=[1, 2, 3, 4])
df

Unnamed: 0,column1,column2
1,0,4.0
2,1,value5
3,2,6
4,3,7


In [43]:
df['column1']

1    0
2    1
3    2
4    3
Name: column1, dtype: int64

In [44]:
df['column2']

1       4.0
2    value5
3         6
4         7
Name: column2, dtype: object

## Operation
#### 運算邏輯大致與Series相同，計算時以行、列兩者index皆相同者計算。

#### 兩者計算時只有column name 為'column1'且row index為[1,2,3]的值存在於兩者交集之中，
#### 因此結果為 1(0+1)、3(1+2)、5(2+3)，其餘則為空值。

In [45]:
df1 = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,2,3,4])
df2 = pd.DataFrame({'column1':[0,1,2,3], 'column3':[4,5,6,7]})
df1

Unnamed: 0,column1,column2
1,0,4
2,1,5
3,2,6
4,3,7


In [46]:
df2

Unnamed: 0,column1,column3
0,0,4
1,1,5
2,2,6
3,3,7


In [47]:
df1+df2

Unnamed: 0,column1,column2,column3
0,,,
1,1.0,,
2,3.0,,
3,5.0,,
4,,,


## index重複時運算處理

### 若是inedx重複出現時，分為兩種情況。

### 1.重複出現次數一致時

In [48]:
df1 = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,2,3,3])
df1

Unnamed: 0,column1,column2
1,0,4
2,1,5
3,2,6
3,3,7


In [49]:
df2 = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,2,3,3])
df2

Unnamed: 0,column1,column2
1,0,4
2,1,5
3,2,6
3,3,7


#### 此時不像Series出現4筆資料，而是根據位置只計算兩組資料。

In [50]:
df1+df2

Unnamed: 0,column1,column2
1,0,8
2,2,10
3,4,12
3,6,14


### 2.重複出現次數不一致

In [51]:
df1 = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,2,3,3])
df1

Unnamed: 0,column1,column2
1,0,4
2,1,5
3,2,6
3,3,7


In [52]:
df2 = pd.DataFrame({'column1':[0,1,2,3], 'column2':[4,5,6,7]}, index=[1,3,3,3])
df2

Unnamed: 0,column1,column2
1,0,4
3,1,5
3,2,6
3,3,7


#### 此時則會計算所有的組合結果

In [53]:
df1+df2 #df2中並沒有 row '2' 因此為空值

Unnamed: 0,column1,column2
1,0.0,8.0
2,,
3,3.0,11.0
3,4.0,12.0
3,5.0,13.0
3,4.0,12.0
3,5.0,13.0
3,6.0,14.0


# 資料處理
### 以上一章節中使用的股市資料示範如何利用Pandas進行資料篩選以及處理。

### 股市資料

In [55]:
date = "20240227"
url = f'https://www.twse.com.tw/exchangeReport/MI_INDEX?response=json&date={date}&type=ALLBUT0999'
response = requests.get(url)
response_json = response.json()
stock = pd.DataFrame(response_json['data9'], columns=response_json['fields9'])

In [56]:
stock.to_csv('stock_20240227.csv', index=False)

In [58]:
# 用上次閜仔的資料，使用pd.read_csv(filepath)即可
pd.read_csv('stock_20240227.csv').head()

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.2,142.65,143.35,<p style= color:green>-</p>,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0
3,53,元大電子,10526,84,824634,78.65,78.65,78.25,78.3,<p style= color:green>-</p>,0.35,78.25,1,78.45,23,0.0
4,55,元大MSCI金融,227513,270,5408153,23.8,23.89,23.71,23.75,<p style= color:green>-</p>,0.05,23.74,1,23.75,8,0.0


In [60]:
stock.head()

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.2,142.65,143.35,<p style= color:green>-</p>,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0
3,53,元大電子,10526,84,824634,78.65,78.65,78.25,78.3,<p style= color:green>-</p>,0.35,78.25,1,78.45,23,0.0
4,55,元大MSCI金融,227513,270,5408153,23.8,23.89,23.71,23.75,<p style= color:green>-</p>,0.05,23.74,1,23.75,8,0.0


## 資料篩選 - slice
#### 將第0到第2(3-1)筆資料取出

In [61]:
stock[0:3]

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.2,142.65,143.35,<p style= color:green>-</p>,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0


### Select by location - iloc、iat
#### 取出"位置"在第二筆的資料
#### iloc[ ] 含頭不含尾

In [62]:
stock.iloc[1] #start from 0

證券代號                              0051
證券名稱                           元大中型100
成交股數                            57,037
成交筆數                               268
成交金額                         4,344,737
開盤價                              76.65
最高價                              77.00
最低價                              75.55
收盤價                              76.15
漲跌(+/-)    <p style= color:green>-</p>
漲跌價差                              0.35
最後揭示買價                           76.15
最後揭示買量                               1
最後揭示賣價                           76.20
最後揭示賣量                              31
本益比                               0.00
Name: 1, dtype: object

In [65]:
stock.iloc[1:4] 

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0
3,53,元大電子,10526,84,824634,78.65,78.65,78.25,78.3,<p style= color:green>-</p>,0.35,78.25,1,78.45,23,0.0


#### 取出"位置"在第二筆第二行的資料
#### iat[ , ] 只能訪問單個元素

In [64]:
stock.iat[1,1] #start from 0

'元大中型100'

### Slice by index - loc
#### 取出row index為 0到2的值
#### loc[ ] 含頭也含尾

In [66]:
stock.loc[0:2]

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.2,142.65,143.35,<p style= color:green>-</p>,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0


取出row index從0到3且column index從"開盤價"到"收盤價"的值

In [67]:
stock.loc[0:3, "開盤價":"收盤價"]

Unnamed: 0,開盤價,最高價,最低價,收盤價
0,143.95,144.2,142.65,143.35
1,76.65,77.0,75.55,76.15
2,145.3,145.3,143.2,144.3
3,78.65,78.65,78.25,78.3


### 資料篩選 - Mask
#### 透過 loc 以Mask的方式擷取所需的資料

取出row index小於5的資料

In [68]:
stock.loc[stock.index < 5]

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.2,142.65,143.35,<p style= color:green>-</p>,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,<p style= color:green>-</p>,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.3,143.2,144.3,<p style= color:green>-</p>,0.75,144.3,27,144.4,18,0.0
3,53,元大電子,10526,84,824634,78.65,78.65,78.25,78.3,<p style= color:green>-</p>,0.35,78.25,1,78.45,23,0.0
4,55,元大MSCI金融,227513,270,5408153,23.8,23.89,23.71,23.75,<p style= color:green>-</p>,0.05,23.74,1,23.75,8,0.0


複數條件篩選 - row index 大於5且小於10的資料

In [69]:
stock.loc[(stock.index > 5) & (stock.index < 10)]

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
6,57,富邦摩台,14018,105,1504493,107.75,107.9,106.65,106.95,<p style= color:green>-</p>,0.6,107.0,25,107.25,20,0.0
7,61,元大寶滬深,457635,201,7562068,16.58,16.61,16.45,16.56,<p style= color:red>+</p>,0.05,16.55,4,16.56,3,0.0
8,6203,元大MSCI台灣,6301,64,435791,69.3,69.3,69.0,69.0,<p style= color:green>-</p>,0.3,68.7,1,69.0,1,0.0
9,6204,永豐臺灣加權,4876,96,457604,93.75,93.75,93.75,93.75,<p style= color:green>-</p>,0.4,93.35,50,93.75,32,0.0


## 型別轉換
#### 在資料中"最高價"被儲存為字串，我們可以將其轉換為數值方便計算。

In [70]:
print(type(stock.最高價[0]))
stock.最高價[0]

<class 'str'>


'144.20'

#### 使用astype將字串轉為float

In [71]:
stock.最高價.head().astype("float")

0    144.20
1     77.00
2    145.30
3     78.65
4     23.89
Name: 最高價, dtype: float64

##### could not convert string to float: '--'
#### 在轉換的過程中出現無法解析的字串時便會發生錯誤

In [72]:
stock.最高價.astype("float")

ValueError: could not convert string to float: '--'

### 使用to_numeric進行轉換，errors參數可指定解析錯誤時的處理，
#### 　　raise:  中斷執行，拋出例外。
#### 　　ignore: 不進行轉換，回傳原值。
#### 　　corece: 回傳 NaN

In [73]:
stock.最高價 = pd.to_numeric(stock.最高價, errors='coerce', downcast='float')

In [75]:
stock.最高價.isnull()  # 判斷缺失值

0       False
1       False
2       False
3       False
4       False
        ...  
1221    False
1222    False
1223    False
1224    False
1225    False
Name: 最高價, Length: 1226, dtype: bool

In [76]:
stock.最高價[stock.最高價.isnull()]#查看所有轉換後為NaN的值

14     NaN
29     NaN
77     NaN
166    NaN
169    NaN
170    NaN
175    NaN
184    NaN
327    NaN
419    NaN
478    NaN
555    NaN
664    NaN
701    NaN
722    NaN
803    NaN
816    NaN
837    NaN
939    NaN
1027   NaN
1077   NaN
1209   NaN
Name: 最高價, dtype: float32

## 使用describe快速了解資料
#### describe根據資料型別，計算簡單的統計結果。

In [77]:
stock.最高價.describe()

count    1204.000000
mean       70.298210
std       100.772469
min         1.180000
25%        19.887501
50%        36.895000
75%        74.025002
max       970.000000
Name: 最高價, dtype: float64

# 資料取代
在"漲跌(+/-)"中資料被儲存為HTML格式，我們將其取代為boolean格式以方便計算。

### 透過使用value_counts查看有多少種不同的值以及其出現次數。

##### *空值將不會出現以及計算出現次數。*

In [78]:
stock['漲跌(+/-)'].value_counts()

漲跌(+/-)
<p style= color:green>-</p>    799
<p style= color:red>+</p>      300
<p> </p>                       116
<p>X</p>                        11
Name: count, dtype: int64

### 取代資料中的 + 資料為True
### 使用 replace() 進行批次替換

In [82]:
stock['漲跌(+/-)'].replace('<p style= color:red>+</p>', True).head(10)

0    <p style= color:green>-</p>
1    <p style= color:green>-</p>
2    <p style= color:green>-</p>
3    <p style= color:green>-</p>
4    <p style= color:green>-</p>
5    <p style= color:green>-</p>
6    <p style= color:green>-</p>
7                           True
8    <p style= color:green>-</p>
9    <p style= color:green>-</p>
Name: 漲跌(+/-), dtype: object

### 也可一次指定多筆取代操作

In [83]:
replace_map = {"<p style= color:red>+</p>":True, "<p style= color:green>-</p>":False}
stock['漲跌(+/-)'].replace(replace_map).value_counts()

漲跌(+/-)
False       799
True        300
<p> </p>    116
<p>X</p>     11
Name: count, dtype: int64

## 使用map取代資料
### replace只將所有符合的字串取代
### 可利用map將其餘值轉換為NaN方便計算

In [84]:
replace_map = {"<p style= color:red>+</p>":True, "<p style= color:green>-</p>":False, True:True, False:False}
#keep True as True, False as False

stock['漲跌(+/-)'] = stock['漲跌(+/-)'].map(replace_map)
stock['漲跌(+/-)'].value_counts() #not include nan

漲跌(+/-)
False    799
True     300
Name: count, dtype: int64

### 使用isnull計算空值數量

In [85]:
stock['漲跌(+/-)'].isnull().sum()

127

### 使用isnull構成mask查詢'漲跌(+/-)'為空值的資料，若是想要查詢'漲跌(+/-)'非空值的資料可使用notnull。

In [86]:
stock[stock['漲跌(+/-)'].isnull()]

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
12,006207,復華滬深,568317,188,12255599,21.51,21.709999,21.51,21.71,,0.00,21.71,1,21.72,155,0.00
14,00625K,富邦上証+R,0,0,0,--,,--,--,,0.00,6.54,5,6.59,5,0.00
18,00634R,富邦上証反1,150000,30,722270,4.81,4.830000,4.80,4.80,,0.00,4.80,774,4.82,499,0.00
29,00643K,群益深証中小+R,0,0,0,--,,--,--,,0.00,2.50,2,2.56,1,0.00
72,00700,富邦恒生國企,770208,249,8586725,11.19,11.240000,11.08,11.21,,0.00,11.21,6,11.22,3,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1188,9902,台火,144065,103,1856854,12.90,13.000000,12.85,12.90,,0.00,12.90,7,12.95,8,61.43
1203,9925,新保,340783,203,13681173,40.10,40.200001,40.10,40.15,,0.00,40.15,2,40.20,14,16.66
1209,9931,欣高,186,9,7063,--,,--,--,,0.00,38.50,2,38.95,1,29.84
1218,9941A,裕融甲特,77070,19,3861307,50.10,50.200001,50.10,50.10,,0.00,50.00,47,50.10,7,0.00


In [87]:
stock['漲跌(+/-)'].notnull().sum()

1099

In [89]:
stock[stock['漲跌(+/-)'].notnull()].head(10)

Unnamed: 0,證券代號,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
0,50,元大台灣50,6507934,10372,933504679,143.95,144.199997,142.65,143.35,False,0.6,143.3,7,143.35,40,0.0
1,51,元大中型100,57037,268,4344737,76.65,77.0,75.55,76.15,False,0.35,76.15,1,76.2,31,0.0
2,52,富邦科技,718096,675,103446258,145.3,145.300003,143.2,144.3,False,0.75,144.3,27,144.4,18,0.0
3,53,元大電子,10526,84,824634,78.65,78.650002,78.25,78.3,False,0.35,78.25,1,78.45,23,0.0
4,55,元大MSCI金融,227513,270,5408153,23.8,23.889999,23.71,23.75,False,0.05,23.74,1,23.75,8,0.0
5,56,元大高股息,22779907,17409,855248893,37.68,37.830002,37.25,37.51,False,0.17,37.51,31,37.52,2,0.0
6,57,富邦摩台,14018,105,1504493,107.75,107.900002,106.65,106.95,False,0.6,107.0,25,107.25,20,0.0
7,61,元大寶滬深,457635,201,7562068,16.58,16.610001,16.45,16.56,True,0.05,16.55,4,16.56,3,0.0
8,6203,元大MSCI台灣,6301,64,435791,69.3,69.300003,69.0,69.0,False,0.3,68.7,1,69.0,1,0.0
9,6204,永豐臺灣加權,4876,96,457604,93.75,93.75,93.75,93.75,False,0.4,93.35,50,93.75,32,0.0


## apply介紹
### 使用apply將函式套用在所有資料上
#### 上表中我們發現資料集中還是有很多不能直接轉換為數值的字串( 例如超過1000元有含有"',"的符號  1,000 )
#### 透過apply一次將所有可轉為數值的資料進行轉換。

In [93]:
data = stock.loc[:, "成交股數" : "收盤價"]
data.head()

Unnamed: 0,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價
0,6507934,10372,933504679,143.95,144.199997,142.65,143.35
1,57037,268,4344737,76.65,77.0,75.55,76.15
2,718096,675,103446258,145.3,145.300003,143.2,144.3
3,10526,84,824634,78.65,78.650002,78.25,78.3
4,227513,270,5408153,23.8,23.889999,23.71,23.75


#### replace 參數regex=true 使用正則表達式搜尋並且取代

In [91]:
replace_map = {",":"", "--":np.nan}
data = data.replace(replace_map, regex=True)
data.head()

Unnamed: 0,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價
0,6507934,10372,933504679,143.95,144.199997,142.65,143.35
1,57037,268,4344737,76.65,77.0,75.55,76.15
2,718096,675,103446258,145.3,145.300003,143.2,144.3
3,10526,84,824634,78.65,78.650002,78.25,78.3
4,227513,270,5408153,23.8,23.889999,23.71,23.75


In [97]:
data = data.apply(pd.to_numeric, errors='coerce') #errors為to_numeric之參數
data.head()

Unnamed: 0,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價
0,,,,143.95,144.199997,142.65,143.35
1,,268.0,,76.65,77.0,75.55,76.15
2,,675.0,,145.3,145.300003,143.2,144.3
3,,84.0,,78.65,78.650002,78.25,78.3
4,,270.0,,23.8,23.889999,23.71,23.75


## 練習: 請問2024/3/18日台灣股市收盤價每一股超過1000元的股票有幾家公司? 這幾家每股超過1000元的平均收盤價是多少? 目前股王是哪一家公司? 