# Chapter 5 使用Pandas

## 算數運算與資料對齊

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
s1= pd.Series([3,6,9,12],index=['b','a','d','c'])

In [4]:
s2= pd.Series([4,8,12,16,20],index=['a','b','c','e','f'])

In [5]:
s1

b     3
a     6
d     9
c    12
dtype: int64

In [6]:
s2

a     4
b     8
c    12
e    16
f    20
dtype: int64

In [7]:
s1+s2

a    10.0
b    11.0
c    24.0
d     NaN
e     NaN
f     NaN
dtype: float64

### 資料沒對上，相加的值就會是NaN。

In [8]:
df1= pd.DataFrame(np.arange(9).reshape((3,3)), columns=list('bcd'),index=['sun','moon','lake'])

In [9]:
df2= pd.DataFrame(np.arange(12).reshape((4,3)), columns=list('bde'),index=['chocolate','sun','moon','cake'])

In [10]:
df1

Unnamed: 0,b,c,d
sun,0,1,2
moon,3,4,5
lake,6,7,8


In [11]:
df2

Unnamed: 0,b,d,e
chocolate,0,1,2
sun,3,4,5
moon,6,7,8
cake,9,10,11


In [12]:
df1+df2

Unnamed: 0,b,c,d,e
cake,,,,
chocolate,,,,
lake,,,,
moon,9.0,,12.0,
sun,3.0,,6.0,


### 只有moon, sun, b,d 產生交集的地方有數字，其餘都是NaN。

In [13]:
df1= pd.DataFrame({'A':[1,2]})

In [14]:
df2= pd.DataFrame({'B':[3,4]})

In [15]:
df1

Unnamed: 0,A
0,1
1,2


In [16]:
df2

Unnamed: 0,B
0,3
1,4


In [17]:
df1-df2

Unnamed: 0,A,B
0,,
1,,


### 算數運算與填值

In [18]:
df1= pd.DataFrame(np.arange(12).reshape(3,4),columns=list('abcd'))

In [19]:
df2= pd.DataFrame(np.arange(20).reshape(4,5),columns=list('abcde'))

In [20]:
df1

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [21]:
df2.loc[1,'b',]=np.nan
df2

Unnamed: 0,a,b,c,d,e
0,0,1.0,2,3,4
1,5,,7,8,9
2,10,11.0,12,13,14
3,15,16.0,17,18,19


In [22]:
df1+df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


### add(frame, fill_value=?) ：可以將NaN的地方，用0取代。

In [23]:
df1.add(df2,fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,5.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


### 倒數: 1/df1=df1.rdiv(1)

In [24]:
1/df1

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [25]:
df1.rdiv(1)

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


###可以透過reindex把欄位數跟df2切齊，並且透過fill_value，把NaN改為0。

In [26]:
df1.reindex(columns=df2.columns, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,0
1,4,5,6,7,0
2,8,9,10,11,0


### frame的加減運算

In [27]:
df1.add(df2)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [28]:
df1.radd(df2)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [29]:
df1.sub(df2) ### 減法

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,
1,-1.0,,-1.0,-1.0,
2,-2.0,-2.0,-2.0,-2.0,
3,,,,,


In [30]:
df1.rsub(df2)

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,
1,1.0,,1.0,1.0,
2,2.0,2.0,2.0,2.0,
3,,,,,


In [31]:
df1.div(df2) ### 除法

Unnamed: 0,a,b,c,d,e
0,,1.0,1.0,1.0,
1,0.8,,0.857143,0.875,
2,0.8,0.818182,0.833333,0.846154,
3,,,,,


In [32]:
df1.floordiv(df2) ### 整除

Unnamed: 0,a,b,c,d,e
0,,1.0,1.0,1.0,
1,0.0,,0.0,0.0,
2,0.0,0.0,0.0,0.0,
3,,,,,


In [33]:
df1.mul(df2) ### 相乘

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,4.0,9.0,
1,20.0,,42.0,56.0,
2,80.0,99.0,120.0,143.0,
3,,,,,


In [34]:
df1.pow(df2) ### 取冪

Unnamed: 0,a,b,c,d,e
0,1.0,1.0,4.0,27.0,
1,1024.0,,279936.0,5764801.0,
2,1073742000.0,31381060000.0,1000000000000.0,34522710000000.0,
3,,,,,


### 在DataFrame與Series之間的運算

In [35]:
arr=np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [36]:
arr[0]

array([0, 1, 2, 3])

In [37]:
arr-arr[0]

array([[0, 0, 0, 0],
       [4, 4, 4, 4],
       [8, 8, 8, 8]])

### 當進行減法時，每一列都會進行一次，這個動作被稱為廣播（broadcasting)。

In [38]:
frame=pd.DataFrame(np.arange(12).reshape(4,3),columns=list('bde'),index=['Light','Dark','Full','Empty'])

In [39]:
series=frame.iloc[0]

In [40]:
frame

Unnamed: 0,b,d,e
Light,0,1,2
Dark,3,4,5
Full,6,7,8
Empty,9,10,11


In [41]:
series

b    0
d    1
e    2
Name: Light, dtype: int64

In [42]:
frame-series  ### 每一列都被減一次，稱為廣播。

Unnamed: 0,b,d,e
Light,0,0,0
Dark,3,3,3
Full,6,6,6
Empty,9,9,9


In [43]:
series2=pd.Series(range(3),index=['b','e','f'])

In [44]:
frame+series2

Unnamed: 0,b,d,e,f
Light,0.0,,3.0,
Dark,3.0,,6.0,
Full,6.0,,9.0,
Empty,9.0,,12.0,


In [45]:
series3=frame['d']

In [46]:
frame

Unnamed: 0,b,d,e
Light,0,1,2
Dark,3,4,5
Full,6,7,8
Empty,9,10,11


In [47]:
series3

Light     1
Dark      4
Full      7
Empty    10
Name: d, dtype: int64

In [48]:
frame.sub(series3,axis='index')

Unnamed: 0,b,d,e
Light,-1,0,1
Dark,-1,0,1
Full,-1,0,1
Empty,-1,0,1


### apply和applymap

In [49]:
frame= pd.DataFrame(np.random.randn(4,3),
                    columns=list('bde'),index=['Giant','Specialized','Trek','Raise & Muller'])

In [50]:
frame

Unnamed: 0,b,d,e
Giant,1.672977,-1.752818,-1.233243
Specialized,-2.032933,0.102801,-1.228356
Trek,1.392529,1.915983,1.413667
Raise & Muller,0.621836,-0.92839,-1.967772


In [51]:
np.abs(frame)

Unnamed: 0,b,d,e
Giant,1.672977,1.752818,1.233243
Specialized,2.032933,0.102801,1.228356
Trek,1.392529,1.915983,1.413667
Raise & Muller,0.621836,0.92839,1.967772


In [52]:
f= lambda x: x.max() -x.min()

### apply(funciton, axis=0) 因為axis=0是預設值，所以計算出來為欄的最大與最小差異。

In [53]:
frame.apply(f)

b    3.705910
d    3.668801
e    3.381439
dtype: float64

### apply(funciton, axis=1) 因為有特別說明axis='columns'，所以計算出來為列的最大與最小差異。

In [54]:
frame.apply(f,axis='columns')

Giant             3.425795
Specialized       2.135734
Trek              0.523454
Raise & Muller    2.589608
dtype: float64

### 除了計算值出來外，也可以透過def回傳dataframe/Series

In [55]:
def f(x):
    return pd.Series([x.min(),x.max()],index=['min','max'])

In [56]:
frame.apply(f)

Unnamed: 0,b,d,e
min,-2.032933,-1.752818,-1.967772
max,1.672977,1.915983,1.413667


In [57]:
format= lambda x: '%.2f' % x

### applymap(): 用來執行lambda的函式。

In [58]:
frame.applymap(format)

Unnamed: 0,b,d,e
Giant,1.67,-1.75,-1.23
Specialized,-2.03,0.1,-1.23
Trek,1.39,1.92,1.41
Raise & Muller,0.62,-0.93,-1.97


### map可以對Series級別做運作。

In [59]:
Series1=pd.Series(np.arange(5), index=['a','b','c','d','e'])

In [60]:
frame['b'].map[format]

TypeError: 'method' object is not subscriptable

### 排序和排名

### sort_index(): 可以幫index()排名。

In [61]:
obj=pd.Series(range(4),index=['d','a','b','c'])

In [62]:
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [63]:
frame=pd.DataFrame(np.arange(8).reshape((2,4)),index=['three','one'],columns=['d','a','b','c'])
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [64]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [65]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


### sort_index(axis=1, ascending=boolean)： 除了可以讓column排序外，可以指定column要由小到大(預設值），還是由大到小排序。

In [66]:
frame.sort_index(axis=1,ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [67]:
frame.sort_index(axis=1,ascending=True)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


### sort_values: 可以對Series的值做排序。

In [68]:
obj=pd.Series([-3,6,9,-12,15])

In [69]:
obj.sort_values()

3   -12
0    -3
1     6
2     9
4    15
dtype: int64

### 如果有NaN，會被排在最後面。

In [70]:
obj=pd.Series([np.nan, 2,-4,np.nan,6,-8,np.nan,10])

In [71]:
obj.sort_values()

5    -8.0
2    -4.0
1     2.0
4     6.0
7    10.0
0     NaN
3     NaN
6     NaN
dtype: float64

In [72]:
frame=pd.DataFrame({'b':[5,-10,15,-20],'a':[7, 14,-21,-28]})
frame

Unnamed: 0,b,a
0,5,7
1,-10,14
2,15,-21
3,-20,-28


### frame的排序可以指定某一欄位做由小到大的排序。

In [73]:
frame.sort_values(by='b')

Unnamed: 0,b,a
3,-20,-28
1,-10,14
0,5,7
2,15,-21


### frame也可以依據多個欄位做排序，寫入[x,y]即可。

### 寫入的順序，會影響以誰先做排序。

In [74]:
frame.sort_values(by=['a','b'])

Unnamed: 0,b,a
3,-20,-28
2,15,-21
0,5,7
1,-10,14


### rank(): 可以給Series排名，從1開始排名，遇到一樣大的數值時，會採用他們的平均排名值。

In [75]:
obj=pd.Series([7,-5,7,6,-4,6,5])

### 這裡的index代表value原本在Series的所在位置，比如說index=0，指的是7這個數字。
### 因為7在這個數列中，並列第6名與第7名，所以排名顯示為6.5。
### -5在Series中是最小的，因此顯示為第1名。

In [76]:
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    2.0
5    4.5
6    3.0
dtype: float64

### rank(method= first()): 代表即時是一樣的值，先出現，就先排名，也就不用平均排名值了。

In [77]:
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    2.0
5    5.0
6    3.0
dtype: float64

### rank(ascending=Boolean): 預設值為True，會由小到大排序，相反的話，會由大到小。
### rank(method='max'): 表示遇到一樣的值時，排名取2個最大的，由於7並列第1名與第2名，故顯示2。

In [78]:
obj.rank(ascending=False, method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    6.0
5    4.0
6    5.0
dtype: float64

### rank(method='min'): 表示遇到一樣的值時，排名取2個最小的，由於7並列第6名與第7名，故顯示6。

In [79]:
obj.rank(method='min')

0    6.0
1    1.0
2    6.0
3    4.0
4    2.0
5    4.0
6    3.0
dtype: float64

### rank(method='dense'): 表示遇到一樣的值時，排名取最小的，此外，不可以跳過，要接續排名。

In [80]:
obj.rank(method='dense')

0    5.0
1    1.0
2    5.0
3    4.0
4    2.0
5    4.0
6    3.0
dtype: float64

In [81]:
frame= pd.DataFrame({'b':[2,4,6,8,-10],'a':[4,8,12,16,-20],'c':[5,10,15,20,-25]})
frame

Unnamed: 0,b,a,c
0,2,4,5
1,4,8,10
2,6,12,15
3,8,16,20
4,-10,-20,-25


### rank(axis=1): 表示幫每行做排序。（橫向）

In [82]:
frame.rank(axis='columns')

Unnamed: 0,b,a,c
0,1.0,2.0,3.0
1,1.0,2.0,3.0
2,1.0,2.0,3.0
3,1.0,2.0,3.0
4,3.0,2.0,1.0


### rank(): 預設值為axis=0，表示幫每欄做排序。（直向）

In [83]:
frame.rank()

Unnamed: 0,b,a,c
0,2.0,2.0,2.0
1,3.0,3.0,3.0
2,4.0,4.0,4.0
3,5.0,5.0,5.0
4,1.0,1.0,1.0


### 軸index有重複標籤: 不論是Series, DataFrame的允許有重複的index標籤。

In [84]:
obj=pd.Series(range(5),index=['a','a','b','b','c'])
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [85]:
obj.index.is_unique ### 表示index有重複值。

False

In [86]:
obj['a']

a    0
a    1
dtype: int64

In [87]:
obj['c']

4

In [88]:
df=pd.DataFrame(np.random.randn(4,3),index=['a','a','b','b'])
df

Unnamed: 0,0,1,2
a,-0.476466,0.202394,-1.740731
a,1.42909,0.512764,0.760216
b,-0.019014,0.148319,-0.427029
b,-1.978611,0.225556,-1.026688


In [89]:
df.loc['b']

Unnamed: 0,0,1,2
b,-0.019014,0.148319,-0.427029
b,-1.978611,0.225556,-1.026688


### 匯總和計算描述性統計量

In [90]:
df=pd.DataFrame([[2,np.nan],[7,-4],[np.nan,np.nan],[0.75,-1.3]],index=['a','b','c','d'],columns=['one','two'])

In [91]:
df

Unnamed: 0,one,two
a,2.0,
b,7.0,-4.0
c,,
d,0.75,-1.3


### sum(): 每欄加總。 sum(axis=1): 每列加總。

In [92]:
df.sum()

one    9.75
two   -5.30
dtype: float64

In [93]:
df.sum(axis='columns')

a    2.00
b    3.00
c    0.00
d   -0.55
dtype: float64

### skipna=False: 在進行運算時，na通常會被忽略，但如果不想被忽略可以寫上skipna=False。

In [94]:
df.mean(axis=1, skipna=False)

a      NaN
b    1.500
c      NaN
d   -0.275
dtype: float64

### idxmax(): 回傳每一欄中，哪一個index呈現的值最大，並回傳index。

In [95]:
df.idxmax()

one    b
two    d
dtype: object

### idxmin(): 回傳每一欄中，哪一個index呈現的值最小，並回傳index。

In [96]:
df.idxmin()

one    d
two    b
dtype: object

### cumsum()=a, a+b, a+b+c, a+b+C+d, ...，在frame中，會呈現每欄的累進加總。

In [97]:
df.cumsum()

Unnamed: 0,one,two
a,2.0,
b,9.0,-4.0
c,,
d,9.75,-5.3


### describe(): 針對該frame，提供多個統計值。

In [98]:
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.25,-2.65
std,3.307189,1.909188
min,0.75,-4.0
25%,1.375,-3.325
50%,2.0,-2.65
75%,4.5,-1.975
max,7.0,-1.3


### 如果資料不是數值，describe()會顯示另一種統計值。

In [99]:
obj=pd.Series(['a','b','c','d','e']*4)

In [100]:
obj.describe()

count     20
unique     5
top        a
freq       4
dtype: object

### 相關係數與共變異數

### 前置作業：增加一個新的環境：conda install pandas-datareader

### 由於2017年Yahoo被Verizon收購，所以pandas-datareader無法使用。

In [101]:
import pandas_datareader.data as web

In [102]:
all_data= {'ticker': web.get_data_yahoo(ticker)
          for ticker in ['AAPL','IBM','MSFT','GOOG']}

price= pd.DataFrame({ticker: data['Adj Close']
                    for ticker, data in all_data.items()})

volume=pd.DataFrame({ticker: data['Volume']
                    for ticker, data in all_data.items()})

In [103]:
returns= price.pct_change()

In [104]:
returns.tail()

Unnamed: 0_level_0,ticker
Date,Unnamed: 1_level_1
2022-10-17,0.037045
2022-10-18,0.006053
2022-10-19,-0.010849
2022-10-20,0.002393
2022-10-21,0.00945


### 目前是使用yfinance去連接yahoo股市的api

### 在terminal輸入pip install yfinance 即可安裝

In [105]:
import yfinance as yf

### Yahoo的程式跑不動，先用隨機變數代替。

In [106]:
frame= pd.DataFrame(np.random.randn(20).reshape(5,4), 
                    index=['2022-10-01','2022-10-02','2022-10-03','2022-10-04','2022-10-05'],
                    columns=['AAPL','GOOG','IBM','MSFT'])
frame

Unnamed: 0,AAPL,GOOG,IBM,MSFT
2022-10-01,-1.788987,-0.56828,-0.615595,-0.630598
2022-10-02,-0.201641,-1.125275,-1.569592,-0.215547
2022-10-03,-0.003312,0.27995,0.26615,-0.334492
2022-10-04,-1.318154,-0.548588,1.472842,-0.331395
2022-10-05,1.344545,1.552147,0.00553,0.518079


### corr():代表計算兩個series的關係係數。cov():計算兩個series的共變異數。

In [107]:
frame['MSFT'].corr(frame['IBM']) #兩者高度正相關

0.03354537115769053

In [108]:
frame['AAPL'].cov(frame['GOOG']) #兩者低度負相關

0.9728293121095833

In [109]:
frame.MSFT.corr(frame['IBM']) #兩者高度正相關

0.03354537115769053

### 如果讓corr()直接與dataframe做搭配，會出現一個關係係數的矩陣。

In [110]:
frame.corr()

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.0,0.761434,-0.12539,0.909955
GOOG,0.761434,1.0,0.28948,0.789158
IBM,-0.12539,0.28948,1.0,0.033545
MSFT,0.909955,0.789158,0.033545,1.0


In [111]:
frame.cov()

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.503056,0.972829,-0.172625,0.478745
GOOG,0.972829,1.08601,0.338756,0.352921
IBM,-0.172625,0.338756,1.260967,0.016165
MSFT,0.478745,0.352921,0.016165,0.18416


### corrwith(): 可以計算整欄或整列的相關係數。

In [112]:
frame.corrwith(frame.IBM)

AAPL   -0.125390
GOOG    0.289480
IBM     1.000000
MSFT    0.033545
dtype: float64

### 如果是傳入整個frame，就會對應名稱相同的欄，進行變異數值運算。
### 需注意index要一致。

In [113]:
frame2= pd.DataFrame(np.random.randn(20).reshape(5,4), 
                    index=['2022-10-01','2022-10-02','2022-10-03','2022-10-04','2022-10-05'],
                    columns=['TSML','GOOG','IBM','MSFT'])
frame2

Unnamed: 0,TSML,GOOG,IBM,MSFT
2022-10-01,0.211041,-0.782676,1.453142,-0.044512
2022-10-02,-0.32494,-1.473604,-0.832557,-1.577551
2022-10-03,-1.205226,-1.038401,0.050018,-1.642688
2022-10-04,-1.409925,-1.796177,-0.650985,-0.224187
2022-10-05,0.738041,1.316395,-0.54078,0.346686


In [114]:
frame.corrwith(frame2)

GOOG    0.889638
IBM    -0.139175
MSFT    0.346304
AAPL         NaN
TSML         NaN
dtype: float64

### 不重複值、個數計算和成員關係

In [115]:
obj= pd.Series(['c','a','b','b','a','c','d','d','a'])

### unique(): 可以取出不重複的值。

In [116]:
uniques= obj.unique()
uniques

array(['c', 'a', 'b', 'd'], dtype=object)

In [117]:
uniques.sort() ### 將取出來的值做排序。

In [118]:
uniques

array(['a', 'b', 'c', 'd'], dtype=object)

### value_counts(): 可以計算每個元素各有幾個重複值。

In [119]:
obj.value_counts()

a    3
c    2
b    2
d    2
dtype: int64

### pd.value_counts(frame.values, sort=boolean) 可以直接對series的值進行個數加總，以及排序個數值。

In [120]:
pd.value_counts(obj.values, sort=True)

a    3
c    2
b    2
d    2
dtype: int64

In [121]:
obj

0    c
1    a
2    b
3    b
4    a
5    c
6    d
7    d
8    a
dtype: object

### isnin()： 可以用來檢查該series有沒有括號內的元素，如此將形成一個布林判斷式。

In [122]:
mask=obj.isin(['b','c'])

In [123]:
mask

0     True
1    False
2     True
3     True
4    False
5     True
6    False
7    False
8    False
dtype: bool

In [124]:
obj[mask] #Series放入布林判斷式，會產生另一個子Series

0    c
2    b
3    b
5    c
dtype: object

### get_index: 可以用來找出A_Series的index，套用在B_Series會呈現怎麼樣的array。

In [125]:
to_match= pd.Series(['c','a','b','b','c','a']) #B_Series

In [126]:
unique_vals= pd.Series(['c','b','a']) #A_Series(0,1,2)

In [127]:
pd.Index(unique_vals).get_indexer(to_match) #將c=0, b=1, a=2的Index套用在B_Series上。

array([0, 2, 1, 1, 0, 2])

In [128]:
data=pd.DataFrame({'Qu1':[1,3,4,3,4],
                   'Qu2':[2,3,1,2,3],
                   'Qu3':[1,5,2,4,4]}, 
                    index=['a','b','c','d','e'])
data

Unnamed: 0,Qu1,Qu2,Qu3
a,1,2,1
b,3,3,5
c,4,1,2
d,3,2,4
e,4,3,4


### 最左手邊代表值，數字1在Qu1欄位出現了1次。

In [129]:
result= data.apply(pd.value_counts)

In [130]:
result

Unnamed: 0,Qu1,Qu2,Qu3
1,1.0,1.0,1.0
2,,2.0,1.0
3,2.0,2.0,
4,2.0,,2.0
5,,,1.0


### fillna(0): 如果表格出現NaN，就回填0。

In [131]:
result= data.apply(pd.value_counts).fillna(0)
result

Unnamed: 0,Qu1,Qu2,Qu3
1,1.0,1.0,1.0
2,0.0,2.0,1.0
3,2.0,2.0,0.0
4,2.0,0.0,2.0
5,0.0,0.0,1.0
