# 12. Machine learning techniques

In [1]:
!git clone https://github.com/s7s/machine_learning_1.git
%cd  machine_learning_1/ML_in_practice

Cloning into 'machine_learning_1'...
remote: Enumerating objects: 215, done.[K
remote: Counting objects: 100% (215/215), done.[K
remote: Compressing objects: 100% (157/157), done.[K
remote: Total 215 (delta 102), reused 168 (delta 55), pack-reused 0[K
Receiving objects: 100% (215/215), 35.23 MiB | 19.04 MiB/s, done.
Resolving deltas: 100% (102/102), done.
/content/machine_learning_1/ML_in_practice


In [2]:
import random as rd
rd.seed(0)

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 12.1 Loading and exploring the dataset

First, we use pandas to load the dataset from a csv file.

In [76]:
# read './titanic.csv'
raw_data = pd.read_csv('./titanic.csv')
raw_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


Next, we can explore the dataset.

In [77]:
# the length of the dataset
len(raw_data)


891

In [78]:
# the columns in the dataset
raw_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [79]:
#examine "survived" column (labels)
raw_data["Survived"]



0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [80]:
# exanine more than one column at the same time ["Name", "Age"]
raw_data[["Name", "Age"]]


Unnamed: 0,Name,Age
0,"Braund, Mr. Owen Harris",22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
2,"Heikkinen, Miss. Laina",26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0
...,...,...
886,"Montvila, Rev. Juozas",27.0
887,"Graham, Miss. Margaret Edith",19.0
888,"Johnston, Miss. Catherine Helen ""Carrie""",
889,"Behr, Mr. Karl Howell",26.0


In [81]:
#check how many passengers survived
sum(raw_data["Survived"])
#raw_data[].sum() will give same result 


342

## 12.2. Cleaning up the data

Now, let's look at how many columns have missing data

In [86]:
#missing data (NA(not available) values) for all the columns
raw_data.isna().sum()


PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

The Cabin column is missing too many values to be useful. Let's drop it altogether.

In [87]:
#drop "Cabin" column
clean_data = raw_data.drop(['Cabin'],axis=1)

In [88]:
clean_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C


Other columns such as Age or Embarked are missing some values, but they can still be useful.

For the age column, let's fill in the missing values with the median of all ages.

For the Embarked column, let's make a new category called 'U', for Unknown port of embarkment.

In [89]:
# the median of age column
median_age = raw_data['Age'].median()
median_age

28.0

In [90]:
# fill the na values in age column with the median age
clean_data["Age"] = clean_data["Age"].fillna(median_age)

In [91]:
# fill the na values in embarked column with 'U'
clean_data["Embarked"] = clean_data["Embarked"].fillna("U")

In [92]:
clean_data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64

In [93]:
clean_data.head(10)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S
5,6,0,3,"Moran, Mr. James",male,28.0,0,0,330877,8.4583,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,C


### 12.2.3 Saving our data for the future

In [94]:
clean_data.to_csv('./clean_titanic_data.csv',index=None)



## 12.3 Manipulating the features

- One-hot encoding
- Binning
- Feature selection

### 12.3.1 One-hot encoding

In [99]:
preprocessed_data = pd.read_csv('clean_titanic_data.csv')
preprocessed_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Embarked'],
      dtype='object')

In [100]:
# the one hot encoding of “embarked”, “pclass” and “gender”
preprocessed_data=pd.get_dummies(preprocessed_data,columns=['Embarked','Pclass','Sex'])
preprocessed_data

#get dummies returns the data frame with new columns instead of the old 


Unnamed: 0,PassengerId,Survived,Name,Age,SibSp,Parch,Ticket,Fare,Embarked_C,Embarked_Q,Embarked_S,Embarked_U,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male
0,1,0,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.2500,0,0,1,0,0,0,1,0,1
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,0,PC 17599,71.2833,1,0,0,0,1,0,0,1,0
2,3,1,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.9250,0,0,1,0,0,0,1,1,0
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1000,0,0,1,0,1,0,0,1,0
4,5,0,"Allen, Mr. William Henry",35.0,0,0,373450,8.0500,0,0,1,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,"Montvila, Rev. Juozas",27.0,0,0,211536,13.0000,0,0,1,0,0,1,0,0,1
887,888,1,"Graham, Miss. Margaret Edith",19.0,0,0,112053,30.0000,0,0,1,0,1,0,0,1,0
888,889,0,"Johnston, Miss. Catherine Helen ""Carrie""",28.0,1,2,W./C. 6607,23.4500,0,0,1,0,0,0,1,1,0
889,890,1,"Behr, Mr. Karl Howell",26.0,0,0,111369,30.0000,1,0,0,0,1,0,0,0,1


In [101]:
preprocessed_data

Unnamed: 0,PassengerId,Survived,Name,Age,SibSp,Parch,Ticket,Fare,Embarked_C,Embarked_Q,Embarked_S,Embarked_U,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male
0,1,0,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.2500,0,0,1,0,0,0,1,0,1
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,0,PC 17599,71.2833,1,0,0,0,1,0,0,1,0
2,3,1,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.9250,0,0,1,0,0,0,1,1,0
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1000,0,0,1,0,1,0,0,1,0
4,5,0,"Allen, Mr. William Henry",35.0,0,0,373450,8.0500,0,0,1,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,"Montvila, Rev. Juozas",27.0,0,0,211536,13.0000,0,0,1,0,0,1,0,0,1
887,888,1,"Graham, Miss. Margaret Edith",19.0,0,0,112053,30.0000,0,0,1,0,1,0,0,1,0
888,889,0,"Johnston, Miss. Catherine Helen ""Carrie""",28.0,1,2,W./C. 6607,23.4500,0,0,1,0,0,0,1,1,0
889,890,1,"Behr, Mr. Karl Howell",26.0,0,0,111369,30.0000,1,0,0,0,1,0,0,0,1


### 12.3.2 Binning

In [102]:
# .cut() method to make bins from the age column
bins = [0, 10, 20, 30, 40, 50, 60, 70, 80,120]
categorized_age =pd.cut(preprocessed_data.Age, bins, labels=["From 0 to 10",'from 11 to 20','from 21 to 30',
                                                                             'from 31 to 40','from 41 to 50','from 51 to 60','from 61 to 70',
                                                                             'from 71 to 80','from 81 years old or older'])
preprocessed_data['Categorized_age'] = categorized_age
preprocessed_data = preprocessed_data.drop(["Age"], axis=1)
preprocessed_data

Unnamed: 0,PassengerId,Survived,Name,SibSp,Parch,Ticket,Fare,Embarked_C,Embarked_Q,Embarked_S,Embarked_U,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,Categorized_age
0,1,0,"Braund, Mr. Owen Harris",1,0,A/5 21171,7.2500,0,0,1,0,0,0,1,0,1,from 21 to 30
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,0,PC 17599,71.2833,1,0,0,0,1,0,0,1,0,from 31 to 40
2,3,1,"Heikkinen, Miss. Laina",0,0,STON/O2. 3101282,7.9250,0,0,1,0,0,0,1,1,0,from 21 to 30
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,0,113803,53.1000,0,0,1,0,1,0,0,1,0,from 31 to 40
4,5,0,"Allen, Mr. William Henry",0,0,373450,8.0500,0,0,1,0,0,0,1,0,1,from 31 to 40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,"Montvila, Rev. Juozas",0,0,211536,13.0000,0,0,1,0,0,1,0,0,1,from 21 to 30
887,888,1,"Graham, Miss. Margaret Edith",0,0,112053,30.0000,0,0,1,0,1,0,0,1,0,from 11 to 20
888,889,0,"Johnston, Miss. Catherine Helen ""Carrie""",1,2,W./C. 6607,23.4500,0,0,1,0,0,0,1,1,0,from 21 to 30
889,890,1,"Behr, Mr. Karl Howell",0,0,111369,30.0000,1,0,0,0,1,0,0,0,1,from 21 to 30


In [103]:
# the one hot encoding of “Categorized_age”
preprocessed_data=pd.get_dummies(preprocessed_data,columns=['Categorized_age'])
preprocessed_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 25 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   PassengerId                                 891 non-null    int64  
 1   Survived                                    891 non-null    int64  
 2   Name                                        891 non-null    object 
 3   SibSp                                       891 non-null    int64  
 4   Parch                                       891 non-null    int64  
 5   Ticket                                      891 non-null    object 
 6   Fare                                        891 non-null    float64
 7   Embarked_C                                  891 non-null    uint8  
 8   Embarked_Q                                  891 non-null    uint8  
 9   Embarked_S                                  891 non-null    uint8  
 10  Embarked_U    

### 12.3.4 Feature selection

In [104]:
preprocessed_data = preprocessed_data.drop(['Name', 'Ticket', 'PassengerId'], axis=1)

preprocessed_data.head()

Unnamed: 0,Survived,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S,Embarked_U,Pclass_1,Pclass_2,...,Sex_male,Categorized_age_From 0 to 10,Categorized_age_from 11 to 20,Categorized_age_from 21 to 30,Categorized_age_from 31 to 40,Categorized_age_from 41 to 50,Categorized_age_from 51 to 60,Categorized_age_from 61 to 70,Categorized_age_from 71 to 80,Categorized_age_from 81 years old or older
0,0,1,0,7.25,0,0,1,0,0,0,...,1,0,0,1,0,0,0,0,0,0
1,1,1,0,71.2833,1,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,7.925,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,1,1,0,53.1,0,0,1,0,1,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,8.05,0,0,1,0,0,0,...,1,0,0,0,1,0,0,0,0,0


### 12.3.5 Saving for future use

In [105]:
preprocessed_data.to_csv('./preprocessed_titanic_data.csv', index=None)

# 12.4 Training models

In [106]:
data = pd.read_csv('./preprocessed_titanic_data.csv')
data.head()

Unnamed: 0,Survived,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S,Embarked_U,Pclass_1,Pclass_2,...,Sex_male,Categorized_age_From 0 to 10,Categorized_age_from 11 to 20,Categorized_age_from 21 to 30,Categorized_age_from 31 to 40,Categorized_age_from 41 to 50,Categorized_age_from 51 to 60,Categorized_age_from 61 to 70,Categorized_age_from 71 to 80,Categorized_age_from 81 years old or older
0,0,1,0,7.25,0,0,1,0,0,0,...,1,0,0,1,0,0,0,0,0,0
1,1,1,0,71.2833,1,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,7.925,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,1,1,0,53.1,0,0,1,0,1,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,8.05,0,0,1,0,0,0,...,1,0,0,0,1,0,0,0,0,0


### 12.4.1 Features-labels split and train-validation split

In [107]:
# drop ["Survived"] column and save to features
features = data.drop(["Survived"], axis=1)

# save the ["Survived"] column to labels
labels = data["Survived"]
labels

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [108]:
from sklearn.model_selection import train_test_split

In [109]:
# split data by 60% train ; use random_state=100
features_train, features_validation_test, labels_train, labels_validation_test = train_test_split(
    features, labels, test_size=0.4, random_state=100)



In [110]:
# split test data by 50% validation and 50% test ; use random_state=100
features_validation, features_test, labels_validation, labels_test = train_test_split(
    features_validation_test, labels_validation_test, test_size=0.5, random_state=100)

In [111]:
print(len(features_train))
print(len(features_validation))
print(len(features_test))
print(len(labels_train))
print(len(labels_validation))
print(len(labels_test))

534
178
179
534
178
179


### 12.4.2 Training different models on our dataset

We'll train six models:
- Logistic regression (perceptron)
- Decision tree
- Support vector machine (SVM)
- RandomForestClassifier
- GradientBoostingClassifier
- AdaBoostClassifier

In [112]:
# Train logistic regression model
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression().fit(features_train, labels_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [113]:
# Train decision tree model 
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier().fit(features_train, labels_train)

In [114]:
# Train SVM model 
from sklearn.svm import SVC

svm_model = SVC().fit(features_train, labels_train)

In [115]:
# Train random forest model 
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier().fit(features_train, labels_train)

In [116]:
# Train gradient boosting model 
from sklearn.ensemble import GradientBoostingClassifier

gb_model = GradientBoostingClassifier().fit(features_train, labels_train)

In [117]:
# Train Adaboost model 
from sklearn.ensemble import AdaBoostClassifier

ab_model = AdaBoostClassifier().fit(features_train, labels_train)

### 12.4.3 Evaluating the models

#### Accuracy

In [118]:
# print accuracy of each model on validation data
print("Scores of the models" )
print("Logistic regression:",lr_model.score(features_validation, labels_validation))
print("Decision tree:",dt_model.score(features_validation, labels_validation))
print("SVM:", svm_model.score(features_validation, labels_validation))
print("Random forest:",rf_model.score(features_validation, labels_validation) )
print("Gradient boosting:", gb_model.score(features_validation, labels_validation))
print("AdaBoost:",ab_model.score(features_validation, labels_validation) )

Scores of the models
Logistic regression: 0.7696629213483146
Decision tree: 0.7752808988764045
SVM: 0.6797752808988764
Random forest: 0.7696629213483146
Gradient boosting: 0.8089887640449438
AdaBoost: 0.7640449438202247


#### F1-score

In [67]:
# print F1-score of each model on validation data
from sklearn.metrics import f1_score
val_predictions_lr_model = lr_model.predict(features_validation)
val_predictions_dt_model = dt_model.predict(features_validation)
val_predictions_svm_model = svm_model.predict(features_validation)
val_predictions_rf_model = rf_model.predict(features_validation)
val_predictions_gb_model = gb_model.predict(features_validation)
val_predictions_ab_model = ab_model.predict(features_validation)

print("F1-scores of the models:")

print("Logistic regression:",f1_score(labels_validation,val_predictions_lr_model) )

print("Decision tree:", f1_score(labels_validation,val_predictions_dt_model))

print("SVM:", f1_score(labels_validation,val_predictions_svm_model))

print("Random forest:", f1_score(labels_validation,val_predictions_rf_model))

print("Gradient boosting:",f1_score(labels_validation,val_predictions_gb_model) )

print("AdaBoost:",f1_score(labels_validation,val_predictions_ab_model) )

F1-scores of the models:
Logistic regression: 0.6870229007633588
Decision tree: 0.6969696969696969
SVM: 0.39999999999999997
Random forest: 0.7050359712230215
Gradient boosting: 0.744186046511628
AdaBoost: 0.6865671641791045


### 12.4.4 Testing the model

Finding the accuracy and the F1-score of the model in the testing set.

In [119]:
# accuracy of gradient boost model on testing data
print("Gradient boosting:", gb_model.score(features_test, labels_test))



Gradient boosting: 0.8324022346368715


In [120]:
# F1-score of gradient boost model on testing data
test_predictions_gb_model = gb_model.predict(features_test)


print("Gradient boosting:",f1_score(labels_test,test_predictions_gb_model) )


Gradient boosting: 0.8026315789473685


# 12.5 Grid search

In [121]:
from sklearn.model_selection import GridSearchCV

In [122]:
svm_parameters = {'kernel': ['rbf'],
                  'C': [0.01, 0.1, 1 , 10, 100],
                  'gamma': [0.01, 0.1, 1, 10, 100]
                }
# use gridsearch to find the best hyperparameters 
svm = SVC()
svm_gs = GridSearchCV(svm, svm_parameters).fit(features_train,labels_train)

# git the best model
svm_winner = svm_gs.best_estimator_
svm_winner

svm_winner.score(features_validation, labels_validation)

0.7191011235955056

In [123]:
svm_winner

SVC(C=10, gamma=0.01)

# 12.6 Cross validation

In [124]:
# print the k-fold cross validation output
svm_gs.cv_results_


#in the result the mean_test_score it has the accuracy of each model 

{'mean_fit_time': array([0.0122611 , 0.01169024, 0.01285982, 0.01369233, 0.01160021,
        0.01266856, 0.01259956, 0.01279612, 0.01384835, 0.01356874,
        0.01322956, 0.01261926, 0.0126698 , 0.01419711, 0.01339426,
        0.01286421, 0.01381884, 0.01547556, 0.01460361, 0.01430278,
        0.01720786, 0.01979594, 0.01570134, 0.0161356 , 0.0135201 ]),
 'std_fit_time': array([0.0025078 , 0.00035754, 0.00161917, 0.00199563, 0.00030081,
        0.00161311, 0.00158756, 0.00102658, 0.00145932, 0.00097114,
        0.00216401, 0.00065514, 0.00017193, 0.00224548, 0.0018871 ,
        0.00073591, 0.00042015, 0.00107865, 0.00051004, 0.00159965,
        0.00254058, 0.00204587, 0.00229066, 0.00361834, 0.00022709]),
 'mean_score_time': array([0.00511827, 0.00518622, 0.00553226, 0.00590162, 0.00542407,
        0.00562048, 0.00535102, 0.00529943, 0.00563536, 0.00558314,
        0.00541906, 0.0051229 , 0.00528369, 0.00608969, 0.00529137,
        0.00457649, 0.00473518, 0.00528221, 0.0054441 , 0.00