# Part 2: Train a Model

In [1]:
import mlrun

In [2]:
# Base name of the MLRun project used throughout this notebook
project_name = "fraud-predict-demo"

In [3]:
# Load the project (or create it if it does not exist yet), using the
# current directory as the project context and a per-user project name.
project = mlrun.get_or_create_project(
    project_name,
    context='./',
    user_project=True,
)

> 2022-08-01 07:50:04,706 [info] loaded project fraud-predict-demo from MLRun DB


Before creating your feature vector for training, you need to define which features you want to include in it.

You can reference a single feature with ``<feature set name>.<feature name>``, or use ``<feature set name>.*`` to include all the features in a feature set.

In [4]:
# Define the list of features we will be using.
# Every entry is spelled '<feature set name>.<feature name>'; all of them
# come from the 'transactions' feature set.
features = [
    f'transactions.{feature_name}'
    for feature_name in (
        # amount_<aggregate>_<window> features, grouped window by window
        *(
            f'amount_{aggregate}_{window}'
            for window in ('2h', '12h', '24h')
            for aggregate in ('max', 'sum', 'count', 'avg')
        ),
        # es_<category>_count_14d features, one per category
        *(
            f'es_{category}_count_14d'
            for category in (
                'transportation', 'health', 'otherservices', 'food',
                'hotelservices', 'barsandrestaurants', 'tech', 'sportsandtoys',
                'wellnessandbeauty', 'hyper', 'fashion', 'home', 'travel', 'leisure',
            )
        ),
        # Remaining individually named features
        'gender_F', 'gender_M',
        'step', 'amount',
        'timestamp_hour', 'timestamp_day_of_week',
        'event_details_change', 'event_login', 'event_password_change',
    )
]

To create a feature vector, use the fstore.[FeatureVector](https://docs.mlrun.org/en/latest/api/mlrun.feature_store.html?highlight=FeatureVector#mlrun.feature_store.FeatureVector) class.

In [5]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Name under which the feature vector is saved and referenced later on
fv_name = 'transactions-fraud'

# Build the feature vector from the selected features, with
# 'transactions.label' as its label feature
transactions_fv = fstore.FeatureVector(
    name=fv_name,
    features=features,
    label_feature="transactions.label",
    description='Predicting a fraudulent transaction',
)

# Persist the feature vector in the Feature Store
transactions_fv.save()

In [6]:
# ParquetTarget lets us save the resulting dataset directly as a parquet file
from mlrun.datastore.targets import ParquetTarget

# Fetch the offline features of our feature vector and write them
# to a parquet target
train_dataset = fstore.get_offline_features(
    feature_vector=fv_name,
    target=ParquetTarget(),
)

> 2022-08-01 07:50:05,134 [info] wrote target: {'name': 'parquet', 'kind': 'parquet', 'path': 'v3io:///projects/fraud-predict-demo-shapira/FeatureStore/transactions-fraud/parquet/vectors/transactions-fraud-latest.parquet', 'status': 'ready', 'updated': '2022-08-01T07:50:05.133998+00:00', 'size': 46679}


In [7]:
# Preview the last five rows of the training dataset
train_dataset.to_dataframe().tail(n=5)

Unnamed: 0,amount_max_2h,amount_sum_2h,amount_count_2h,amount_avg_2h,amount_max_12h,amount_sum_12h,amount_count_12h,amount_avg_12h,amount_max_24h,amount_sum_24h,...,gender_F,gender_M,step,amount,timestamp_hour,timestamp_day_of_week,event_details_change,event_login,event_password_change,label
495,5.3,84.8,16.0,5.3,5.3,84.8,16.0,5.3,5.3,84.8,...,1,0,14,5.3,21,1,1,0,0,0
496,5.3,90.1,17.0,5.3,5.3,90.1,17.0,5.3,5.3,90.1,...,1,0,14,5.3,21,1,0,0,1,0
497,5.3,95.4,18.0,5.3,5.3,95.4,18.0,5.3,5.3,95.4,...,1,0,14,5.3,21,1,0,1,0,0
498,5.3,100.7,19.0,5.3,5.3,100.7,19.0,5.3,5.3,100.7,...,1,0,14,5.3,21,1,0,0,1,0
499,5.3,106.0,20.0,5.3,5.3,106.0,20.0,5.3,5.3,106.0,...,1,0,14,5.3,21,1,0,0,1,0


In [8]:
# Load the sklearn-classifier function from the functions hub
classifier_fn = mlrun.import_function(url='hub://sklearn-classifier')

In [9]:
# Parameters handed to the training function: the model name, the
# estimator class to train, and the name of the label column
training_params = dict(
    model_name='transaction_fraud_adaboost',
    model_pkg_class='sklearn.ensemble.AdaBoostClassifier',
    label_column='label',
)

# Specify our cluster image
classifier_fn.spec.image = 'mlrun/mlrun'

# Define the training task; the feature vector URI is passed as the
# 'dataset' input, together with the parameters defined above
train_task = mlrun.new_task(
    name='training',
    inputs={'dataset': transactions_fv.uri},
    params=training_params,
)

# Run training (local=False, i.e. not inside the notebook process)
classifier__run = classifier_fn.run(train_task, local=False)

> 2022-08-01 07:50:05,453 [info] starting run training uid=d2286c188b674ee5b400cd6eec496592 DB=http://mlrun-api:8080
> 2022-08-01 07:50:05,912 [info] Job is running in the background, pod: training-4rtn7
> 2022-08-01 07:50:13,006 [info] run executed, status=completed
final state: completed


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
fraud-predict-demo-shapira,...ec496592,0,Aug 01 07:50:09,completed,training,v3io_user=shapirakind=jobowner=shapiramlrun/client_version=1.1.0-rc12host=training-4rtn7class=sklearn.ensemble.AdaBoostClassifier,dataset,model_name=transaction_fraud_adaboostmodel_pkg_class=sklearn.ensemble.AdaBoostClassifierlabel_column=label,accuracy=1.0test-error=0.0rocauc=1.0brier_score=1.9205902684578608e-11f1-score=1.0precision_score=1.0recall_score=1.0,test_setprobability-calibrationconfusion-matrixfeature-importancesprecision-recall-binaryroc-binarymodel





> 2022-08-01 07:50:16,511 [info] run executed, status=completed


In [10]:
# Show the outputs of the training run (result metrics and artifact URIs)
classifier__run.outputs

{'accuracy': 1.0,
 'test-error': 0.0,
 'rocauc': 1.0,
 'brier_score': 1.9205902684578608e-11,
 'f1-score': 1.0,
 'precision_score': 1.0,
 'recall_score': 1.0,
 'test_set': 'store://artifacts/fraud-predict-demo-shapira/training_test_set:d2286c188b674ee5b400cd6eec496592',
 'probability-calibration': 'v3io:///projects/fraud-predict-demo-shapira/artifacts/model/plots/training/0/probability-calibration.html',
 'confusion-matrix': 'v3io:///projects/fraud-predict-demo-shapira/artifacts/model/plots/training/0/confusion-matrix.html',
 'feature-importances': 'v3io:///projects/fraud-predict-demo-shapira/artifacts/model/plots/training/0/feature-importances.html',
 'precision-recall-binary': 'v3io:///projects/fraud-predict-demo-shapira/artifacts/model/plots/training/0/precision-recall-binary.html',
 'roc-binary': 'v3io:///projects/fraud-predict-demo-shapira/artifacts/model/plots/training/0/roc-binary.html',
 'model': 'store://artifacts/fraud-predict-demo-shapira/training_model:d2286c188b674ee5b400c

### Done!
You have now created your transactions-fraud feature vector and trained your model, and you are ready to add prediction steps to your feature set.