In [0]:
!pip3 install --quiet -U fireflyai


In [0]:
#@title #Import firefly.ai library {display-mode: "form"}

import fireflyai as firefly
from fireflyai.version import __version__
print("Firefly SDK version {}".format(__version__))

import pandas as pd
import time, os, collections, getpass, pprint, urllib3

from matplotlib import pyplot as plt
import numpy as np
import seaborn as sn


In [0]:
#@title #Login to Firefly platform 
#@markdown * Using authenticate(username, password)

# Credentials are requested interactively; the password prompt does not echo.
USER = input("User:")
PASSWORD = getpass.getpass("Password:")
try:
    firefly.authenticate(username=USER, password=PASSWORD)
except Exception as login_error:
    # Report the failure reason and let the notebook continue.
    print(login_error)
else:
    print("{} - Login successful".format(USER))

In [0]:
#@title #Example classification dataset UCI Car.
#@markdown * Example of full loop using dataset "UCI_Car" 
#@markdown * It is a classification task (multi-class) 

# Base URL of the example files, and the name reused below for the CSV file,
# the uploaded data source and the prepared dataset.
PATH = "https://raw.githubusercontent.com/NeuralAlgorithms/firefly-python-sdk/master/examples/"
data_name = 'car_sdk_demo'

# Read the training CSV straight from the URL into a DataFrame.
df = pd.read_csv("{}{}.csv".format(PATH, data_name))

In [0]:
#@title #Uploading a Data source
#@markdown * Use firefly.Datasource.create_from_dataframe to upload a Dataframe
#@markdown * Use firefly.Datasource.create to upload a CSV file

#@markdown * wait=True ----> wait until the upload and analysis of the data is completed. 
#@markdown * wait=False -----> return immediately 

try:
    uploaded_source = firefly.Datasource.create_from_dataframe(
        df=df,
        data_source_name=data_name,
        wait=True,
        skip_if_exists=True,
    )
    # The id is what later cells pass as datasource_id.
    source_id = uploaded_source['id']
    print ("\nYour Source ID for {} is: {}".format(data_name, source_id))
except Exception as upload_error:
    print(upload_error)


In [0]:
#@title #Getting the list of uploaded data sources
#@markdown * Use firefly.Datasource.list() 

# The listing response keeps the records under 'hits'; load them into a frame.
list_sources = firefly.Datasource.list()
sources = pd.DataFrame(list_sources['hits'])

# Show only the summary columns for the first ten data sources.
source_columns = ['creation_date', 'data_size', 'id', 'name', 'row_count', 'state']
sources[source_columns].head(10)

In [0]:
#@title # Preparing a Dataset
#@markdown # In this step we will define the Target and the task-type:

data_set_name = data_name


target = 'class' #@param {type:'string'}

#@markdown # Select Machine learning task-type:

#@markdown * Regression
#@markdown * Classification
#@markdown * Anomaly Detection
#@markdown * Time-series Regression/Classification

Task_Type_select = 'firefly.enums.ProblemType.CLASSIFICATION' #@param['firefly.enums.ProblemType.REGRESSION', 'firefly.enums.ProblemType.CLASSIFICATION', 'firefly.enums.ProblemType.ANOMALY_DETECTION']
# Resolve the selection without eval(): every option is a dotted path ending in
# the member name, so only that last component is looked up on the enum.
Task_Type = getattr(firefly.enums.ProblemType, Task_Type_select.rsplit('.', 1)[-1])
print ('Your selected Target (y_value) is:', target)
print ('Task Type is:', Task_Type.value)

#@markdown # Use firefly.Datasource.prepare_data()

try:
    dataset_id = firefly.Datasource.prepare_data( 
                    datasource_id=source_id, 
                    dataset_name=data_set_name,
                    target=target, 
                    # header=True,
                    problem_type=Task_Type,
                    sample_id=['car_id'],
                    retype_columns={'car_id': firefly.enums.FeatureType.CATEGORICAL},
                    wait=True, skip_if_exists=True)['id']
    print("Dataset {} id is {}".format( data_set_name, dataset_id))
except Exception as e:
    # On failure dataset_id is not (re)assigned; later cells that read it will fail.
    print(e)
    


In [0]:
#@title # List the datasets 
#@markdown * use firefly.Dataset.list()

# The listing response keeps the records under 'hits'; load them into a frame.
list_datasets = firefly.Dataset.list()
datasets = pd.DataFrame(list_datasets['hits'])
# 'creation_date' stays a regular column so it can be selected below.
datasets[[ 'creation_date',  'id', 'name',  'problem_type', 'row_count', 'state']].head(10)


In [0]:
#@title # Get list of available pipelines/estimators:
#@markdown * estimator list use firefly.Dataset.get_available_estimators()
#@markdown * pipeline list use firefly.Dataset.get_available_pipeline()

from fireflyai.enums import InterpretabilityLevel as interpt

# Fetch estimators and pipeline steps for both interpretability levels;
# the training cell chooses one pair based on the form selection.
precise_estimators = firefly.Dataset.get_available_estimators(
    inter_level=interpt.PRECISE, id=dataset_id)
precise_pipeline = firefly.Dataset.get_available_pipeline(
    inter_level=interpt.PRECISE, id=dataset_id)
simple_estimators = firefly.Dataset.get_available_estimators(
    inter_level=interpt.EXPLAINABLE, id=dataset_id)
simple_pipeline = firefly.Dataset.get_available_pipeline(
    inter_level=interpt.EXPLAINABLE, id=dataset_id)

# Print the enum values rather than the enum objects.
estimator_names = [estimator.value for estimator in precise_estimators]
print("List of all estimators:\n {}".format(estimator_names))
pipeline_steps = [step.value for step in simple_pipeline]
print("\nA simple pipeline:\n {}".format(pipeline_steps))




In [0]:
#@title # Training a model
#@markdown * Choose metric

Target_metric_selection = 'firefly.enums.TargetMetric.RECALL_MACRO' #@param['firefly.enums.TargetMetric.RECALL_MACRO', 'firefly.enums.TargetMetric.F1', 'firefly.enums.TargetMetric.F2', 'Normalized Gini', 'AUC', 'Log loss', 'Accuracy', 'MAE', 'Normalized MSE', 'Normalized RMSE', 'Normalized MAE', 'Median AE', 'R2', 'RMSPE', 'RMSLE', 'MAPE']
# Resolve the selection without eval(). Dotted options name an enum member;
# the other options are plain display strings (e.g. 'Normalized Gini') that
# eval() cannot parse, so they are looked up by enum value instead.
_metric_prefix = 'firefly.enums.TargetMetric.'
if Target_metric_selection.startswith(_metric_prefix):
    Target_metric = getattr(firefly.enums.TargetMetric,
                            Target_metric_selection[len(_metric_prefix):])
else:
    # NOTE(review): assumes TargetMetric is an Enum whose values are these
    # display strings — confirm against the SDK; a mismatch raises ValueError.
    Target_metric = firefly.enums.TargetMetric(Target_metric_selection)

#@markdown * Allocate training time
Training_time_in_minutes = 6 #@param {type:'number'}


#@markdown * Data partitioning 
#@markdown * Hold-out // Cross-Validation


CV_folds = 3 #@param {type:'number'}
interpretability_level_select = 'Explainable' #@param['Explainable', 'Precise']

# Both interpretability levels train with the same ensemble size.
ensemble_size = 5
if interpretability_level_select == 'Precise':
    pipeline = precise_pipeline
    estimators = precise_estimators
    interpretability_level = firefly.enums.InterpretabilityLevel.PRECISE
else:
    pipeline = simple_pipeline
    estimators = simple_estimators
    interpretability_level = firefly.enums.InterpretabilityLevel.EXPLAINABLE


print ('The training time is:', Training_time_in_minutes)
# print ('The selected Target Metric is:', Target_metric.value)

try:
    task = firefly.Dataset.train(
        task_name=data_name + " " + interpretability_level.name,
        estimators=estimators,
        pipeline=pipeline,
        target_metric=Target_metric,
        dataset_id=dataset_id,
        # splitting_strategy=firefly.enums.SplittingStrategy.STRATIFIED,
        notes='demo created from SDK',
        ensemble_size=ensemble_size,
        n_folds=CV_folds,
        max_models_num=None,
        interpretability_level=interpretability_level,
        # The form value is in minutes; it is passed on multiplied by 60.
        timeout=Training_time_in_minutes*60, wait=True, skip_if_exists=True
    )
    # task_id is read by the ensemble-listing cell.
    task_id = task['id']
    print("Task info:")
    pprint.pprint(task.to_dict())
except Exception as e:
    print(e)

In [0]:
#@title # List tasks
#@markdown * Use firefly.Task.list()

# The listing response keeps the records under 'hits'; load them into a frame.
list_tasks = firefly.Task.list()
tasks = pd.DataFrame(list_tasks['hits'])

# Show only the summary columns for the first ten tasks.
tasks[['creation_date', 'dataset_id',  'name', 'notes', 'problem_type',
        'state', 'target_metric']].head(10)


In [0]:
#@title List ensembles
#@markdown * Use firefly.Ensemble.list()
print("task_id for ", data_name, " is ", task_id)

# Get the ensembles that belong to the trained task.
ensembles = firefly.Ensemble.list(filter_={'task_id': [task_id], 'stage': ['TASK', 'REFIT']})['hits']

n_ensembles = len(ensembles)
if not ensembles:
    # Without any ensemble, ensemble_id is not set by this cell and the
    # cells below that read it cannot run.
    print("No ensembles found for task {}".format(task_id))

# After the loop ensemble_id holds the id of the last listed ensemble;
# the sensitivity, prediction and confusion-matrix cells use that one.
for ensemble in ensembles:
    ensemble_id = ensemble['id']
    print("Ensemble id {} created on {}".format(ensemble_id, ensemble['creation_date']))



In [0]:
#@title Model sensitivity report { run: "auto" }

n=4
alg='Permutation' #@param['Permutation', 'NA value']

sens=None

# Poll every 5 seconds until the service returns a report; stop polling on
# the first error so a failing call does not loop forever.
while True:
    try:
        sens = firefly.Ensemble.get_model_sensitivity_report(id=ensemble_id)
        if sens is not None:
            break
        time.sleep(5)
    except Exception as e:
        print(e)
        break

# Fail with a clear message instead of a TypeError from indexing None below.
if sens is None:
    raise RuntimeError(
        "Sensitivity report for ensemble {} is not available".format(ensemble_id))

# Mapping of feature -> importance for the selected algorithm.
sensp = sens[alg]

# NOTE: this rebinds `df` (previously the raw training data); the name is kept
# because the "least important features" cell reads df.features / importance.
df = pd.DataFrame({'features': list(sensp.keys()),
                   'importance': list(sensp.values())})

# Most important features first.
df = df.sort_values(by='importance', ascending=False)

print("Top {0} important features by {1}:".format(n, alg))

pprint.pprint(df[:n].set_index('features'))


# One bar per feature, in descending order of importance.
y_pos = np.arange(len(df.features))

plt.bar(y_pos, df['importance'], align='center', alpha=0.5)
plt.xticks(y_pos, df.features)
plt.ylabel('Relative importance')
plt.title('Sensitivity by {0}'.format(alg))

plt.show()

In [0]:
#@title Running predictions

#@markdown * upload predict data
# The test file name doubles as the name of the uploaded data source.
test_source_name = data_name + "_test.csv"
df_pred = pd.read_csv(PATH + test_source_name)
prediction_source = firefly.Datasource.create_from_dataframe(
    df=df_pred,
    data_source_name=test_source_name,
    wait=True,
    skip_if_exists=True,
)
data_id = prediction_source['id']

#@markdown * run prediction using firefly.Prediction.create
prediction = firefly.Prediction.create(ensemble_id=ensemble_id, data_id=data_id, wait=True)
predict_id = prediction['id']
   

In [0]:
#@title Download a prediction

# Look up the prediction record and the location of its result file.
predict_results = firefly.Prediction.get(predict_id)
download_url = predict_results['result_path']

# Nothing is read or printed when no result path is reported.
if download_url is not None:
    df_predict = pd.read_csv(download_url)
    preview = df_predict.head(10)
    pprint.pprint(preview)


In [0]:
# `df` is the sensitivity table with 'features' and 'importance' columns.
# An ascending sort puts the least important features first, so the first
# three rows are the ones to drop.
least_important_features = list(df.sort_values(by='importance').features[:3])
least_important_features

In [0]:
# Create a second dataset from the same data source, excluding the features
# collected in least_important_features.

try:
    reduced_dataset = firefly.Datasource.prepare_data(
        datasource_id=source_id,
        dataset_name=data_name + "_imp",
        target=target,
        header=True,
        not_used=least_important_features,
        problem_type=Task_Type,
        # wait=False: the call returns immediately instead of waiting.
        wait=False,
        skip_if_exists=True,
    )
    # Rebinds dataset_id: cells run after this one see the reduced dataset.
    dataset_id = reduced_dataset['id']
    print(dataset_id)
except Exception as prepare_error:
    print(prepare_error)
    

In [0]:
# confusion matrix example
conf = firefly.Ensemble.get_ensemble_confusion_matrix(id=ensemble_id)['result']

# The first element supplies the labels for both axes; the rest is the matrix body.
labels, array = conf[0], conf[1:]
df_cm = pd.DataFrame(array, index=list(labels), columns=labels)

# Annotated heatmap on a 5x5 figure.
plt.figure(figsize=(5, 5))
sn.heatmap(df_cm, fmt='3', annot=True)

## delete task / dataset / datasource by name


In [0]:
# Inventory: collect every data source, dataset and task matching the demo name.
# These three lists are what the clean-up cell iterates over.
datasources = firefly.Datasource.list(search_term=data_name)['hits']
datasets = firefly.Dataset.list(search_term=data_name)['hits']
tasks = firefly.Task.list(search_term=data_name)['hits']

found_counts = (len(datasources), len(datasets), len(tasks))
print("Found {} data sources, {} datasets, {} tasks with name {}".format(*found_counts, data_name))

In [0]:
# Ask for explicit confirmation before deleting anything; only an exact 'Y' proceeds.
sure=input("Input 'Y' for deleting demo files and tasks:")

if sure=='Y':

    # Delete dependents before what they were built from: tasks are trained on
    # datasets, and datasets are prepared from data sources.
    # NOTE(review): assumes the platform accepts deletion in this order and
    # that deleting a parent first could fail or cascade — confirm with the SDK docs.
    for d in tasks:
        firefly.Task.delete(d['id'])

    for d in datasets:
        firefly.Dataset.delete(d['id'])

    for d in datasources:
        firefly.Datasource.delete(d['id'])

[link text](https://)