In [38]:
import pandas as pd
import numpy as np

In [39]:
# Create the package directory that the %%writefile cells below write into
# (-p: succeed silently when the directory already exists).
!mkdir -p pyml_model

In [40]:
%%writefile pyml_model/__init__.py
# Intentionally empty: this file only marks pyml_model as an importable package.

Overwriting pyml_model/__init__.py


In [41]:
%%writefile pyml_model/model_configuration.py
# Tunable parameters shared by the training notebook and the packaged model.
'''
folder_level selects which component of a changed file's path becomes its
training feature: the component that sits folder_level places from the END of
the path, where the file name itself is place 1. A path with fewer than
folder_level components is used whole as the feature.

Worked examples with folder_level = 4:

1. apps/presidio/helix/app/src/test/java/com/ubercab/presidio/app/optional/root/
main/menu/MenuStreamWithFallbackWorkerTest.java -- root

2. "apps/carbon/DriverLibraries/Core/RealtimeDriver/RealtimeDriver/TestMocks/
RealtimeDriverProtocolMocks.swift" -- RealtimeDriver

Files that live in different leaf folders can still share one feature. All
four files below map to ubercab when folder_level = 4:

"apps/presidio/carbon/features/social-profiles-driver/src/main/java/com/ubercab/
social_profiles_driver/hub/SocialProfilesHubBuilder.java"
"apps/presidio/carbon/features/social-profiles-driver/src/main/java/com/ubercab/
social_profiles_driver/profile/SocialProfilesBuilder.java"
"apps/presidio/carbon/features/social-profiles-driver/src/main/java/com/ubercab/
social_profiles_driver/profile/SocialProfilesInteractor.java"
"apps/presidio/carbon/features/social-profiles-driver/src/test/java/com/ubercab/
social_profiles_driver/profile/SocialProfilesInteractorTest.java"
'''
folder_level = 4

# Repositories to train a model for. Every entry needs a matching key in
# p95_cutoff below. To train a single repo, shrink the list, e.g.
# ['mobile/android'] (keep it a list: the notebook iterates over it).
repo = [
    'mobile/android',
    'mobile/ios',
    'lm/fievel',
    'go-code',
]

# Per-repo latency threshold in MINUTES (the notebook multiplies by 60 and
# compares against latencies in seconds). Requests below the threshold are
# labelled 0, the rest 1.
p95_cutoff = {
    "mobile/android": 65,
    "mobile/ios": 20,
    "lm/fievel": 20,
    "go-code": 20,
}

# Share of the time-ordered requests meant for train/test; the remainder is
# kept for validation.
# NOTE(review): confirm the training cell reads this value rather than a
# hard-coded 2/3 split.
train_test_split_ratio = 0.67

Overwriting pyml_model/model_configuration.py


In [42]:
# Build the Presto query that joins the SubmitQueue context/request/state tables
# with the Phabricator diff change log, restricted to the last 90 days.
import time
# 7776000000 ms = 90 days (90 * 24 * 60 * 60 * 1000). The value is an epoch
# timestamp in milliseconds and is compared against received_timestamp below.
unix_timestamp_3_months_ago = (int(round(time.time() * 1000)) - 7776000000)
# The where-clause keeps only COMPLETE BUILD_CHECKER contexts of requests whose
# state is SUCCEEDED, and the result is ordered by received_timestamp ascending.
# The string relies on backslash line continuations, so whitespace before each
# backslash is part of the SQL text.
query = "select kafka_hp_phab_events_streamer_uci_diff_change_log_nodedup.msg.differential_id, \
kafka_hp_phab_events_streamer_uci_diff_change_log_nodedup.msg.files_changed, \
mysql_submitqueue_cigreeter_submitqueue_requests_rows.diff_id, \
mysql_submitqueue_cigreeter_submitqueue_requests_rows.revision_id, \
mysql_submitqueue_cigreeter_submitqueue_requests_rows.raw_request, \
mysql_submitqueue_cigreeter_submitqueue_requests_rows.received_timestamp, \
mysql_submitqueue_cigreeter_contexts_rows.latency_in_ms \
from rawdata_user.mysql_submitqueue_cigreeter_contexts_requests_mapping_rows \
inner join \
rawdata_user.mysql_submitqueue_cigreeter_contexts_rows \
on mysql_submitqueue_cigreeter_contexts_rows.id=mysql_submitqueue_cigreeter_contexts_requests_mapping_rows.context_id \
inner join rawdata_user.mysql_submitqueue_cigreeter_submitqueue_requests_rows \
on mysql_submitqueue_cigreeter_contexts_requests_mapping_rows.submit_queue_id=mysql_submitqueue_cigreeter_submitqueue_requests_rows.id  \
inner join rawdata_user.mysql_submitqueue_cigreeter_state_transitions_agg_rows \
on mysql_submitqueue_cigreeter_submitqueue_requests_rows.id=mysql_submitqueue_cigreeter_state_transitions_agg_rows.submit_queue_id \
inner join rawdata_user.kafka_hp_phab_events_streamer_uci_diff_change_log_nodedup \
on kafka_hp_phab_events_streamer_uci_diff_change_log_nodedup.msg.differential_id=mysql_submitqueue_cigreeter_submitqueue_requests_rows.revision_id \
where (mysql_submitqueue_cigreeter_contexts_rows.processor_id='BUILD_CHECKER' AND mysql_submitqueue_cigreeter_contexts_rows.activation_state='COMPLETE' AND mysql_submitqueue_cigreeter_state_transitions_agg_rows.state_name='SUCCEEDED' AND mysql_submitqueue_cigreeter_submitqueue_requests_rows.received_timestamp > %s) order by mysql_submitqueue_cigreeter_submitqueue_requests_rows.received_timestamp asc" %(unix_timestamp_3_months_ago) 

In [43]:
# Run the query on Presto through the queryrunner client and materialize every
# returned row into a list, so later cells can slice and re-iterate the data.
from queryrunner_client import Client
qr_obj = Client(user_email='ramsri@uber.com')
hive_data = qr_obj.execute('presto', query)
submitqueue_raw_data = list(hive_data)

08/21/2019 06:34:32 PM [93m [Polling] e3ca7ac8-667d-4ee1-9504-2e53ab3ffe14 [0m
08/21/2019 06:34:32 PM [93m [Status] pending validation [0m
08/21/2019 06:34:33 PM [93m [Status] pending execution [0m
08/21/2019 06:34:34 PM [93m [Status] in execution [0m
08/21/2019 06:34:34 PM [93m [External ID] 20190821_183434_72125_7eftn [0m
08/21/2019 06:35:00 PM [93m [Status] finished success [0m
08/21/2019 06:35:00 PM [92m [Query Success] finished success [0m


In [44]:
%%writefile pyml_model/model_utils.py
from collections import OrderedDict

# Parses submitqueue data for one repo and returns the features seen (path components) plus the revision ids.
def feature_list(repo, submitqueue_raw_data, folder_level):
    """Collect the feature vocabulary and the revision ids for one repository.

    Args:
        repo: Repository path to keep, e.g. 'mobile/android'. It is compared
            with the text that follows 'internal:' in the request's remote.
        submitqueue_raw_data: Iterable of row mappings providing the string
            fields 'raw_request', 'differential_id' and 'files_changed'.
        folder_level: Position, counted from the end of a file path (the file
            name is position 1), of the path component used as the feature.

    Returns:
        A tuple (list_of_features, list_of_revision_ids): an OrderedDict that
        maps every feature seen, in first-seen order, to 0, and the
        'differential_id' of every matching request in input order.
    """
    list_of_features = OrderedDict()
    list_of_revision_ids = []
    for item in submitqueue_raw_data:
        # item['raw_request'] looks like
        #   {"remote":"gitolite@code.uber.internal:pricing/wayfare","diffId":"8438835",
        #    "revisionId":"2665737","targetOnto":"master"}
        # The repo is the text between 'internal:' and the next double quote.
        # A request without the 'internal:' marker cannot belong to `repo`, so
        # it is skipped instead of raising IndexError.
        remote_pieces = item['raw_request'].split('internal:')
        if len(remote_pieces) < 2 or remote_pieces[1].split("\"")[0] != repo:
            continue
        list_of_revision_ids.append(item['differential_id'])

        # item['files_changed'] is the printed form of a list of paths, e.g.
        #   [u'apps/eats/library/core/build.gradle', u'apps/eats/library/core/BUCK']
        for fl in item['files_changed'].split(', '):
            quoted = fl.split('\'')
            if len(quoted) < 2:
                # No quoted path in this chunk, e.g. '[]' for a diff that
                # lists no files; nothing to record.
                continue
            feature = quoted[1]
            path_parts = feature.split('/')
            if len(path_parts) >= folder_level:
                list_of_features[path_parts[len(path_parts) - folder_level]] = 0
            else:
                # Path is shallower than folder_level: use the whole path.
                list_of_features[feature] = 0
    return list_of_features, list_of_revision_ids

# Convert raw_data into a 2D list. i.e. For each query (1st Dimension),
# list of all features that are applicable (2nd Dimension)
def create_dataframe(repo, list_of_features, submitqueue_raw_data, folder_level):
    """Turn the raw requests of one repository into rows of 0/1 feature flags.

    Args:
        repo: Repository path to keep, e.g. 'mobile/android'. It is compared
            with the text that follows 'internal:' in the request's remote.
        list_of_features: Ordered mapping of feature name -> 0 (as produced by
            feature_list). Its key order defines the column order. It is only
            copied, never modified.
        submitqueue_raw_data: Iterable of row mappings providing the string
            fields 'raw_request' and 'files_changed' plus 'latency_in_ms'.
        folder_level: Position, counted from the end of a file path (the file
            name is position 1), of the path component used as the feature.

    Returns:
        A tuple (submitqueue_data, latency_list). submitqueue_data holds one
        list per matching request: a flag per known feature (1 when one of the
        changed files maps to it) followed by the latency in seconds.
        latency_list holds the same latencies on their own.
    """
    submitqueue_data = []  # one row per matching request
    latency_list = []      # latency in seconds, aligned with submitqueue_data

    for item in submitqueue_raw_data:
        # item['raw_request'] looks like
        #   {"remote":"gitolite@code.uber.internal:pricing/wayfare","diffId":"8438835",
        #    "revisionId":"2665737","targetOnto":"master"}
        # A request without the 'internal:' marker cannot belong to `repo`, so
        # it is skipped instead of raising IndexError.
        remote_pieces = item['raw_request'].split('internal:')
        if len(remote_pieces) < 2 or remote_pieces[1].split("\"")[0] != repo:
            continue

        # Start every row from a fresh copy of the template. No flag can leak
        # from one request into the next, and no reset pass is needed.
        row_vector = list_of_features.copy()

        # item['files_changed'] is the printed form of a list of paths, e.g.
        #   [u'apps/eats/library/core/build.gradle', u'apps/eats/library/core/BUCK']
        for fl in item['files_changed'].split(', '):
            quoted = fl.split('\'')
            if len(quoted) < 2:
                # No quoted path in this chunk, e.g. '[]' for a diff that
                # lists no files; the row keeps all flags at 0.
                continue
            feature = quoted[1]
            path_parts = feature.split('/')
            # The feature is the folder name at folder_level from the end;
            # shallower paths use the whole path as the feature.
            if len(path_parts) >= folder_level:
                feature = path_parts[len(path_parts) - folder_level]
            # Features unknown to the template are ignored.
            if feature in row_vector:
                row_vector[feature] = 1

        latency_in_seconds = float(item['latency_in_ms']) / 1000

        # Feature flags first, latency as the last column.
        row = list(row_vector.values())
        row.append(latency_in_seconds)
        submitqueue_data.append(row)
        latency_list.append(latency_in_seconds)
    return submitqueue_data, latency_list

Overwriting pyml_model/model_utils.py


In [45]:
# Start from a clean package directory by dropping artifacts left by a previous
# run. -f makes the cell idempotent: on a fresh checkout the files do not exist
# yet, and plain `rm` would print an error for each of them.
!rm -f pyml_model/weights.pkl
!rm -f pyml_model/listfile.txt
!rm -f pyml_model/requirements.txt

In [46]:
%%writefile pyml_model/requirements.txt
pandas==0.18.1
scipy==0.17.1
numpy==1.16.4
scikit-learn==0.20.3
phabricator==0.7.0

Writing pyml_model/requirements.txt


In [47]:
%%writefile pyml_model/model.py
import pandas as pd
import numpy as np
import os
import time
from collections import OrderedDict
import logging
import model_configuration


from pyml.model.dataframe_model import DataFrameModel
from sklearn.externals import joblib

class SQLogisticRegressionModel(DataFrameModel):
    """Classify a SubmitQueue request from the features of its changed files.

    In order for your model to be evaluated on data in Hive, it must
    inherit from DataFrameModel. A DataFrameModel takes input as a pandas
    dataframe and produces predictions as a pandas dataframe.

    The model folder must contain:
      * weights.pkl  - the fitted estimator dumped with joblib.
      * listfile.txt - one feature name per line. The line order is the column
        order the estimator is fed with.
    """
    def __init__(self):
        super(SQLogisticRegressionModel, self).__init__()  # Don't forget this line

        # All paths should be relative to the root of your model folder.
        # NOTE: joblib.load unpickles the file, so only ship weights that were
        # produced by your own training run.
        self.clf = joblib.load('weights.pkl')

        # Feature name -> 0, in file order. sklearn expects the columns in a
        # fixed order, so this ordered mapping is the template every
        # prediction vector is derived from. The context manager closes the
        # file even if reading fails.
        self.list_of_features = OrderedDict()
        with open("listfile.txt", "r") as feature_file:
            for line in feature_file:
                self.list_of_features[line.split('\n')[0]] = 0

        self.folder_level = model_configuration.folder_level

    def predict(self, df):
        """Predict receives data from a query as a pandas dataframe and returns
        results as a pandas dataframe.

        Every cell of `df` is treated as one feature name of the same commit.
        The whole frame is flattened into a single feature vector, so the
        result always has exactly one row.

        Args:
            df: pandas DataFrame whose cells hold feature names. Names that are
                not listed in listfile.txt are ignored.

        Returns:
            A pandas DataFrame with a single column 'job_class' holding the
            estimator's prediction for the flattened feature vector.
        """
        # Flatten the frame into the set of feature names present in the commit.
        commit_features = set(
            value for row in df.values.tolist() for value in row
        )

        # Build the vector without touching self.list_of_features. The template
        # then stays intact if prediction raises, or if predict is called
        # again while a previous call is still running.
        feature_row = [
            1 if name in commit_features else stored
            for name, stored in self.list_of_features.items()
        ]

        feature = pd.DataFrame([feature_row])
        op_df = pd.DataFrame(self.clf.predict(feature), columns=['job_class'])
        return op_df

Overwriting pyml_model/model.py


In [49]:
import pyml_model.model_configuration as conf
import pyml_model.model_utils as util

# Train and get best model
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

# Print training metrics
from sklearn.metrics import confusion_matrix

# Dump model into a pkl file
from sklearn.externals import joblib

# Dump model into a pkl file
from sklearn.externals import joblib

# PyML libraries
from pyml import PyMLModel, PythonML
from phabricator import Phabricator

# Splitting submitqueue data into two parts for train test and for validation
submitqueue_raw_data_train_test = submitqueue_raw_data[0:(2*len(submitqueue_raw_data))/3]
submitqueue_raw_data_validation = submitqueue_raw_data[(2*len(submitqueue_raw_data))/3:len(submitqueue_raw_data)]

model_ids = []

for repo_name in conf.repo:
    print repo_name
    # Obtaining a list of features (i.e. folder levels)
    list_of_features, list_of_revision_ids = util.feature_list(repo_name, submitqueue_raw_data_train_test, conf.folder_level)
    #print len(list_of_features), len(list_of_revision_ids)
    
    #Get train test raw data
    submitqueue_data_train_test, latency_list = util.create_dataframe(repo_name, list_of_features, submitqueue_raw_data_train_test, conf.folder_level)
    #print len(submitqueue_data_train_test), len(latency_list)
    
    # Feature names with latency
    column_names = []
    features = []
    for item in list_of_features:
        column_names.append(item)
        features.append(item)
    column_names.append("latency")
    

    
    # get pandas dataframe for train test data. This is used by scikit learn to perform training 
    data = pd.DataFrame(submitqueue_data_train_test, columns=column_names)

    # Create a bucket for classifying SQ requests below and above p95 latencies. 
    # e.g. bins - [-1.0, 3900, 7712.439], p95 = 39000s 
    bins = [np.min(latency_list)-1, conf.p95_cutoff[repo_name]*60, \
            max(np.max(latency_list)+1, conf.p95_cutoff[repo_name]*60 + 1)]
    #print bins

    # Label for SQ requests below p95 latency - 0 
    # Label for SQ requests above p95 latency - 1
    labels = [0,1]

    # Create a bucket colunm in my dataframe that assignns corresponding labels to my SQ data.
    data["bucket"] = pd.cut(data["latency"], bins, labels=labels, right=False)

    # Feature vector 
    feature_data = data[features] #X
    # Latency vector 
    latency_bins = data["bucket"]#y
    
    # Training using Logistic Regression
    logreg = LogisticRegression( max_iter=5000) # change to 5000

    # Hyperparameters
    penalty = ['l1', 'l2']
    C = np.logspace(0, 4, 10)
    hyperparameters = dict(C=C, penalty=penalty)
    clf = GridSearchCV(logreg, hyperparameters, cv=10, n_jobs=-1, verbose=1, scoring='f1_micro')
    best_model = clf.fit(feature_data, latency_bins)
    
    joblib.dump(best_model, 'pyml_model/weights.pkl') 
    
    with open('pyml_model/listfile.txt', 'w') as filehandle:  
        for listitem in features:
            filehandle.write('%s\n' % listitem)
        
    project_id = 'ram_python_ml_model_test'
    model_name = 'sq_ml'
    pyml_model = PyMLModel(model_path="pyml_model/",
                          project_id=project_id,
                          model_name=model_name)
    
    # Create project -- afterwards it will be available at https://michelangelo.uberinternal.com/ram_python_ml_model_test
    client = PythonML(user_email="ramsri@uber.com", ublame_team="mobile-ci-compute")
    model_id = client.upload_model(pyml_model, use_ma_processor=True, enable_validation=False) 

    model_ids.append(model_id)
    
    #client.deploy_model(project_id, 
    #                    model_id,
    #                    deployment_label=repo_name,
    #                    wait_timeout=3600)
    #
    #!rm pyml_model/listfile.txt
    !rm pyml_model/weights.pkl


mobile/android
Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   56.2s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   58.7s finished
08/21/2019 06:37:31 PM [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
2019-08-21 18:37:31,408 pyml INFO [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
08/21/2019 06:37:31 PM [92m 1/3 - Archiving model... [0m
2019-08-21 18:37:31,411 pyml INFO [92m 1/3 - Archiving model... [0m
08/21/2019 06:37:31 PM [92m Success! [0m
2019-08-21 18:37:31,427 pyml INFO [92m Success! [0m
08/21/2019 06:37:31 PM [92m 2/3 - Registering model... [0m
2019-08-21 18:37:31,431 pyml INFO [92m 2/3 - Registering model... [0m
08/21/2019 06:37:31 PM [92m Success! [0m
2019-08-21 18:37:31,454 pyml INFO [92m Success! [0m
08/21/2019 06:37:31 PM [92m Uploading model archive to Michelangelo

2019-08-21 18:39:17,762 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412756101, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
08/21/2019 06:39:32 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412756101, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
2019-08-21 18:39:32,783 pyml INFO [92m DockerBuild

2019-08-21 18:41:32,952 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412877483, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
08/21/2019 06:41:47 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412877483, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
2019-08-21 18:41:47,974 pyml INFO [92m DockerBuild

2019-08-21 18:43:48,123 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412998450, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
08/21/2019 06:44:03 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000331', 'dockerBuildStatusId': u'8f5bd92c-6ee9-4548-b5e2-c1985e630c44', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-183731-YEVIEFAD', 'modifiedAt': 1566412998450, 'startTime': None, 'ubuildId': u'c20055db-8f86-4fea-82f9-7694038b458f', 'logUrl': None, 'modelId': u'tm20190821-183731-YEVIEFAD-QVYTAS'}) [0m
2019-08-21 18:44:03,136 pyml INFO [92m DockerBuild

mobile/ios
Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  3.9min finished
08/21/2019 06:48:50 PM [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
2019-08-21 18:48:50,233 pyml INFO [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
08/21/2019 06:48:50 PM [92m 1/3 - Archiving model... [0m
2019-08-21 18:48:50,237 pyml INFO [92m 1/3 - Archiving model... [0m
08/21/2019 06:48:50 PM [92m Success! [0m
2019-08-21 18:48:50,258 pyml INFO [92m Success! [0m
08/21/2019 06:48:50 PM [92m 2/3 - Registering model... [0m
2019-08-21 18:48:50,261 pyml INFO [92m 2/3 - Registering model... [0m
08/21/2019 06:48:50 PM [92m Success! [0m
2019-08-21 18:48:50,282 pyml INFO [92m Success! [0m
08/21/2019 06:48:50 PM [92m Uploading model archive to Michelangelo

08/21/2019 06:50:35 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413396434, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
2019-08-21 18:50:35,829 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413396434, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
08/21/2019 06:50:50 PM [92m DockerBuildStatus({'st

08/21/2019 06:52:51 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413517548, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
2019-08-21 18:52:51,000 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413517548, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
08/21/2019 06:53:06 PM [92m DockerBuildStatus({'st

08/21/2019 06:55:06 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413698381, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
2019-08-21 18:55:06,258 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000332', 'dockerBuildStatusId': u'df62e700-8730-4cce-af49-13f13c850ed4', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-184850-GHDRNJLL', 'modifiedAt': 1566413698381, 'startTime': None, 'ubuildId': u'8a833e05-4ffc-4098-9911-90d0380af521', 'logUrl': None, 'modelId': u'tm20190821-184850-GHDRNJLL-BWVXEZ'}) [0m
08/21/2019 06:55:21 PM [92m DockerBuildStatus({'st

08/21/2019 06:57:06 PM [92m Updating model... [0m
2019-08-21 18:57:06,435 pyml INFO [92m Updating model... [0m
08/21/2019 06:57:06 PM [92m Model successfully updated. [0m
2019-08-21 18:57:06,517 pyml INFO [92m Model successfully updated. [0m


lm/fievel
Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  4.4min finished
08/21/2019 07:02:28 PM [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
2019-08-21 19:02:28,301 pyml INFO [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
08/21/2019 07:02:28 PM [92m 1/3 - Archiving model... [0m
2019-08-21 19:02:28,304 pyml INFO [92m 1/3 - Archiving model... [0m
08/21/2019 07:02:28 PM [92m Success! [0m
2019-08-21 19:02:28,325 pyml INFO [92m Success! [0m
08/21/2019 07:02:28 PM [92m 2/3 - Registering model... [0m
2019-08-21 19:02:28,327 pyml INFO [92m 2/3 - Registering model... [0m
08/21/2019 07:02:28 PM [92m Success! [0m
2019-08-21 19:02:28,349 pyml INFO [92m Success! [0m
08/21/2019 07:02:28 PM [92m Uploading model archive to Michelangelo

08/21/2019 07:04:13 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414213850, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
2019-08-21 19:04:13,669 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414213850, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
08/21/2019 07:04:28 PM [92m DockerBuildStatus({'st

08/21/2019 07:06:28 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414334329, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
2019-08-21 19:06:28,836 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414334329, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
08/21/2019 07:06:43 PM [92m DockerBuildStatus({'st

08/21/2019 07:08:44 PM [92m DockerBuildStatus({'status': u'SUCCESS', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414515319, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
2019-08-21 19:08:44,046 pyml INFO [92m DockerBuildStatus({'status': u'SUCCESS', 'dockerImageId': u'phx2-produ-0000000333', 'dockerBuildStatusId': u'7e466d78-4e03-4dac-8642-b37e164a961a', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-190228-VZPIXTVG', 'modifiedAt': 1566414515319, 'startTime': None, 'ubuildId': u'21193b8a-3d9c-490e-a27e-667af3a6ff3f', 'logUrl': None, 'modelId': u'tm20190821-190228-VZPIXTVG-VFBAXW'}) [0m
08/21/2019 07:08:44 PM [92m Updating model... [0m

go-code
Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   14.8s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  3.4min finished
08/21/2019 07:12:23 PM [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
2019-08-21 19:12:23,923 pyml INFO [92m 1. Archiving, Uploading, & Registering model with Michelangelo [0m
08/21/2019 07:12:23 PM [92m 1/3 - Archiving model... [0m
2019-08-21 19:12:23,926 pyml INFO [92m 1/3 - Archiving model... [0m
08/21/2019 07:12:23 PM [92m Success! [0m
2019-08-21 19:12:23,940 pyml INFO [92m Success! [0m
08/21/2019 07:12:23 PM [92m 2/3 - Registering model... [0m
2019-08-21 19:12:23,943 pyml INFO [92m 2/3 - Registering model... [0m
08/21/2019 07:12:23 PM [92m Success! [0m
2019-08-21 19:12:23,961 pyml INFO [92m Success! [0m
08/21/2019 07:12:23 PM [92m Uploading model archive to Michelangelo

08/21/2019 07:14:09 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566414809852, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
2019-08-21 19:14:09,382 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566414809852, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
08/21/2019 07:14:24 PM [92m DockerBuildStatus({'st

08/21/2019 07:16:24 PM [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566414930376, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
2019-08-21 19:16:24,555 pyml INFO [92m DockerBuildStatus({'status': u'STARTED', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566414930376, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
08/21/2019 07:16:39 PM [92m DockerBuildStatus({'st

08/21/2019 07:18:39 PM [92m DockerBuildStatus({'status': u'SUCCESS', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566415110989, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
2019-08-21 19:18:39,780 pyml INFO [92m DockerBuildStatus({'status': u'SUCCESS', 'dockerImageId': u'phx2-produ-0000000334', 'dockerBuildStatusId': u'6c44cc1e-7d23-442e-a8f8-01d69befca43', 'endTime': None, 'projectId': u'ram_python_ml_model_test', 'errorMessage': None, 'tmId': u'tm20190821-191223-OMEBAFNY', 'modifiedAt': 1566415110989, 'startTime': None, 'ubuildId': u'd3198edd-be37-47bc-8bfc-ab7dc2e12aa2', 'logUrl': None, 'modelId': u'tm20190821-191223-OMEBAFNY-OGAKBE'}) [0m
08/21/2019 07:18:39 PM [92m Updating model... [0m

In [50]:
project_id = 'ram_python_ml_model_test'

# Read the model ids stored in model_ids.txt (one id per line) before doing
# anything else. The `with` block closes the handle right away instead of
# leaving it open for the rest of the kernel session, and blank lines are
# dropped so an empty id is never sent to retire_model.
with open("model_ids.txt", "r") as f:
    previous_model_ids = [line.strip() for line in f if line.strip()]

# NOTE(review): the hard-coded 4 presumably matches the number of entries in
# conf.repo (one model per repo) -- confirm before changing conf.repo.
if len(model_ids) == 4:
    # Deploy each model under a deployment label named after its repo;
    # model_ids is indexed in the same order as conf.repo.
    for repo_idx, repo_name in enumerate(conf.repo):
        client.deploy_model(project_id,
            model_ids[repo_idx],
            deployment_label=repo_name,
            wait_timeout=3600)

    # Retire the models listed in model_ids.txt now that the deploy calls
    # above have returned.
    for model_id_retire in previous_model_ids:
        if model_id_retire in model_ids:
            # The file already lists a model deployed just above (e.g. this
            # cell was re-run after model_ids.txt was rewritten); retiring it
            # would undo the deployment, so skip it.
            continue
        client.retire_model(project_id,
                            model_id_retire)


08/21/2019 07:58:00 PM [92m Sending deployment request for model tm20190821-183731-YEVIEFAD-QVYTAS... [0m
2019-08-21 19:58:00,645 pyml INFO [92m Sending deployment request for model tm20190821-183731-YEVIEFAD-QVYTAS... [0m
08/21/2019 07:58:01 PM [92m Sent! [0m
2019-08-21 19:58:01,854 pyml INFO [92m Sent! [0m
08/21/2019 07:58:01 PM [92m On average, this will take less than 10 min [0m
2019-08-21 19:58:01,857 pyml INFO [92m On average, this will take less than 10 min [0m
08/21/2019 07:58:01 PM [92m For updates on the deployment status, see UI here: https://michelangelo.uberinternal.com/ram_python_ml_model_test/models [0m
2019-08-21 19:58:01,860 pyml INFO [92m For updates on the deployment status, see UI here: https://michelangelo.uberinternal.com/ram_python_ml_model_test/models [0m
08/21/2019 07:58:01 PM [92m Deploying... [0m
2019-08-21 19:58:01,862 pyml INFO [92m Deploying... [0m
08/21/2019 07:58:01 PM [92m You can find your custom logs here: https://search.uberinter

2019-08-21 20:12:37,293 pyml INFO [92m Sending retirement request for model tm20190820-002917-PLAMURGK-IHXKCH... [0m
08/21/2019 08:12:37 PM [92m Sent! [0m
2019-08-21 20:12:37,754 pyml INFO [92m Sent! [0m
08/21/2019 08:12:37 PM [92m On average, this will take less than 20 min [0m
2019-08-21 20:12:37,757 pyml INFO [92m On average, this will take less than 20 min [0m
08/21/2019 08:12:37 PM [92m For updates on the retirement status, see UI here: https://michelangelo.uberinternal.com/ram_python_ml_model_test/models [0m
2019-08-21 20:12:37,760 pyml INFO [92m For updates on the retirement status, see UI here: https://michelangelo.uberinternal.com/ram_python_ml_model_test/models [0m
08/21/2019 08:12:37 PM [92m Started waiting for a retirement request to finish [0m
2019-08-21 20:12:37,762 pyml INFO [92m Started waiting for a retirement request to finish [0m
08/21/2019 08:14:38 PM [92m Your model has been successfully retired. [0m
2019-08-21 20:14:38,052 pyml INFO [92m Your 

In [51]:
if len(model_ids) == 4:
    !rm pyml_model/listfile.txt
    with open('model_ids.txt', 'w') as filehandle:  
        for model_id in model_ids:
            filehandle.write('%s\n' % model_id)