# Установка библиoтек

In [41]:
import os
import pandas as pd
import numpy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, TargetEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score
import mlflow

# Загрузка и подготовка данных

In [2]:
df = pd.read_pickle('../data/clean_train_data.pkl')

In [7]:
df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range,total_pixels,screen_size
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,2631,17,3,7,1,1,0,2,1799140,17.262677
2,563,1,0.5,1,2,1,41,0.9,145,5,...,2603,11,2,9,1,1,0,2,2167308,11.180340
3,615,1,2.5,0,0,0,10,0.8,131,6,...,2769,16,8,11,1,0,0,2,2171776,17.888544
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1411,8,2,15,1,1,0,1,1464096,8.246211
5,1859,0,0.5,1,3,0,22,0.7,164,1,...,1067,17,1,10,1,0,0,1,1660616,17.029386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,668,13,4,19,1,1,0,0,2309580,13.601471
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,2032,11,10,16,1,1,1,2,1797975,14.866069
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,3057,9,1,5,1,1,0,3,1416576,9.055385
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,869,18,10,19,1,1,1,0,225120,20.591260


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1935 entries, 1 to 1999
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  1935 non-null   int64  
 1   blue           1935 non-null   int64  
 2   clock_speed    1935 non-null   float64
 3   dual_sim       1935 non-null   int64  
 4   fc             1935 non-null   int64  
 5   four_g         1935 non-null   int64  
 6   int_memory     1935 non-null   int64  
 7   m_dep          1935 non-null   float64
 8   mobile_wt      1935 non-null   int64  
 9   n_cores        1935 non-null   int64  
 10  pc             1935 non-null   int64  
 11  px_height      1935 non-null   int64  
 12  px_width       1935 non-null   int64  
 13  ram            1935 non-null   int64  
 14  sc_h           1935 non-null   int64  
 15  sc_w           1935 non-null   int64  
 16  talk_time      1935 non-null   int64  
 17  three_g        1935 non-null   int64  
 18  touch_screen 

In [3]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('price_range', axis=1), df['price_range'], test_size=0.25, random_state=2)

In [4]:
num_features = df.select_dtypes(exclude="category").columns.drop('price_range')
cat_features = df.select_dtypes('category').columns

# Baseline-модель

In [7]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', TargetEncoder(), cat_features)
])

In [10]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestClassifier())
])
pipeline.fit(X_train, y_train)

0,1,2
,steps,"[('preprocessor', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,target_type,'auto'
,smooth,'auto'
,cv,5
,shuffle,True
,random_state,

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
estimator = pipeline.fit(X_train, y_train)
predictions = estimator.predict(X_test)

In [43]:
metrics = {}
metrics["recall"] = recall_score(y_test, predictions, average='weighted')   
metrics["precision"] = precision_score(y_test, predictions, average='weighted')
metrics["f1"] = f1_score(y_test, predictions, average='weighted')
metrics

{'recall': 0.9008264462809917,
 'precision': 0.9014126558840371,
 'f1': 0.9008680611666997}

# Mlflow

In [24]:
TARGET_HOST = "localhost"
TARGET_PORT = 5000
TRACKING_URI = f"http://{TARGET_HOST}:{TARGET_PORT}"
REGISTRY_URI = TRACKING_URI

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_registry_uri(REGISTRY_URI)

In [None]:
EXPERIMENT_NAME = 'MobilePrice Classification'
RUN_NAME = "baseline model"
REGISTRY_MODEL_NAME = "estate_model_rf"

signature = mlflow.models.infer_signature(model_input=X_train.head(5))
input_example = X_train.head(5)
req_file = '../requirements.txt'
params_dict = pipeline.get_params()



In [None]:
experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id 
    mlflow.sklearn.log_model(pipeline, 
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_metrics(calc_metrics(y_test, predictions))
    mlflow.log_params(params_dict)

run = mlflow.get_run(run_id) 
assert (run.info.status =='FINISHED')

In [70]:
run_id

'2c44d7c273684abe8257e246eec8c585'

# Hовые признаки

In [30]:
X_train_fe_sklearn = X_train.copy()

In [36]:
from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer, MinMaxScaler
pf = PolynomialFeatures(degree=2)

In [34]:
X_train_fe_sklearn

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,total_pixels,screen_size
1671,1146,0,3.0,0,5,1,57,0.4,111,5,...,583,291,19,9,11,1,1,1,42559,21.023796
975,892,1,0.5,0,0,1,47,0.4,94,1,...,1782,1241,12,3,9,1,0,1,1167210,12.369317
1455,867,1,1.5,0,0,1,57,0.1,159,4,...,724,2157,8,6,18,1,0,0,209236,10.000000
659,966,1,0.6,0,9,1,50,0.2,117,4,...,1754,2574,10,1,5,1,0,1,2536284,10.049876
1398,1540,0,0.7,1,0,1,29,0.1,157,7,...,831,1161,11,8,5,1,0,0,264258,13.601471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1610,1793,0,2.7,0,12,1,44,0.7,175,5,...,1459,2803,19,9,6,1,1,1,955645,21.023796
1661,1902,0,0.5,0,10,0,13,0.2,155,4,...,877,940,14,10,15,1,1,0,671782,17.204651
509,1872,1,2.3,0,6,0,44,0.7,134,3,...,1259,1955,9,4,15,1,0,1,577881,9.848858
543,1590,0,1.9,1,3,1,21,1.0,160,5,...,963,1701,11,4,4,1,0,1,885960,11.704700


In [35]:
numeric_for_poly = ['battery_power', 'ram', 'px_height']
numeric_for_bins = ['int_memory', 'mobile_wt', 'clock_speed']

In [37]:
pf_pipeline = Pipeline(steps=[
    ('poly', pf),
    ('scale', StandardScaler())
])
preprocessor_sklearn = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', TargetEncoder(), cat_features),
        ('poly', pf_pipeline, numeric_for_poly), 
        ('binned_features', KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile'), numeric_for_bins)
    ],
    remainder='drop',
    ) 
X_train_fe_sklearn[numeric_for_poly] = X_train_fe_sklearn[numeric_for_poly].astype('float128')

In [38]:
X_train_fe_sklearn = preprocessor_sklearn.fit_transform(X_train_fe_sklearn, y_train)



In [39]:
feature_names = preprocessor_sklearn.get_feature_names_out()
feature_names_file = '../mlflow/new_feature_cols.txt'

with open(feature_names_file, 'w') as f:
    for name in feature_names:
        f.write(name + '\n')

In [40]:
pipeline_new_features = Pipeline([
    ('preprocessor', preprocessor_sklearn),
    ('classifier', RandomForestClassifier(random_state=42))
])

In [44]:
RUN_NAME = "new_features"
EXPERIMENT_NAME = 'MobilePrice Classification'

mlflow.set_experiment(EXPERIMENT_NAME)

with mlflow.start_run(run_name=RUN_NAME):

    mlflow.log_params({
        'poly_features': numeric_for_poly,
        'binned_features': numeric_for_bins,
        'poly_degree': 2,
        'kbins_n_bins': 5,
        'kbins_strategy': 'quantile'
    })
    
    pipeline_new_features.fit(X_train, y_train)
    
    y_pred = pipeline_new_features.predict(X_test)
    
    mlflow.log_metric("precision", precision_score(y_test, y_pred, average='weighted'))
    mlflow.log_metric("recall", recall_score(y_test, y_pred, average='weighted'))
    mlflow.log_metric("f1", f1_score(y_test, y_pred, average='weighted'))
    
    signature = mlflow.models.infer_signature(model_input=X_train.head(5))
    input_example = X_train.head(5)
    
    mlflow.sklearn.log_model(
        sk_model=pipeline_new_features,
        artifact_path="model",
        registered_model_name="2 version",
        signature=signature,
        input_example=input_example
    )
    
    mlflow.log_artifact(feature_names_file)
    
    if os.path.exists(req_file):
        mlflow.log_artifact(req_file)

Successfully registered model '2 version'.
2025/10/30 20:02:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: 2 version, version 1
Created version '1' of model '2 version'.
2025/10/30 20:02:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run new_features at: http://localhost:5000/#/experiments/1/runs/1573193aeac548d1adf21381d53af32b.
2025/10/30 20:02:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/1.


# Отбор наиболее важных признаков

In [55]:
from mlxtend.feature_selection import SequentialFeatureSelector

total_features = X_train_fe_sklearn.shape[1]
n_features_to_select = int(total_features * 0.4)  # 40% от общего количества

classifier_main_features = RandomForestClassifier(n_estimators=50, random_state=42)
selector = SequentialFeatureSelector(
    classifier_main_features, 
    k_features=n_features_to_select,
    forward=True,
)

selector.fit(X_train_fe_sklearn, y_train)

selected_indices = list(selector.k_feature_idx_)
selected_feature_names = [feature_names[i] for i in selected_indices]

main_features_file = '../mlflow/main_features.txt'

with open(main_features_file, 'w') as f:
    print(*selected_indices, sep=',', file=f)
    print(*selected_feature_names, sep=',', file=f)

print('main features:', *selected_feature_names)

from sklearn.base import BaseEstimator, TransformerMixin

class FeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, feature_indices):
        self.feature_indices = feature_indices
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        return X[:, self.feature_indices]

pipeline_main_features = Pipeline([
    ('preprocessor', preprocessor_sklearn),  
    ('selection', FeatureSelector(selected_indices)),
    ('classifier', RandomForestClassifier(random_state=42))
])

estimator = pipeline_main_features.fit(X_train, y_train)
predictions = estimator.predict(X_test) 

RUN_NAME = "main_features"
EXPERIMENT_NAME = 'MobilePrice Classification'

mlflow.set_experiment(EXPERIMENT_NAME)

with mlflow.start_run(run_name=RUN_NAME):
    mlflow.log_params({
        'feature_selection_method': 'SequentialFeatureSelector',
        'selection_direction': 'forward', 
        'n_features_to_select': n_features_to_select,
        'total_features': total_features,
        'selected_features_count': len(selected_indices),
        'selection_percentage': round(len(selected_indices)/total_features, 3)
    })
    
    mlflow.log_artifact(main_features_file)
     
    mlflow.log_metric("precision", precision_score(y_test, predictions, average='weighted'))
    mlflow.log_metric("recall", recall_score(y_test, predictions, average='weighted'))
    mlflow.log_metric("f1", f1_score(y_test, predictions, average='weighted'))
    
    input_example = X_train.head(5) 
    
    signature = mlflow.models.infer_signature(model_input=input_example)
    
    mlflow.sklearn.log_model(
        sk_model=pipeline_main_features,
        artifact_path="model",
        registered_model_name="3_version",
        signature=signature,
        input_example=input_example
    )

main features: num__four_g num__int_memory num__mobile_wt num__px_width num__ram num__touch_screen num__total_pixels poly__1 poly__battery_power poly__ram poly__battery_power ram poly__battery_power px_height poly__ram px_height binned_features__clock_speed


Successfully registered model '3_version'.
2025/10/30 21:31:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: 3_version, version 1
Created version '1' of model '3_version'.
2025/10/30 21:31:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run main_features at: http://localhost:5000/#/experiments/1/runs/fad20cd47c3947cfb407690257ff0746.
2025/10/30 21:31:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/1.


# Настройка параметров для лучшей модели

In [61]:
from sklearn.model_selection import  GridSearchCV
from sklearn.metrics import make_scorer

param_grid = {
    'classifier__n_estimators': [50, 100], #, 200, 300], 
    'classifier__max_depth': [None, 10, 15, 20, 25, 30],
    'classifier__max_features': [i/10 for i in range(1,10)],
}

gs = GridSearchCV(
    pipeline_main_features, 
    param_grid, 
    cv=3,
    scoring=make_scorer(f1_score, average='binary')
)
X_train_fe_sklearn = X_train.copy()
gs.fit(X_train_fe_sklearn, y_train)
print("Лучшие гиперпараметры:", gs.best_params_)
print("Лучшее значение f1-score:", gs.best_score_)
print("Лучшая модель:", gs.best_estimator_)

Traceback (most recent call last):
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 942, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 308, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 408, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 218, in wrapper
    return func(*args, **kwargs)
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/metrics/_classification.py", line 1465, in f1_score
    return fbeta_score(
  File "/home/nika/iis/.venv/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 191, in wrapper


Лучшие гиперпараметры: {'classifier__max_depth': None, 'classifier__max_features': 0.1, 'classifier__n_estimators': 50}
Лучшее значение f1-score: nan
Лучшая модель: Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'total_pixels', 'screen_size'],
      dtype='object')),
                                                 ('cat', Targ...
                                                                   PolynomialFeatures()),
                                                                  ('scale',
                                                                   StandardScaler())]),
                                                  ['batte

In [62]:
print("Лучшие гиперпараметры:", gs.best_params_)
print("Лучшее значение f1-score:", gs.best_score_)
print("Лучшая модель:", gs.best_estimator_)

Лучшие гиперпараметры: {'classifier__max_depth': None, 'classifier__max_features': 0.1, 'classifier__n_estimators': 50}
Лучшее значение f1-score: nan
Лучшая модель: Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'total_pixels', 'screen_size'],
      dtype='object')),
                                                 ('cat', Targ...
                                                                   PolynomialFeatures()),
                                                                  ('scale',
                                                                   StandardScaler())]),
                                                  ['batte

In [63]:
classifier_optimized = RandomForestClassifier(max_depth=None, max_features=0.1, n_estimators=50)

pipeline_optimized = Pipeline([
    ('preprocessor', ColumnTransformer([
        (
            'numeric', StandardScaler(),
            X_train_fe_sklearn.select_dtypes(exclude='category').columns
        ),
        ('categorical', TargetEncoder(), cat_features)])),
    ('selection', selector),
    ('classifier', classifier_optimized)
])
estimator = pipeline_optimized.fit(X_train_fe_sklearn, y_train)
display(estimator)
X_test_fe_sklearn = X_test.copy()
predictions = estimator.predict(X_test_fe_sklearn)

0,1,2
,steps,"[('preprocessor', ...), ('selection', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('numeric', ...), ('categorical', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,target_type,'auto'
,smooth,'auto'
,cv,5
,shuffle,True
,random_state,

0,1,2
,estimator,RandomForestC...ndom_state=42)
,k_features,"(14, ...)"
,forward,True
,floating,False
,verbose,0
,scoring,'accuracy'
,cv,5
,n_jobs,1
,pre_dispatch,'2*n_jobs'
,clone_estimator,True

0,1,2
,n_estimators,50
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,50
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,0.1
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [64]:
RUN_NAME = "best hyperparams"
input_example = X_train_fe_sklearn.head(5)
signature = mlflow.models.infer_signature(model_input=input_example)

params_dict = pipeline_main_features.get_params()

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id 
    mlflow.sklearn.log_model(pipeline_main_features, 
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_metrics(calc_metrics(y_test, predictions))
    mlflow.log_params(params_dict)

run = mlflow.get_run(run_id) 
assert (run.info.status =='FINISHED')

2025/10/30 22:16:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run best hyperparams at: http://localhost:5000/#/experiments/1/runs/e70c64bc504a4daabb2d5734f16a4c0b.
2025/10/30 22:16:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/1.


# Обучение лучшей модели на всей выборке

In [65]:
X_fe_sklearn = pd.concat([X_train_fe_sklearn, X_test_fe_sklearn])
y = pd.concat([y_train, y_test])

In [66]:
pipeline_all_data= Pipeline([
    ('preprocessor', ColumnTransformer([
        (
            'numeric', StandardScaler(),
            X_fe_sklearn.select_dtypes(exclude='category').columns
        ),
        ('categorical', TargetEncoder(), cat_features)])),
    ('selection', selector),
    ('classifier', classifier_main_features)
])

estimator = pipeline_main_features.fit(X_fe_sklearn, y)
display(estimator)



0,1,2
,steps,"[('preprocessor', ...), ('selection', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,target_type,'auto'
,smooth,'auto'
,cv,5
,shuffle,True
,random_state,

0,1,2
,degree,2
,interaction_only,False
,include_bias,True
,order,'C'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_bins,5
,encode,'ordinal'
,strategy,'quantile'
,quantile_method,'warn'
,dtype,
,subsample,200000
,random_state,

0,1,2
,feature_indices,"[5, 6, ...]"

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [69]:
RUN_NAME = "all_dataset"
input_example = X_fe_sklearn.head(5)
signature = mlflow.models.infer_signature(model_input=input_example)

params_dict = pipeline_all_data.get_params()

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id 
    mlflow.sklearn.log_model(pipeline_all_data, 
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_artifact('../mlflow/main_features.txt')
    mlflow.log_params(params_dict)

run = mlflow.get_run(run_id) 
assert (run.info.status =='FINISHED')

  "dataframe_split": {
    "columns": [
      "battery_power",
      "blue",
      "clock_speed",
      "dual_sim",
      "fc",
      "four_g",
      "int_memory",
      "m_dep",
      "mobile_wt",
      "n_cores",
      "pc",
      "px_height",
      "px_width",
      "ram",
      "sc_h",
      "sc_w",
      "talk_time",
      "three_g",
      "touch_screen",
      "wifi",
      "total_pixels",
      "screen_size"
    ],
    "data": [
      [
        1146,
        0,
        3.0,
        0,
        5,
        1,
        57,
        0.4,
        111,
        5,
        10,
        73,
        583,
        291,
        19,
        9,
        11,
        1,
        1,
        1,
        42559,
        21.02379604162864
      ],
      [
        892,
        1,
        0.5,
        0,
        0,
        1,
        47,
        0.4,
        94,
        1,
        18,
        655,
        1782,
        1241,
        12,
        3,
        9,
        1,
        0,
        1,
        1167210,
 