In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pickle

from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

import tensorflow as tf

In [3]:
COLUMNNS_FOR_ML = ['log_delta','log_followers','conversion',
                   'log_revenue','log_brand_appearance','log_avg_price',
                   'artisanal', 'b_corporation', 'bio', 'biodegradable',
                   'cadeau_ideal', 'concept_original', 'durable',
                   'eco_friendly','excellent_sur_yuka', 'exclusivite_choose',
                   'fabrication_a_la_demande', 'fait_main', 'gluten_free',
                   'iconique', 'inclusive', 'innovation', 'made_in_europe',
                   'made_in_france', 'madeinjapan', 'naturel', 'oeko_tex',
                   'premium', 'recyclable', 'saint_valentin', 'savoir_faire',
                   'seconde_main', 'socialement_engagee', 'serie_limitee',
                   'tendance', 'upcycling', 'vegan', 'vintage', 'zerodechet',
                   'category_sale',
                   'log_monetary', 'log_frequency','log_recency',
                   'category_1','category_2', 'category_3'
                   ]

In [4]:
scored_test_data = pd.read_csv('training_preparation/preped_test_data.csv', index_col=0).dropna().reset_index(drop=True)
scored_train_data = pd.read_csv('training_preparation/preped_train_data.csv', index_col=0).dropna().reset_index(drop=True)
all_scored_data = pd.concat([scored_test_data,scored_train_data], axis=0).reset_index(drop=True)

In [5]:
X_train = scored_train_data[COLUMNNS_FOR_ML].values
y_train = scored_train_data['interaction']
Y_train = np.ravel(y_train)

X_total = all_scored_data[COLUMNNS_FOR_ML].values
y_total = all_scored_data['interaction']

X_test = scored_test_data[COLUMNNS_FOR_ML].values
y_test = scored_test_data['interaction']
Y_test = np.ravel(y_test)

# Grid search

In [6]:
from sklearn.model_selection import GridSearchCV
import math

def gs_regression(model, par) :
    gs = GridSearchCV(model, par,cv=3,scoring ='neg_mean_absolute_error', verbose=10) 
    gs = gs.fit(X_train,y_train)

    #summarize the results of your GRIDSEARCH
    print('***GRIDSEARCH RESULTS***')
    print("Best score: %f using %s" % (gs.best_score_, gs.best_params_))
    means = gs.cv_results_['mean_test_score']
    stds = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    #for mean, stdev, param in zip(means, stds, params):
    #    print("%f (%f) with: %r" % (mean, stdev, param))
    
    y_pred_train=gs.predict(X_train)
    y_pred_test=gs.predict(X_test) 
    
    y_train_exp=y_train.apply(lambda x: math.exp(x)-1)
    y_test_exp=y_test.apply(lambda x: math.exp(x)-1)
    y_pred_train_exp=np.exp(y_pred_train)-1
    y_pred_test_exp=np.exp(y_pred_test)-1
    
            
    from sklearn import metrics
    print()
    print("MAE  train %.3f (%f)  test %.3f (%f)" % (metrics.mean_absolute_error(y_train, y_pred_train), metrics.mean_absolute_error(y_train_exp, y_pred_train_exp) ,metrics.mean_absolute_error(y_test, y_pred_test),  metrics.mean_absolute_error(y_test_exp, y_pred_test_exp)  ) )
    print("MSE  train %.3f              test %.3f" % (metrics.mean_squared_error(y_train, y_pred_train), metrics.mean_squared_error(y_test, y_pred_test)) ) 
    print("RMSE train %.3f              test %.3f" % (np.sqrt(metrics.mean_squared_error(y_train, y_pred_train)), np.sqrt(metrics.mean_squared_error(y_test, y_pred_test))) ) 
    print("r2   train %.3f              test %.3f" % (metrics.r2_score(y_train, y_pred_train), metrics.r2_score(y_test, y_pred_test)) )

# Linear regression

In [13]:
from sklearn.linear_model import LinearRegression

regressor = LinearRegression() 
parameters = {}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV 1/3; 1/1] START ............................................................
[CV 1/3; 1/1] END ............................., score=-0.421 total time=   2.5s
[CV 2/3; 1/1] START ............................................................
[CV 2/3; 1/1] END ............................., score=-0.421 total time=   2.6s
[CV 3/3; 1/1] START ............................................................
[CV 3/3; 1/1] END ............................., score=-0.421 total time=   3.3s
***GRIDSEARCH RESULTS***
Best score: -0.420915 using {}

MAE  train 0.421 (0.729379)  test 0.424 (0.728327)
MSE  train 0.209              test 0.210
RMSE train 0.457              test 0.459
r2   train 0.163              test 0.159


In [None]:
from sklearn.linear_model import LinearRegression #Ordinary Least Squares
regr = LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

LinearRegression()

In [30]:
pickle.dump(regr, open('regr_model.pkl', 'wb'))

# Ridge Regression

In [11]:
from sklearn.linear_model import Ridge

regressor = Ridge()
parameters = {"alpha": [0.001,0.01,0.1,1,10]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV 1/3; 1/5] START alpha=0.001.................................................
[CV 1/3; 1/5] END .................alpha=0.001;, score=-0.416 total time=   0.3s
[CV 2/3; 1/5] START alpha=0.001.................................................
[CV 2/3; 1/5] END .................alpha=0.001;, score=-0.416 total time=   0.3s
[CV 3/3; 1/5] START alpha=0.001.................................................
[CV 3/3; 1/5] END .................alpha=0.001;, score=-0.416 total time=   0.3s
[CV 1/3; 2/5] START alpha=0.01..................................................
[CV 1/3; 2/5] END ..................alpha=0.01;, score=-0.416 total time=   0.4s
[CV 2/3; 2/5] START alpha=0.01..................................................
[CV 2/3; 2/5] END ..................alpha=0.01;, score=-0.416 total time=   0.5s
[CV 3/3; 2/5] START alpha=0.01..................................................
[CV 3/3; 2/5] END ..................alpha=0.01;, 

In [31]:
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=0.001)
# Train the model using the training sets
ridge.fit(X_train, y_train)

Ridge(alpha=0.001)

In [32]:
pickle.dump(ridge, open('ridge_model.pkl', 'wb'))

# Lasso

In [18]:
from sklearn.linear_model import Lasso

regressor = Lasso()
parameters = {"alpha": [0.001,0.002,0.005,0.007]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV 1/3; 1/4] START alpha=0.001.................................................
[CV 1/3; 1/4] END .................alpha=0.001;, score=-0.424 total time=   1.5s
[CV 2/3; 1/4] START alpha=0.001.................................................
[CV 2/3; 1/4] END .................alpha=0.001;, score=-0.424 total time=   1.3s
[CV 3/3; 1/4] START alpha=0.001.................................................
[CV 3/3; 1/4] END .................alpha=0.001;, score=-0.424 total time=   1.1s
[CV 1/3; 2/4] START alpha=0.002.................................................
[CV 1/3; 2/4] END .................alpha=0.002;, score=-0.424 total time=   1.2s
[CV 2/3; 2/4] START alpha=0.002.................................................
[CV 2/3; 2/4] END .................alpha=0.002;, score=-0.425 total time=   1.1s
[CV 3/3; 2/4] START alpha=0.002.................................................
[CV 3/3; 2/4] END .................alpha=0.002;, 

In [33]:
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.001)
# Train the model using the training sets
lasso.fit(X_train, y_train)

Lasso(alpha=0.001)

In [34]:
pickle.dump(lasso, open('lasso_model.pkl', 'wb'))

# KNN

In [14]:
from sklearn.neighbors import KNeighborsRegressor

regressor = KNeighborsRegressor()

parameters = {'n_neighbors': np.arange(20,50,10),
              'p': [1,2]
            }

gs_regression(regressor, parameters)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV 1/3; 1/6] START n_neighbors=20, p=1.........................................


# Random Forest

In [7]:
from sklearn.ensemble import RandomForestRegressor 

regressor = RandomForestRegressor()
parameters = {"n_estimators":[100], "criterion": ['mse'], 
              "min_samples_leaf": [5,10,15], "random_state" : [42]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV 1/3; 1/3] START criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42


  warn(


[CV 1/3; 1/3] END criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42;, score=-0.364 total time=12.7min
[CV 2/3; 1/3] START criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42


  warn(


[CV 2/3; 1/3] END criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42;, score=-0.364 total time=200.3min
[CV 3/3; 1/3] START criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42


  warn(


[CV 3/3; 1/3] END criterion=mse, min_samples_leaf=5, n_estimators=100, random_state=42;, score=-0.365 total time=213.4min
[CV 1/3; 2/3] START criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42


  warn(


[CV 1/3; 2/3] END criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42;, score=-0.367 total time=166.6min
[CV 2/3; 2/3] START criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42


  warn(


[CV 2/3; 2/3] END criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42;, score=-0.367 total time=205.6min
[CV 3/3; 2/3] START criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42


  warn(


[CV 3/3; 2/3] END criterion=mse, min_samples_leaf=10, n_estimators=100, random_state=42;, score=-0.367 total time=151.5min
[CV 1/3; 3/3] START criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42


  warn(


[CV 1/3; 3/3] END criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42;, score=-0.369 total time=19.7min
[CV 2/3; 3/3] START criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42


  warn(


[CV 2/3; 3/3] END criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42;, score=-0.369 total time=12.0min
[CV 3/3; 3/3] START criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42


  warn(


[CV 3/3; 3/3] END criterion=mse, min_samples_leaf=15, n_estimators=100, random_state=42;, score=-0.370 total time=12.1min


  warn(


***GRIDSEARCH RESULTS***
Best score: -0.364455 using {'criterion': 'mse', 'min_samples_leaf': 5, 'n_estimators': 100, 'random_state': 42}

MAE  train 0.288 (0.508912)  test 0.415 (0.721468)
MSE  train 0.119              test 0.205
RMSE train 0.345              test 0.453
r2   train 0.524              test 0.181


In [8]:
from sklearn.ensemble import RandomForestRegressor 

RFR = RandomForestRegressor(criterion= 'mse', min_samples_leaf= 5, n_estimators= 100, random_state= 42)
RFR.fit (X_train,y_train)

  warn(


RandomForestRegressor(criterion='mse', min_samples_leaf=5, random_state=42)

In [9]:
pickle.dump(RFR, open('RFR_model.pkl', 'wb'))

# Support verctor regressor

In [25]:
from sklearn.svm import SVR

regressor = SVR()
parameters = {'C': [0.1,10,1000],
             'epsilon': [0.01,1],
             'gamma':['auto'],
             'kernel': ['linear','poly'],
             'degree': [2,3]
             }

gs_regression(regressor, parameters)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV 1/3; 1/24] START C=0.1, degree=2, epsilon=0.01, gamma=auto, kernel=linear...


In [20]:
from sklearn.svm import SVR
svr = SVR(C=0.1, epsilon=0.01, gamma='auto')
svr.fit (X_train,y_train)

NameError: name 'Y_total' is not defined

In [None]:
pickle.dump(svr, open('svr_model.pkl', 'wb'))

# Neural networks

In [9]:
from sklearn.neural_network import MLPRegressor

regressor = MLPRegressor(random_state=0)
parameters = {'hidden_layer_sizes': [(20, 20, 20, 10, 10),(20, 20, 10, 10, 10),(20,10,5,10), (20,10,5,10), (20,10,10,10),(20,20,10,10), (20,5,5)],
              'solver' : ['sgd'],
              'batch_size': [20],
              'learning_rate' : ['constant'],
              'alpha':[0.1],
              'max_iter':[10000]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 7 candidates, totalling 21 fits
[CV 1/3; 1/7] START alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd
[CV 1/3; 1/7] END alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd;, score=-0.417 total time= 2.9min
[CV 2/3; 1/7] START alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd
[CV 2/3; 1/7] END alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd;, score=-0.418 total time= 2.8min
[CV 3/3; 1/7] START alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd
[CV 3/3; 1/7] END alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 20, 10, 10), learning_rate=constant, max_iter=10000, solver=sgd;, score=-0.416 total time= 2.7min
[CV 1/3; 2/7] ST

In [35]:
from sklearn.neural_network import MLPRegressor
MLPR = MLPRegressor(alpha= 0.1, batch_size=20, hidden_layer_sizes= (20, 20, 10, 10), max_iter= 10000, solver= 'sgd')
MLPR.fit (X_train,y_train)

MLPRegressor(alpha=0.1, batch_size=20, hidden_layer_sizes=(20, 20, 10, 10),
             max_iter=10000, solver='sgd')

In [36]:
pickle.dump(MLPR, open('MLPR_model.pkl', 'wb'))

# Adaboost

In [10]:
from sklearn.ensemble import AdaBoostRegressor

regressor = AdaBoostRegressor() # base_estimator=DecisionTreeRegressor(max_depth=3)
parameters = {"n_estimators":[5,10,20,30], "learning_rate":[0.2, 0.1,0.5,0.3], 
              "loss": ['linear'], "random_state" : [0]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV 1/3; 1/16] START learning_rate=0.2, loss=linear, n_estimators=5, random_state=0
[CV 1/3; 1/16] END learning_rate=0.2, loss=linear, n_estimators=5, random_state=0;, score=-0.422 total time=  20.1s
[CV 2/3; 1/16] START learning_rate=0.2, loss=linear, n_estimators=5, random_state=0
[CV 2/3; 1/16] END learning_rate=0.2, loss=linear, n_estimators=5, random_state=0;, score=-0.422 total time=  18.6s
[CV 3/3; 1/16] START learning_rate=0.2, loss=linear, n_estimators=5, random_state=0
[CV 3/3; 1/16] END learning_rate=0.2, loss=linear, n_estimators=5, random_state=0;, score=-0.423 total time=  19.6s
[CV 1/3; 2/16] START learning_rate=0.2, loss=linear, n_estimators=10, random_state=0
[CV 1/3; 2/16] END learning_rate=0.2, loss=linear, n_estimators=10, random_state=0;, score=-0.423 total time=  35.6s
[CV 2/3; 2/16] START learning_rate=0.2, loss=linear, n_estimators=10, random_state=0
[CV 2/3; 2/16] END learning_rate=0.2, loss=linear, n

In [37]:
from sklearn.ensemble import AdaBoostRegressor

Ada = AdaBoostRegressor(learning_rate= 0.1, loss='linear', n_estimators= 5)
Ada.fit(X_train,y_train)

AdaBoostRegressor(learning_rate=0.1, n_estimators=5)

In [38]:
pickle.dump(Ada, open('ada_model.pkl', 'wb'))

# Gradient Boosting

In [13]:
from sklearn.ensemble import GradientBoostingRegressor

regressor = GradientBoostingRegressor() 
parameters = {"n_estimators":[20,50,100], "learning_rate":[0.01, 0.1,1,10], 
              "random_state" : [10] ,
             "max_depth":[1,2]}

gs_regression(regressor, parameters)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV 1/3; 1/24] START learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10
[CV 1/3; 1/24] END learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10;, score=-0.490 total time=  16.1s
[CV 2/3; 1/24] START learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10
[CV 2/3; 1/24] END learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10;, score=-0.490 total time=  18.2s
[CV 3/3; 1/24] START learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10
[CV 3/3; 1/24] END learning_rate=0.01, max_depth=1, n_estimators=20, random_state=10;, score=-0.490 total time=  16.1s
[CV 1/3; 2/24] START learning_rate=0.01, max_depth=1, n_estimators=50, random_state=10
[CV 1/3; 2/24] END learning_rate=0.01, max_depth=1, n_estimators=50, random_state=10;, score=-0.479 total time=  38.2s
[CV 2/3; 2/24] START learning_rate=0.01, max_depth=1, n_estimators=50, random_state=10
[CV 2/3; 2/24] END learning_

In [39]:
from sklearn.ensemble import GradientBoostingRegressor

GBR = GradientBoostingRegressor(learning_rate=1, n_estimators=100, max_depth=2)
GBR.fit(X_train,y_train)

GradientBoostingRegressor(learning_rate=1, max_depth=2)

In [40]:
pickle.dump(GBR, open('GBR_model.pkl', 'wb'))

# Tensorflow

In [7]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

In [8]:
fashion_mnist = tf.keras.datasets.fashion_mnist

(x_train, y_train),(x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [9]:
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

METRIC_ACCURACY = 'accuracy'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
  hp.hparams_config(
    hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
    metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
  )

In [10]:
def train_test_model(hparams):
  model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
    tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
  ])
  model.compile(
      optimizer=hparams[HP_OPTIMIZER],
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
  )

  model.fit(x_train, y_train, epochs=1) # Run with 1 epoch to speed things up for demo purposes
  _, accuracy = model.evaluate(x_test, y_test)
  return accuracy

In [11]:
def run(run_dir, hparams):
  with tf.summary.create_file_writer(run_dir).as_default():
    hp.hparams(hparams)  # record the values used in this trial
    accuracy = train_test_model(hparams)
    tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

In [12]:
session_num = 0

for num_units in HP_NUM_UNITS.domain.values:
  for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
    for optimizer in HP_OPTIMIZER.domain.values:
      hparams = {
          HP_NUM_UNITS: num_units,
          HP_DROPOUT: dropout_rate,
          HP_OPTIMIZER: optimizer,
      }
      run_name = "run-%d" % session_num
      print('--- Starting trial: %s' % run_name)
      print({h.name: hparams[h] for h in hparams})
      run('logs/hparam_tuning/' + run_name, hparams)
      session_num += 1

--- Starting trial: run-0
{'num_units': 16, 'dropout': 0.1, 'optimizer': 'adam'}
--- Starting trial: run-1
{'num_units': 16, 'dropout': 0.1, 'optimizer': 'sgd'}
--- Starting trial: run-2
{'num_units': 16, 'dropout': 0.2, 'optimizer': 'adam'}
--- Starting trial: run-3
{'num_units': 16, 'dropout': 0.2, 'optimizer': 'sgd'}
--- Starting trial: run-4
{'num_units': 32, 'dropout': 0.1, 'optimizer': 'adam'}
--- Starting trial: run-5
{'num_units': 32, 'dropout': 0.1, 'optimizer': 'sgd'}
--- Starting trial: run-6
{'num_units': 32, 'dropout': 0.2, 'optimizer': 'adam'}
--- Starting trial: run-7
{'num_units': 32, 'dropout': 0.2, 'optimizer': 'sgd'}


In [16]:
import tensorflow as tf
import matplotlib.pyplot as plt

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='linear')
])

model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              metrics=['accuracy'])

history1 = model.fit(X_train, Y_train, batch_size=20, epochs=15, validation_data=(X_test, Y_test))

plt.plot(history1.history['loss'], marker='o', label='training loss')
plt.plot(history1.history['val_loss'], marker='o', label='validation loss')
plt.xlabel('Number of epochs')
plt.ylabel('Loss')
plt.legend()
plt.ylim(0.2, 0.25)

model.summary()



Epoch 1/15

In [None]:


model.save('TF_model.h5')


In [15]:
model = tf.keras.models.load_model('TF_model.h5')
# Calculate the gradients of the model's output with respect to the input features
with tf.GradientTape() as tape:
    inputs = tf.convert_to_tensor(X_train, dtype=tf.float32)
    tape.watch(inputs)
    outputs = model(inputs)

gradients = tape.gradient(outputs, inputs).numpy()

# Calculate the feature importance as the mean absolute value of the gradients
importances = np.mean(np.abs(gradients), axis=0)

# Sort feature importances in descending order
indices = np.argsort(importances)[::-1]

# Print feature importance rankings
for i, feature_idx in enumerate(indices):
    print(f"{i+1}. Feature {COLUMNNS_FOR_ML[feature_idx]}: {importances[feature_idx]}")

1. Feature conversion: 0.13033786416053772
2. Feature log_revenue: 0.12142834812402725
3. Feature log_avg_price: 0.04703013598918915
4. Feature log_frequency: 0.04637593775987625
5. Feature log_monetary: 0.04583758860826492
6. Feature saint_valentin: 0.04387616738677025
7. Feature innovation: 0.036797888576984406
8. Feature oeko_tex: 0.03383103013038635
9. Feature concept_original: 0.024469507858157158
10. Feature premium: 0.02433004602789879
11. Feature seconde_main: 0.02207506075501442
12. Feature vegan: 0.020178072154521942
13. Feature excellent_sur_yuka: 0.01981588453054428
14. Feature biodegradable: 0.018985005095601082
15. Feature tendance: 0.017507929354906082
16. Feature inclusive: 0.017333930358290672
17. Feature upcycling: 0.01619735360145569
18. Feature exclusivite_choose: 0.016185613349080086
19. Feature naturel: 0.01556459628045559
20. Feature cadeau_ideal: 0.015101165510714054
21. Feature vintage: 0.014050913974642754
22. Feature iconique: 0.013366227969527245
23. Feature