# Surrogate Model
### Training of Random Forest, Gradient Boosting, and Extra Trees Classifier wrapped in Ordinal Classifier Framework 

In [1]:
import os
os.chdir("..")
import pickle
import pandas as pd
import numpy as np
import random
import copy
#import seaborn as sn
#import matplotlib.pyplot as plt
#import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
#from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split, RandomizedSearchCV
#from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from src.preprocessing.sm_label_transformer import *
from src.models.ordinal_classifier_scikit import *
#import matplotlib.pyplot as plt
#from matplotlib import pyplot
from pprint import pprint
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import IsolationForest



### 1. Set seeds

In [2]:
# Set seeds in order to reproduce results
random.seed(73)
np.random.seed(73)

### 2. Load data

In [3]:
train_dataset = pd.read_csv("data/surrogate_model/sm_train_23042021.csv")
test_dataset = pd.read_csv("data/surrogate_model/sm_test_23042021.csv") 
train_dataset.head()

Unnamed: 0,an_vec_0,an_vec_1,an_vec_2,an_vec_3,an_vec_4,an_vec_5,an_vec_6,an_vec_7,an_vec_8,an_vec_9,...,rel_width,rel_x_position,rel_y_position,rel_x_position_to_animations,rel_y_position_to_animations,nr_paths_svg,rating_0,rating_1,rating_2,rating_3
0,0,0,0,1,0,0,-1.0,-1.0,-1.0,-1.0,...,0.054752,0.033838,0.04212,0.039501,0.051404,24.0,1,1,1,0
1,0,0,0,0,0,1,-1.0,-1.0,-1.0,-1.0,...,0.395994,0.501511,0.579289,0.714309,0.706974,24.0,1,1,1,0
2,0,0,0,0,1,0,-1.0,-1.0,-1.0,-1.0,...,0.395994,0.501511,0.63794,0.714309,0.778553,24.0,1,1,1,0
3,1,0,0,0,0,0,0.134364,0.847434,-1.0,-1.0,...,0.054752,0.033838,0.04212,0.039501,0.051404,24.0,0,0,0,0
4,0,0,1,0,0,0,-1.0,-1.0,-1.0,0.763775,...,0.395994,0.501511,0.579289,0.714309,0.706974,24.0,1,1,0,0


We need to decode rating labels as orgininal labels are required here.

In [4]:
X_train = train_dataset.iloc[:,:-4]
y_train = train_dataset.iloc[:,-4:]
y_train = pd.Series(decode_classes(y_train.to_numpy()).flatten())

X_test = test_dataset.iloc[:,:-4]
y_test = test_dataset.iloc[:,-4:]
y_test = pd.Series(decode_classes(y_test.to_numpy()).flatten())

In [5]:
X_train.head()

Unnamed: 0,an_vec_0,an_vec_1,an_vec_2,an_vec_3,an_vec_4,an_vec_5,an_vec_6,an_vec_7,an_vec_8,an_vec_9,...,diff_fill_r,diff_fill_g,diff_fill_b,rel_height,rel_width,rel_x_position,rel_y_position,rel_x_position_to_animations,rel_y_position_to_animations,nr_paths_svg
0,0,0,0,1,0,0,-1.0,-1.0,-1.0,-1.0,...,-4.541667,-4.541667,-4.541667,0.084239,0.054752,0.033838,0.04212,0.039501,0.051404,24.0
1,0,0,0,0,0,1,-1.0,-1.0,-1.0,-1.0,...,102.458333,102.458333,102.458333,0.362888,0.395994,0.501511,0.579289,0.714309,0.706974,24.0
2,0,0,0,0,1,0,-1.0,-1.0,-1.0,-1.0,...,102.458333,102.458333,102.458333,0.362904,0.395994,0.501511,0.63794,0.714309,0.778553,24.0
3,1,0,0,0,0,0,0.134364,0.847434,-1.0,-1.0,...,-4.541667,-4.541667,-4.541667,0.084239,0.054752,0.033838,0.04212,0.039501,0.051404,24.0
4,0,0,1,0,0,0,-1.0,-1.0,-1.0,0.763775,...,102.458333,102.458333,102.458333,0.362888,0.395994,0.501511,0.579289,0.714309,0.706974,24.0


In [6]:
y_train.head()

0    3
1    3
2    3
3    0
4    2
dtype: int32

### 3. Upscaling of class 4/"Very Good" (optional, was shown to not improve the performance^)

In [7]:
unique_train, counts_train = np.unique(y_train, return_counts=True)
label_counts = dict(zip(unique_train, counts_train))
label_counts

{0: 2580, 1: 2143, 2: 3752, 3: 1892, 4: 402}

In [8]:
# Get indices where data label equals 4
i_class4 = np.where(y_train == 4)[0]
# Calculate upsample size (mean of class sizes 0-3 - class size 4)
upsample_size = round(np.mean([label_counts[i] for i in range(4)])) - label_counts[4]
# Get upsample indices
i_class4_upsampled = np.random.choice(i_class4, size=upsample_size, replace=True)

In [9]:
# Create upsampled dataframe
y_train = pd.concat([y_train, y_train[i_class4_upsampled]]).reset_index(drop=True)
X_train = pd.concat([X_train, X_train.iloc[i_class4_upsampled,:]]).reset_index(drop=True)

In [10]:
y_train

0       3
1       3
2       3
3       0
4       2
       ..
6234    4
6235    4
6236    4
6237    4
6238    4
Length: 6239, dtype: int64

### 4. Outlier removal (optional)

In [7]:
# Use of Isolation Forest
ifo = IsolationForest(random_state=0).fit(X_train)
X_train[['anomaly']] = ifo.predict(X_train)
X_train.head()

Unnamed: 0,an_vec_0,an_vec_1,an_vec_2,an_vec_3,an_vec_4,an_vec_5,an_vec_6,an_vec_7,an_vec_8,an_vec_9,...,diff_fill_g,diff_fill_b,rel_height,rel_width,rel_x_position,rel_y_position,rel_x_position_to_animations,rel_y_position_to_animations,nr_paths_svg,anomaly
0,0,0,0,1,0,0,-1.0,-1.0,-1.0,-1.0,...,-4.541667,-4.541667,0.084239,0.054752,0.033838,0.04212,0.039501,0.051404,24.0,1
1,0,0,0,0,0,1,-1.0,-1.0,-1.0,-1.0,...,102.458333,102.458333,0.362888,0.395994,0.501511,0.579289,0.714309,0.706974,24.0,1
2,0,0,0,0,1,0,-1.0,-1.0,-1.0,-1.0,...,102.458333,102.458333,0.362904,0.395994,0.501511,0.63794,0.714309,0.778553,24.0,1
3,1,0,0,0,0,0,0.134364,0.847434,-1.0,-1.0,...,-4.541667,-4.541667,0.084239,0.054752,0.033838,0.04212,0.039501,0.051404,24.0,-1
4,0,0,1,0,0,0,-1.0,-1.0,-1.0,0.763775,...,102.458333,102.458333,0.362888,0.395994,0.501511,0.579289,0.714309,0.706974,24.0,1


In [8]:
# remove outliers
out_ind = X_train[X_train['anomaly']==-1].index
X_train.drop(out_ind, inplace=True, axis=0)
y_train.drop(out_ind, inplace=True, axis=0)
X_train.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
X_train.drop('anomaly', axis=1, inplace=True)

### 5. Train models

#### 5.1 Random Forest

##### Define Grid for Random Search

In [8]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start=20, stop=2000, num=10)]
# Number of features to consider at every split
max_features = ['auto', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'max_features': ['auto', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [20, 240, 460, 680, 900, 1120, 1340, 1560, 1780, 2000]}


##### Perform Random Search

In [9]:
# Use the random grid to search for best hyperparameters
# Define stratified cross validation
cross_val = StratifiedKFold(n_splits=5)
# First create the base model to tune
rf = RandomForestOC()
# Random search of parameters, using 3 fold cross validation, search across 100 different combinations
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=1, cv=cross_val, verbose=2, random_state=42, scoring='neg_mean_absolute_error')
# Fit the random search model
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   2.0s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   1.9s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   2.0s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   1.9s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   2.6s


RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                   estimator=<src.models.ordinal_classifier_scikit.RandomForestOC object at 0x000001CDEAEA86C8>,
                   n_iter=1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': [10, 20, 30, 40, 50, 60,
                                                      70, 80, 90, 100, 110,
                                                      None],
                                        'max_features': ['auto', 'log2'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [20, 240, 460, 680, 900,
                                                         1120, 1340, 1560, 1780,
                                                         2000]},
                   random_state=42, scoring='

##### Get best parameters and best evaluation score

In [10]:
rf_random.best_params_

{'n_estimators': 20,
 'min_samples_split': 10,
 'min_samples_leaf': 2,
 'max_features': 'log2',
 'max_depth': 50,
 'bootstrap': True}

In [11]:
rf_random.best_score_

-0.8123291074060035

##### Train best model on whole training data

In [12]:
rf_best = rf_random.best_estimator_
rf_best.fit(X_train, y_train)

##### Evaluate best model on test data

In [13]:
y_pred_test = rf_best.predict(X_test)
y_pred_train = rf_best.predict(X_train)

In [14]:
print(f'Label MAE of best random forest classifier on train set: {mean_absolute_error(y_pred_train, y_train)}')
print(f'Label MAE of best random forest classifier on test set: {mean_absolute_error(y_pred_test, y_test)}')

Label MAE of best random forest classifier on train set: 0.41248026743430216
Label MAE of best random forest classifier on test set: 0.787363304981774


In [15]:
print(f'Accuracy of  best random forest classifier on train set: {accuracy_score(y_pred_train, y_train)}')
print(f'Accuracy of best random forest classifier on test set: {accuracy_score(y_pred_test, y_test)}')

Accuracy of  best random forest classifier on train set: 0.7060079858854118
Accuracy of best random forest classifier on test set: 0.44552450384771164


In [16]:
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.52      0.75      0.61       498
           1       0.19      0.13      0.15       414
           2       0.47      0.69      0.56       918
           3       0.31      0.07      0.11       532
           4       0.00      0.00      0.00       107

    accuracy                           0.45      2469
   macro avg       0.30      0.33      0.29      2469
weighted avg       0.38      0.45      0.38      2469



In [17]:
print(confusion_matrix(y_test, y_pred_test))

[[374  36  82   4   2]
 [149  54 193  18   0]
 [120 115 637  46   0]
 [ 67  64 366  35   0]
 [ 16  15  65  11   0]]


##### Save best model

In [18]:
filename = 'models/sm_random_forest.sav'
pickle.dump(rf_best, open(filename, 'wb'))

#### 5.2 Gradient Boosting Classifier

##### Define Grid for Random Search

In [19]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]

# Boosting learning rate
learning_rate = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]

# Maximum number of levels in tree
max_depth = range(5,16,2)

min_samples_split = range(200,1401,200)

min_samples_leaf = range(30,71,10)

max_features = range(7,20,2)

subsample = [0.6,0.7,0.75,0.8,0.85,0.9]


random_grid = {'n_estimators': n_estimators,
               'learning_rate': learning_rate,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'max_features': max_features,
               'subsample': subsample}
pprint(random_grid)

{'learning_rate': [0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
 'max_depth': range(5, 16, 2),
 'max_features': range(7, 20, 2),
 'min_samples_leaf': range(30, 71, 10),
 'min_samples_split': range(200, 1401, 200),
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
 'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9]}


##### Perform Random Search

In [20]:
# Use the random grid to search for best hyperparameters
# Define stratified cross validation
cross_val = StratifiedKFold(n_splits=5)
# First create the base model to tune
gb = GradientBoostingOC()
# Random search of parameters, using 3 fold cross validation, search across 100 different combinations
gb_random = RandomizedSearchCV(estimator=gb, param_distributions=random_grid, n_iter=1, cv=cross_val, verbose=2, random_state=42, scoring = 'neg_mean_absolute_error')
# Fit the random search model
gb_random.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END learning_rate=0.1, max_depth=9, max_features=11, min_samples_leaf=30, min_samples_split=600, n_estimators=1400, subsample=0.75; total time= 1.8min
[CV] END learning_rate=0.1, max_depth=9, max_features=11, min_samples_leaf=30, min_samples_split=600, n_estimators=1400, subsample=0.75; total time= 1.9min
[CV] END learning_rate=0.1, max_depth=9, max_features=11, min_samples_leaf=30, min_samples_split=600, n_estimators=1400, subsample=0.75; total time= 2.2min
[CV] END learning_rate=0.1, max_depth=9, max_features=11, min_samples_leaf=30, min_samples_split=600, n_estimators=1400, subsample=0.75; total time= 2.9min
[CV] END learning_rate=0.1, max_depth=9, max_features=11, min_samples_leaf=30, min_samples_split=600, n_estimators=1400, subsample=0.75; total time= 2.8min


RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                   estimator=<src.models.ordinal_classifier_scikit.GradientBoostingOC object at 0x000001CDEB1BD888>,
                   n_iter=1,
                   param_distributions={'learning_rate': [0.05, 0.1, 0.15, 0.2,
                                                          0.25, 0.3],
                                        'max_depth': range(5, 16, 2),
                                        'max_features': range(7, 20, 2),
                                        'min_samples_leaf': range(30, 71, 10),
                                        'min_samples_split': range(200, 1401, 200),
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000],
                                        'subsample': [0.6, 0.7, 0.75, 0.8, 0.85,
    

##### Get best parameters and best evaluation score

In [21]:
gb_random.best_params_

{'subsample': 0.75,
 'n_estimators': 1400,
 'min_samples_split': 600,
 'min_samples_leaf': 30,
 'max_features': 11,
 'max_depth': 9,
 'learning_rate': 0.1}

In [22]:
gb_random.best_score_

-0.82913453232539

##### Train best model on whole training data

In [23]:
gb_best = gb_random.best_estimator_
gb_best.fit(X_train, y_train)

##### Evaluate best model on test data

In [24]:
y_pred_test = gb_best.predict(X_test)
y_pred_train = gb_best.predict(X_train)

In [25]:
print(f'Label MAE of best gradient boosting classifier on train set: {mean_absolute_error(y_pred_train, y_train)}')
print(f'Label MAE of best gradient boosting classifier on test set: {mean_absolute_error(y_pred_test, y_test)}')

Label MAE of best gradient boosting classifier on train set: 0.22091187668307177
Label MAE of best gradient boosting classifier on test set: 0.7825030376670717


In [26]:
print(f'Accuracy of  best gradient boosting classifier on train set: {accuracy_score(y_pred_train, y_train)}')
print(f'Accuracy of best gradient boosting classifier on test set: {accuracy_score(y_pred_test, y_test)}')

Accuracy of  best gradient boosting classifier on train set: 0.8281177453802582
Accuracy of best gradient boosting classifier on test set: 0.43458890238963144


In [27]:
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.57      0.62      0.59       498
           1       0.22      0.30      0.25       414
           2       0.50      0.59      0.54       918
           3       0.38      0.18      0.25       532
           4       0.00      0.00      0.00       107

    accuracy                           0.43      2469
   macro avg       0.33      0.34      0.33      2469
weighted avg       0.42      0.43      0.42      2469



In [28]:
print(confusion_matrix(y_test, y_pred_test))

[[308 110  64  15   1]
 [103 126 149  36   0]
 [ 82 201 541  91   3]
 [ 39 117 276  98   2]
 [  6  23  59  19   0]]


##### Save best model

In [29]:
filename = 'models/sm_gradient_boosting.sav'
pickle.dump(gb_best, open(filename, 'wb'))

#### 5.3 Extra Trees Classifier

##### Define Grid for Random Search

In [30]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start=20, stop=2000, num=10)]
# Number of features to consider at every split
max_features = ['auto', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'max_features': ['auto', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [20, 240, 460, 680, 900, 1120, 1340, 1560, 1780, 2000]}


##### Perform Random Search

In [31]:
# Use the random grid to search for best hyperparameters
# Define stratified cross validation
cross_val = StratifiedKFold(n_splits=5)
# First create the base model to tune
et = ExtraTreesOC()
# Random search of parameters, using 3 fold cross validation, search across 100 different combinations
et_random = RandomizedSearchCV(estimator = et, param_distributions = random_grid, n_iter = 1, cv = cross_val, verbose=2, random_state=42, scoring = 'neg_mean_absolute_error')
# Fit the random search model
et_random.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   0.7s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   0.7s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   0.8s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   0.8s
[CV] END bootstrap=True, max_depth=50, max_features=log2, min_samples_leaf=2, min_samples_split=10, n_estimators=20; total time=   0.7s


RandomizedSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
                   estimator=<src.models.ordinal_classifier_scikit.ExtraTreesOC object at 0x000001CDEC4A7208>,
                   n_iter=1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': [10, 20, 30, 40, 50, 60,
                                                      70, 80, 90, 100, 110,
                                                      None],
                                        'max_features': ['auto', 'log2'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [20, 240, 460, 680, 900,
                                                         1120, 1340, 1560, 1780,
                                                         2000]},
                   random_state=42, scoring='ne

##### Get best parameters and best evaluation score

In [32]:
et_random.best_params_

{'n_estimators': 20,
 'min_samples_split': 10,
 'min_samples_leaf': 2,
 'max_features': 'log2',
 'max_depth': 50,
 'bootstrap': True}

In [33]:
et_random.best_score_

-0.7962629071050694

##### Train best model on whole training data

In [34]:
et_best = et_random.best_estimator_
et_best.fit(X_train, y_train)

##### Evaluate best model on test data

In [35]:
y_pred_test = et_best.predict(X_test)
y_pred_train = et_best.predict(X_train)

In [36]:
print(f'Label MAE of best extra trees classifier on train set: {mean_absolute_error(y_pred_train, y_train)}')
print(f'Label MAE of best extra trees classifier on test set: {mean_absolute_error(y_pred_test, y_test)}')

Label MAE of best extra trees classifier on train set: 0.505896554926177
Label MAE of best extra trees classifier on test set: 0.7586067233697853


In [37]:
print(f'Accuracy of  best extra trees classifier on train set: {accuracy_score(y_pred_train, y_train)}')
print(f'Accuracy of best extra trees classifier on test set: {accuracy_score(y_pred_test, y_test)}')

Accuracy of  best extra trees classifier on train set: 0.6466710000928592
Accuracy of best extra trees classifier on test set: 0.46132037262049413


In [38]:
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.52      0.75      0.61       498
           1       0.23      0.11      0.15       414
           2       0.48      0.76      0.59       918
           3       0.24      0.04      0.06       532
           4       0.00      0.00      0.00       107

    accuracy                           0.46      2469
   macro avg       0.29      0.33      0.28      2469
weighted avg       0.37      0.46      0.38      2469



In [39]:
print(confusion_matrix(y_test, y_pred_test))

[[374  35  84   5   0]
 [150  46 206  12   0]
 [115  63 699  41   0]
 [ 68  46 398  20   0]
 [ 17   6  77   7   0]]


##### Save best model

In [40]:
filename = 'models/sm_extra_trees.sav'
pickle.dump(et_best, open(filename, 'wb'))