In [1]:
# 绘图案例 an example of matplotlib
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import jn
from IPython.display import display, clear_output
import time

In [2]:
#将kilometer当做类别变量处理试试,异常值用groupby处理,'匿名特征可以进一步处理一下'
## 基础工具
import numpy as np
import pandas as pd
import warnings
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.special import jn
from IPython.display import display, clear_output
import time
from tqdm import tqdm
import itertools

warnings.filterwarnings('ignore')
%matplotlib inline

## 模型预测的
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor

## 数据降维处理的
from sklearn.decomposition import PCA,FastICA,FactorAnalysis,SparsePCA

import lightgbm as lgb
import xgboost as xgb

## 参数搜索和评价的
from sklearn.model_selection import GridSearchCV,cross_val_score,StratifiedKFold,train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

import scipy.signal as signal

## Predefined Functions

In [3]:
#处理异常值
def smooth_cols(group,out_value,kind):
    cols = ['power']
    if kind == 'g':
        for col in cols:
            yes_no = (group[col]<out_value).astype('int')
            new = yes_no * group[col]
            group[col] = new.replace(0,group[col].quantile(q=0.995))
        return group
    if kind == 'l':
        for col in cols:
            yes_no = (group[col]>out_value).astype('int')
            new = yes_no * group[col]
            group[col] = new.replace(0,group[col].quantile(q=0.07))
        return group   
    
def date_proc(x):
    m = int(x[4:6])
    if m == 0:
        m = 1
    return x[:4] + '-' + str(m) + '-' + x[6:]

#定义日期提取函数
def date_tran(df,fea_col):
    for f in tqdm(fea_col):
        df[f] = pd.to_datetime(df[f].astype('str').apply(date_proc))
        df[f + '_year'] = df[f].dt.year
        df[f + '_month'] = df[f].dt.month
        df[f + '_day'] = df[f].dt.day
        df[f + '_dayofweek'] = df[f].dt.dayofweek
    return (df)

#分桶操作
def cut_group(df,cols,num_bins=50):
    for col in cols:
        all_range = int(df[col].max()-df[col].min())
        bin = [i*all_range/num_bins for i in range(all_range)]
        df[col+'_bin'] = pd.cut(df[col], bin, labels=False)
    return df

### count编码
def count_coding(df,fea_col):
    for f in fea_col:
        df[f + '_count'] = df[f].map(df[f].value_counts())
    return(df)

#定义交叉特征统计
def cross_cat_num(df,num_col,cat_col):
    for f1 in tqdm(cat_col):
        g = df.groupby(f1, as_index=False)
        for f2 in tqdm(num_col):
            feat = g[f2].agg({
                '{}_{}_max'.format(f1, f2): 'max', '{}_{}_min'.format(f1, f2): 'min',
                '{}_{}_median'.format(f1, f2): 'median',
            })
            df = df.merge(feat, on=f1, how='left')
    return(df)

### 类别特征的二阶交叉
from scipy.stats import entropy
def cross_qua_cat_num(df):
    for f_pair in tqdm([
        ['model', 'brand'], ['model', 'regionCode'], ['brand', 'regionCode']
    ]):
        ### 共现次数
        df['_'.join(f_pair) + '_count'] = df.groupby(f_pair)['SaleID'].transform('count')
        ### n unique、熵
        df = df.merge(df.groupby(f_pair[0], as_index=False)[f_pair[1]].agg({
            '{}_{}_nunique'.format(f_pair[0], f_pair[1]): 'nunique',
            '{}_{}_ent'.format(f_pair[0], f_pair[1]): lambda x: entropy(x.value_counts() / x.shape[0])
        }), on=f_pair[0], how='left')
        df = df.merge(df.groupby(f_pair[1], as_index=False)[f_pair[0]].agg({
            '{}_{}_nunique'.format(f_pair[1], f_pair[0]): 'nunique',
            '{}_{}_ent'.format(f_pair[1], f_pair[0]): lambda x: entropy(x.value_counts() / x.shape[0])
        }), on=f_pair[1], how='left')
        ### 比例偏好
        df['{}_in_{}_prop'.format(f_pair[0], f_pair[1])] = df['_'.join(f_pair) + '_count'] / df[f_pair[1] + '_count']
        df['{}_in_{}_prop'.format(f_pair[1], f_pair[0])] = df['_'.join(f_pair) + '_count'] / df[f_pair[0] + '_count']
    return (df)

def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() 
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() 
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    return df

## Read Data

In [4]:
## 通过Pandas对于数据进行读取 (pandas是一个很友好的数据读取函数库)
Train_data = reduce_mem_usage(pd.read_csv('./data/used_car_train_20200313.csv', sep=' '))
TestA_data = reduce_mem_usage(pd.read_csv('./data/used_car_testB_20200421.csv', sep=' '))

#Train_data = Train_data[Train_data['price']>100]
#Train_data['price'] = np.log1p(Train_data['price'])
## 输出数据的大小信息
print('Train data shape:',Train_data.shape)
print('TestA data shape:',TestA_data.shape)


#合并数据集
concat_data = pd.concat([Train_data,TestA_data])
concat_data['notRepairedDamage'] = concat_data['notRepairedDamage'].replace('-',0).astype('float16')
concat_data = concat_data.fillna(concat_data.mode().iloc[0,:])
print('concat_data shape:',concat_data.shape)

Memory usage of dataframe is 37200128.00 MB
Memory usage after optimization is: 10200260.00 MB
Decreased by 72.6%
Memory usage of dataframe is 12000128.00 MB
Memory usage after optimization is: 3200260.00 MB
Decreased by 73.3%
Train data shape: (150000, 31)
TestA data shape: (50000, 30)
concat_data shape: (200000, 31)


In [5]:
#截断异常值
concat_data['power'][concat_data['power']>600] = 600
concat_data['power'][concat_data['power']<1] = 1

concat_data['v_13'][concat_data['v_13']>6] = 6
concat_data['v_14'][concat_data['v_14']>4] = 4

In [6]:
for i in ['v_' +str(i) for i in range(14)]:
    for j in ['v_' +str(i) for i in range(14)]:
        concat_data[str(i)+'+'+str(j)] = concat_data[str(i)]+concat_data[str(j)]
for i in ['model','brand', 'bodyType', 'fuelType','gearbox', 'power', 'kilometer', 'notRepairedDamage', 'regionCode']:
    for j in ['v_' +str(i) for i in range(14)]:
        concat_data[str(i)+'*'+str(j)] = concat_data[i]*concat_data[j]    
concat_data

Unnamed: 0,SaleID,name,regDate,model,brand,bodyType,fuelType,gearbox,power,kilometer,...,regionCode*v_4,regionCode*v_5,regionCode*v_6,regionCode*v_7,regionCode*v_8,regionCode*v_9,regionCode*v_10,regionCode*v_11,regionCode*v_12,regionCode*v_13
0,0,736,20040402,30.0,6,1.0,0.0,0.0,60,12.5,...,1196.158203,246.560791,106.681274,135.474365,23.861237,101.956909,-3013.378906,2933.703125,-2531.238281,831.999023
1,1,2262,20030301,40.0,1,2.0,0.0,0.0,1,15.0,...,-6207.906250,1156.520996,528.428833,592.650391,116.118439,89.870209,-21403.632812,9149.839844,-4498.173828,-7521.117188
2,2,14874,20040403,115.0,15,1.0,0.0,0.0,163,12.5,...,-2797.779297,705.610352,322.491333,463.442139,174.433044,75.955872,-13602.523438,5061.212891,4392.595703,-2336.049805
3,3,71865,19960908,109.0,10,0.0,0.0,1.0,193,15.0,...,-967.175781,119.095703,47.866089,52.925537,14.489624,0.000000,-1956.390625,558.181641,-217.847656,-1057.875000
4,4,111080,20120103,110.0,5,1.0,0.0,0.0,68,5.0,...,15670.996094,1590.946777,510.584900,640.892639,549.762390,847.851990,-13231.771484,6353.566895,6496.649902,19772.708984
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,249995,111443,20041005,4.0,4,0.0,0.0,1.0,150,15.0,...,-4330.574219,1467.070312,1.625037,789.229492,425.178711,218.532349,11530.085938,-14083.875000,9552.257812,-5917.183594
49996,249996,152834,20130409,65.0,1,0.0,0.0,0.0,179,4.0,...,1865.742188,1333.037109,5.172329,813.713379,565.839844,354.287109,7090.839844,-17179.101562,22286.953125,733.425293
49997,249997,132531,20041211,4.0,4,0.0,0.0,1.0,147,12.5,...,-2607.209473,997.855225,1.206096,538.304443,273.089905,163.066406,8220.029297,-9176.191406,5203.300781,-4072.954102
49998,249998,143405,20020702,40.0,1,4.0,0.0,1.0,176,15.0,...,-112.528320,17.215820,0.001407,4.117798,4.117798,0.549164,123.787109,-179.306641,34.699707,-104.784180


In [7]:
#提取日期信息
date_cols = ['regDate', 'creatDate']
concat_data = date_tran(concat_data,date_cols)

100%|██████████| 2/2 [00:01<00:00,  1.27it/s]


In [8]:
data = concat_data.copy()

#count编码
count_list = ['regDate', 'creatDate', 'model', 'brand', 'regionCode','bodyType','fuelType','name','regDate_year', 'regDate_month', 'regDate_day', 'regDate_dayofweek' , 'creatDate_month','creatDate_day', 'creatDate_dayofweek','kilometer']
       
data = count_coding(data,count_list)

In [9]:
# 特征构造
# 使用时间：data['creatDate'] - data['regDate']，反应汽车使用时间，一般来说价格与使用时间成反比
# 不过要注意，数据里有时间出错的格式，所以我们需要 errors='coerce'
data['used_time1'] = (pd.to_datetime(data['creatDate'], format='%Y%m%d', errors='coerce') - 
                            pd.to_datetime(data['regDate'], format='%Y%m%d', errors='coerce')).dt.days
data['used_time2'] = (pd.datetime.now() - pd.to_datetime(data['regDate'], format='%Y%m%d', errors='coerce')).dt.days                        
data['used_time3'] = (pd.datetime.now() - pd.to_datetime(data['creatDate'], format='%Y%m%d', errors='coerce') ).dt.days

#分桶操作
cut_cols = ['power']+['used_time1','used_time2','used_time3']
data = cut_group(data,cut_cols,50)

In [10]:
### 用数值特征对类别特征做统计刻画，随便挑了几个跟price相关性最高的匿名特征
cross_cat = ['model', 'brand','regDate_year']
cross_num = ['v_0','v_3', 'v_4', 'v_8', 'v_12','power']
data = cross_cat_num(data,cross_num,cross_cat)#一阶交叉
data.head()
#data = cross_qua_cat_num(data)#二阶交叉

  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:00<00:04,  1.08it/s][A
 33%|███▎      | 2/6 [00:01<00:03,  1.02it/s][A
 50%|█████     | 3/6 [00:02<00:02,  1.19it/s][A
 67%|██████▋   | 4/6 [00:03<00:01,  1.28it/s][A
 83%|████████▎ | 5/6 [00:04<00:00,  1.30it/s][A
100%|██████████| 6/6 [00:04<00:00,  1.22it/s][A
 33%|███▎      | 1/3 [00:04<00:09,  4.92s/it]
  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:00<00:02,  1.94it/s][A
 33%|███▎      | 2/6 [00:01<00:02,  1.61it/s][A
 50%|█████     | 3/6 [00:01<00:01,  1.55it/s][A
 67%|██████▋   | 4/6 [00:02<00:01,  1.56it/s][A
 83%|████████▎ | 5/6 [00:03<00:00,  1.55it/s][A
100%|██████████| 6/6 [00:03<00:00,  1.58it/s][A
 67%|██████▋   | 2/3 [00:08<00:04,  4.27s/it]
  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:00<00:02,  1.79it/s][A
 33%|███▎      | 2/6 [00:01<00:02,  1.63it/s][A
 50%|█████     | 3/6 [00:01<00:01,  1.56it/s][A
 67%|██████▋

Unnamed: 0,SaleID,name,regDate,model,brand,bodyType,fuelType,gearbox,power,kilometer,...,regDate_year_v_4_median,regDate_year_v_8_max,regDate_year_v_8_min,regDate_year_v_8_median,regDate_year_v_12_max,regDate_year_v_12_min,regDate_year_v_12_median,regDate_year_power_max,regDate_year_power_min,regDate_year_power_median
0,0,736,2004-04-02,30.0,6,1.0,0.0,0.0,60,12.5,...,-0.005676,0.128052,0.0,0.060364,9.820312,-5.84375,-0.032684,600,1,116.0
1,1,2262,2003-03-01,40.0,1,2.0,0.0,0.0,1,15.0,...,0.046509,0.117371,0.0,0.054596,8.46875,-7.988281,-0.62793,600,1,110.0
2,2,14874,2004-04-03,115.0,15,1.0,0.0,0.0,163,12.5,...,-0.005676,0.128052,0.0,0.060364,9.820312,-5.84375,-0.032684,600,1,116.0
3,3,71865,1996-09-08,109.0,10,0.0,0.0,1.0,193,15.0,...,-0.537598,0.11438,0.0,0.025009,6.75,-9.640625,-2.695312,600,1,90.0
4,4,111080,2012-01-03,110.0,5,1.0,0.0,0.0,68,5.0,...,0.357178,0.157471,0.0,0.09729,13.382812,-6.367188,2.882812,600,1,136.0


In [11]:
## 选择特征列
numerical_cols = data.columns

cat_fea = ['SaleID','offerType','seller']
feature_cols = [col for col in numerical_cols if col not in cat_fea]
feature_cols = [col for col in feature_cols if col not in ['price']]

## 提前特征列，标签列构造训练样本和测试样本
X_data = data.iloc[:len(Train_data),:][feature_cols]
Y_data = Train_data['price']
X_test  = data.iloc[len(Train_data):,:][feature_cols]

feature_cols

['name',
 'regDate',
 'model',
 'brand',
 'bodyType',
 'fuelType',
 'gearbox',
 'power',
 'kilometer',
 'notRepairedDamage',
 'regionCode',
 'creatDate',
 'v_0',
 'v_1',
 'v_2',
 'v_3',
 'v_4',
 'v_5',
 'v_6',
 'v_7',
 'v_8',
 'v_9',
 'v_10',
 'v_11',
 'v_12',
 'v_13',
 'v_14',
 'v_0+v_0',
 'v_0+v_1',
 'v_0+v_2',
 'v_0+v_3',
 'v_0+v_4',
 'v_0+v_5',
 'v_0+v_6',
 'v_0+v_7',
 'v_0+v_8',
 'v_0+v_9',
 'v_0+v_10',
 'v_0+v_11',
 'v_0+v_12',
 'v_0+v_13',
 'v_1+v_0',
 'v_1+v_1',
 'v_1+v_2',
 'v_1+v_3',
 'v_1+v_4',
 'v_1+v_5',
 'v_1+v_6',
 'v_1+v_7',
 'v_1+v_8',
 'v_1+v_9',
 'v_1+v_10',
 'v_1+v_11',
 'v_1+v_12',
 'v_1+v_13',
 'v_2+v_0',
 'v_2+v_1',
 'v_2+v_2',
 'v_2+v_3',
 'v_2+v_4',
 'v_2+v_5',
 'v_2+v_6',
 'v_2+v_7',
 'v_2+v_8',
 'v_2+v_9',
 'v_2+v_10',
 'v_2+v_11',
 'v_2+v_12',
 'v_2+v_13',
 'v_3+v_0',
 'v_3+v_1',
 'v_3+v_2',
 'v_3+v_3',
 'v_3+v_4',
 'v_3+v_5',
 'v_3+v_6',
 'v_3+v_7',
 'v_3+v_8',
 'v_3+v_9',
 'v_3+v_10',
 'v_3+v_11',
 'v_3+v_12',
 'v_3+v_13',
 'v_4+v_0',
 'v_4+v_1',
 'v_4+v_2

In [12]:
from meanencoder import MeanEncoder

In [13]:
class_list = ['model','brand','name','regionCode']+date_cols
MeanEnocodeFeature = class_list#声明需要平均数编码的特征
ME = MeanEncoder(MeanEnocodeFeature,target_type='regression') #声明平均数编码的类
X_data = ME.fit_transform(X_data,Y_data)#对训练数据集的X和y进行拟合
#x_train_fav = ME.fit_transform(x_train,y_train_fav)#对训练数据集的X和y进行拟合
X_test = ME.transform(X_test)#对测试集进行编码

In [14]:
X_data['price'] = Train_data['price']

In [15]:
from sklearn.model_selection import KFold

### target encoding目标编码，回归场景相对来说做目标编码的选择更多，不仅可以做均值编码，还可以做标准差编码、中位数编码等
enc_cols = []
stats_default_dict = {
    'max': X_data['price'].max(),
    'min': X_data['price'].min(),
    'median': X_data['price'].median(),
    'mean': X_data['price'].mean(),
    'sum': X_data['price'].sum(),
    'std': X_data['price'].std(),
    'skew': X_data['price'].skew(),
    'kurt': X_data['price'].kurt(),
    'mad': X_data['price'].mad()
}
### 暂且选择这三种编码
enc_stats = ['max','min','mean']
skf = KFold(n_splits=10, shuffle=True, random_state=42)
for f in tqdm(['regionCode','brand','regDate_year','creatDate_year','kilometer','model']):
    enc_dict = {}
    for stat in enc_stats:
        enc_dict['{}_target_{}'.format(f, stat)] = stat
        X_data['{}_target_{}'.format(f, stat)] = 0
        X_test['{}_target_{}'.format(f, stat)] = 0
        enc_cols.append('{}_target_{}'.format(f, stat))
    for i, (trn_idx, val_idx) in enumerate(skf.split(X_data, Y_data)):
        trn_x, val_x = X_data.iloc[trn_idx].reset_index(drop=True), X_data.iloc[val_idx].reset_index(drop=True)
        enc_df = trn_x.groupby(f, as_index=False)['price'].agg(enc_dict)
        val_x = val_x[[f]].merge(enc_df, on=f, how='left')
        test_x = X_test[[f]].merge(enc_df, on=f, how='left')
        for stat in enc_stats:
            val_x['{}_target_{}'.format(f, stat)] = val_x['{}_target_{}'.format(f, stat)].fillna(stats_default_dict[stat])
            test_x['{}_target_{}'.format(f, stat)] = test_x['{}_target_{}'.format(f, stat)].fillna(stats_default_dict[stat])
            X_data.loc[val_idx, '{}_target_{}'.format(f, stat)] = val_x['{}_target_{}'.format(f, stat)].values 
            X_test['{}_target_{}'.format(f, stat)] += test_x['{}_target_{}'.format(f, stat)].values / skf.n_splits

X_data.shape

100%|██████████| 6/6 [00:37<00:00,  6.26s/it]


(150000, 459)

In [16]:
drop_list = ['regDate', 'creatDate','brand_power_min', 'regDate_year_power_min']
x_train = X_data.drop(drop_list+['price'],axis=1)
x_test = X_test.drop(drop_list,axis=1)
x_train.shape

(150000, 454)

In [17]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

In [18]:
from sklearn.preprocessing import MinMaxScaler
#特征归一化
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(pd.concat([x_train,x_test]).values)
all_data = min_max_scaler.transform(pd.concat([x_train,x_test]).values)

In [19]:
from sklearn import decomposition
pca = decomposition.PCA(n_components=146)
all_pca = pca.fit_transform(all_data)
X_pca = all_pca[:len(x_train)]
test = all_pca[len(x_train):]
y = Train_data['price'].values

In [20]:
from model import NN_model
from evaluation import Metric
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler

In [21]:
import keras.backend as K
def scheduler(epoch):
    # 每隔100个epoch，学习率减小为原来的1/10
    if epoch % 20 == 0 and epoch != 0:
        lr = K.get_value(model.optimizer.lr)
        K.set_value(model.optimizer.lr, lr * 0.6)
        print("lr changed to {}".format(lr * 0.6))
    return K.get_value(model.optimizer.lr)

In [22]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras.optimizers import Adam

2.6.0


In [None]:
n_splits = 6
kf = KFold(n_splits=n_splits, shuffle=True)

import keras 

b_size = 2000
max_epochs = 145
oof_pred = np.zeros((len(X_pca), ))

sub = pd.read_csv('./data/used_car_testB_20200421.csv',sep = ' ')[['SaleID']].copy()
sub['price'] = 0
reduce_lr = LearningRateScheduler(scheduler)

avg_mae = 0
for fold, (trn_idx, val_idx) in enumerate(kf.split(X_pca, y)):
    print('fold:', fold)
    X_train, y_train = X_pca[trn_idx], y[trn_idx]
    X_val, y_val = X_pca[val_idx], y[val_idx]
    
    model = NN_model(X_train.shape[1])
    simple_adam = Adam(lr = 0.015)
    model.compile(loss='mae', optimizer=simple_adam,metrics=['mae'])
    es = EarlyStopping(monitor='val_score', patience=10, verbose=2, mode='min', restore_best_weights=True,)
    es.set_model(model)
    metric = Metric(model, [es], [(X_train, y_train), (X_val, y_val)])
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_val shape:", X_val.shape)
    print("y_val shape:", y_val.shape)
    model.fit(X_train, y_train, batch_size=b_size, epochs=max_epochs, 
          validation_data=(X_val, y_val),  # Use a tuple here
          callbacks=[reduce_lr], shuffle=True, verbose=2)
    y_pred3 = model.predict(X_val)
    y_pred = np.zeros((len(y_pred3), ))
    sub['price'] += model.predict(test).reshape(-1,)/n_splits
    for i in range(len(y_pred3)):
        y_pred[i] = y_pred3[i]
        
    oof_pred[val_idx] = y_pred
    val_mae = mean_absolute_error(y[val_idx], y_pred)
    avg_mae += val_mae/n_splits
    print()
    print('val_mae is:{}'.format(val_mae))
    print()
mean_absolute_error(y, oof_pred)

fold: 0
X_train shape: (125000, 146)
y_train shape: (125000,)
X_val shape: (25000, 146)
y_val shape: (25000,)
Epoch 1/145
63/63 - 3s - loss: 2659.1897 - mae: 2659.1897 - val_loss: 942.0569 - val_mae: 942.0569
Epoch 2/145
63/63 - 0s - loss: 802.2687 - mae: 802.2687 - val_loss: 693.8436 - val_mae: 693.8436
Epoch 3/145
63/63 - 1s - loss: 690.1297 - mae: 690.1297 - val_loss: 760.4976 - val_mae: 760.4976
Epoch 4/145
63/63 - 1s - loss: 649.7761 - mae: 649.7761 - val_loss: 582.4513 - val_mae: 582.4513
Epoch 5/145
63/63 - 0s - loss: 579.1445 - mae: 579.1445 - val_loss: 607.6285 - val_mae: 607.6285
Epoch 6/145
63/63 - 0s - loss: 557.7840 - mae: 557.7840 - val_loss: 557.1953 - val_mae: 557.1953
Epoch 7/145
63/63 - 0s - loss: 577.1560 - mae: 577.1560 - val_loss: 591.2554 - val_mae: 591.2554
Epoch 8/145
63/63 - 0s - loss: 530.0547 - mae: 530.0547 - val_loss: 513.4880 - val_mae: 513.4880
Epoch 9/145
63/63 - 0s - loss: 519.0400 - mae: 519.0400 - val_loss: 679.2574 - val_mae: 679.2574
Epoch 10/145
63

In [None]:
sub.to_csv('nn_sub_{}_{}.csv'.format('mae', sub['price'].mean()), index=False)