In [1]:
import pandas as pd
import numpy as np
import time
import warnings
import gc
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("ggplot")
import seaborn as sns
import datetime
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
import lightgbm as lgb
from sklearn import preprocessing
warnings.filterwarnings("ignore")
plt.style.use("seaborn")

## Formatted data

In [2]:
# Reduce the memory usage - Inspired by Panchajanya Banerjee
def reduce_mem_usage(df, verbose=True, use_float16=True):
    """Downcast numeric columns of ``df`` in place to the narrowest dtype that fits.

    Only columns whose dtype is one of int16/int32/int64/float16/float32/float64
    are touched; every other column is left as it is.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place and also returned.
    verbose : bool, default True
        Print the resulting memory footprint and the relative reduction.
    use_float16 : bool, default True
        When False, float columns are never narrowed below float32. float16
        keeps only about three significant decimal digits, so disable it for
        columns where that precision loss matters.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the dtype limit still fits.
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # No narrower float fits (this includes all-NaN columns, where
                # every comparison above is False).
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [None]:
# Load train/test, stack them into one frame and derive calendar features from
# `first_active_month`. Test rows are tagged with the sentinel target -9999 so
# they can be separated again later.
train = reduce_mem_usage(pd.read_csv("../data/train.csv", parse_dates=["first_active_month"]))
test = reduce_mem_usage(pd.read_csv("../data/test.csv", parse_dates=["first_active_month"]))
test["target"] = -9999
# ignore_index avoids duplicated row labels in the stacked frame.
data = pd.concat([train, test], ignore_index=True)

first_active = data["first_active_month"]
data["year"] = first_active.dt.year
data["month"] = first_active.dt.month
data["dayofyear"] = first_active.dt.dayofyear
# Timestamp.weekofyear exists on every pandas version, whereas the Series
# accessor `.dt.weekofyear` was removed in pandas 2.0.
data['week'] = first_active.apply(lambda ts: ts.weekofyear)
data['dayofweek'] = first_active.dt.dayofweek
# Subtract datetime64 values directly: going through `.dt.date` yields Python
# date objects (object dtype) on which `.dt.days` is not reliably available.
data['days'] = (pd.Timestamp(2018, 2, 1) - first_active).dt.days

In [None]:
def _week_of_year(dates):
    """Return the ISO week number of a datetime Series on old and new pandas alike."""
    accessor = dates.dt
    if hasattr(accessor, "isocalendar"):
        week = accessor.isocalendar().week
        # isocalendar() yields a nullable UInt32 column; convert to a plain dtype.
        return week.astype("float64") if week.isna().any() else week.astype("int64")
    return accessor.weekofyear


def pro_trans(trans, reference_date=None):
    """Clean a transactions frame and add per-row calendar / group-mean features.

    ``trans`` is modified in place and returned.

    Parameters
    ----------
    trans : pandas.DataFrame
        Must contain ``authorized_flag``, ``category_1``, ``category_2``,
        ``category_3``, ``purchase_date``, ``month_lag``, ``merchant_id``,
        ``purchase_amount`` and ``installments``.
    reference_date : datetime-like, optional
        "Now" used for ``month_diff``. Defaults to the current date and time,
        which makes the feature depend on when the notebook is run; pass a fixed
        date for reproducible features.

    Returns
    -------
    pandas.DataFrame
        The same object as ``trans`` with these changes:

        * ``authorized_flag`` / ``category_1`` mapped from "Y"/"N" to 1/0;
        * ``purchase_date`` parsed, plus ``year``, ``month``, ``weekofyear``,
          ``dayofweek``, ``weekend``, ``hour``, ``minute``;
        * ``month_diff`` (30-day periods between the purchase and
          ``reference_date``, plus ``month_lag``) and ``month_diff2``;
        * missing ``category_2`` / ``category_3`` / ``merchant_id`` filled, and
          ``category_3`` integer-encoded in sorted label order;
        * for each of ``category_1``, ``category_2``, ``category_3``, ``month``
          and ``hour``: ``<col>_purchase_amount_mean`` and
          ``<col>_installments_mean``, the group mean broadcast to every row.
    """
    if reference_date is None:
        reference_date = datetime.datetime.today()
    reference_date = pd.Timestamp(reference_date)

    yes_no = {"Y": 1, "N": 0}
    trans["authorized_flag"] = trans["authorized_flag"].map(yes_no)
    trans["category_1"] = trans["category_1"].map(yes_no)

    trans["purchase_date"] = pd.to_datetime(trans["purchase_date"])
    purchase_date = trans["purchase_date"]
    trans["year"] = purchase_date.dt.year
    trans["month"] = purchase_date.dt.month
    trans["weekofyear"] = _week_of_year(purchase_date)
    trans["dayofweek"] = purchase_date.dt.dayofweek
    trans["weekend"] = (trans["dayofweek"] >= 5).astype(int)
    trans["hour"] = purchase_date.dt.hour
    trans["minute"] = purchase_date.dt.minute
    trans["month_diff"] = (reference_date - purchase_date).dt.days // 30
    trans["month_diff"] += trans["month_lag"]
    trans["month_diff2"] = trans["month"] - trans["month_lag"]

    trans["category_2"] = trans["category_2"].fillna(value=2.0)
    trans["category_3"] = trans["category_3"].fillna(value="A")
    trans["merchant_id"] = trans["merchant_id"].fillna(value="M_ID_00a6ca8a8a")
    # Sorted factorization: labels are numbered 0..n-1 in sorted order.
    trans["category_3"] = pd.factorize(trans["category_3"], sort=True)[0]

    # transform("mean") returns one value per row, aligned on the row index.
    # Assigning a per-group result directly would align on the group keys, and
    # writing both statistics to one column name would keep only the second.
    for col in ["category_1", "category_2", "category_3", "month", "hour"]:
        grouped = trans.groupby(col)
        trans[col + "_purchase_amount_mean"] = grouped["purchase_amount"].transform("mean")
        trans[col + "_installments_mean"] = grouped["installments"].transform("mean")

    return trans
    

In [None]:
# Taking Reference from Other Kernels
def trans_agg(trans, nunique_col, prefix):
    """Aggregate a preprocessed transactions frame per ``nunique_col``.

    Parameters
    ----------
    trans : pandas.DataFrame
        Transactions frame holding every column named in ``agg_func`` below.
    nunique_col : str
        Column to group by (the notebook uses "card_id" or "merchant_id").
    prefix : str
        Prepended to every generated column name, e.g. "hist_" or "new_".

    Returns
    -------
    pandas.DataFrame
        One row per group with ``nunique_col``, ``<prefix>transactions_count``
        and one ``<prefix><column>_<statistic>`` column per requested statistic.
    """
    # "month_diff" used to appear twice in this literal; Python keeps only the
    # last value for a repeated key, so the single entry below is the one that
    # was already in effect.
    agg_func = {"purchase_date": ["max", "min"],
                "month_diff": ["mean", "max", "min", "nunique"],
                "weekend": ["max", "min", "mean"],
                "authorized_flag": ["max", "min", "mean"],
                "category_1": ["max", "min", "mean"],
                "category_2": ["max", "min", "mean"],
                "category_3": ["max", "min", "mean"],
                "installments": ["max", "min", "mean", "std"],
                "purchase_amount": ["max", "min", "mean", "std"],
                "merchant_id": ["nunique"],
                "month_lag": ["mean", "max", "min", "nunique"],
                "card_id": ["size", "nunique"],
                "month": ["max", "min", "nunique"],
                "hour": ["max", "min", "nunique"],
                "weekofyear": ["max", "min", "nunique"],
                "dayofweek": ["max", "min", "nunique"],
                "year": ["max", "min", "nunique"],
                "subsector_id": ["max", "min", "nunique"],
                "merchant_category_id": ["max", "min", "nunique"]}

    agg_trans = trans.groupby([nunique_col]).agg(agg_func)
    # Flatten the (column, statistic) MultiIndex into "<prefix>column_statistic".
    agg_trans.columns = [prefix + '_'.join(col).strip() for col in agg_trans.columns.values]
    agg_trans = agg_trans.reset_index()

    counts = trans.groupby(nunique_col).size().reset_index(name='{}transactions_count'.format(prefix))
    return pd.merge(counts, agg_trans, on=nunique_col, how='left')

In [None]:
# Feature Engineering - Adding new features inspired by Chau's first kernel
def data_add_feat(data, prefix, reference_date=None):
    """Derive purchase-date features from the ``<prefix>purchase_date_*`` columns.

    ``data`` is modified in place and returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``<prefix>purchase_date_max``, ``<prefix>purchase_date_min``,
        ``<prefix>card_id_size`` and ``first_active_month``.
    prefix : str
        Column prefix of the aggregation to process, e.g. "hist_" or "new_".
    reference_date : datetime-like, optional
        "Now" used for ``<prefix>purchase_date_uptonow``. Defaults to the current
        date and time, which makes the feature depend on when the notebook is
        run; pass a fixed date for reproducible features.

    Returns
    -------
    pandas.DataFrame
        The same object as ``data`` with ``<prefix>purchase_date_diff``,
        ``_average``, ``_uptonow``, ``<prefix>first_buy`` and ``<prefix>last_buy``
        added (all in days), and the two purchase-date columns converted to
        seconds since 1970-01-01 (NaN where the date is missing).
    """
    if reference_date is None:
        reference_date = datetime.datetime.today()
    reference_date = pd.Timestamp(reference_date)

    max_col = prefix + "purchase_date_max"
    min_col = prefix + "purchase_date_min"
    data[max_col] = pd.to_datetime(data[max_col])
    data[min_col] = pd.to_datetime(data[min_col])
    date_max = data[max_col]
    date_min = data[min_col]

    data[prefix + "purchase_date_diff"] = (date_max - date_min).dt.days
    data[prefix + "purchase_date_average"] = data[prefix + "purchase_date_diff"] / data[prefix + "card_id_size"]
    data[prefix + "purchase_date_uptonow"] = (reference_date - date_max).dt.days
    data[prefix + "first_buy"] = (date_min - data["first_active_month"]).dt.days
    data[prefix + "last_buy"] = (date_max - data["first_active_month"]).dt.days

    # Convert to epoch seconds via timedelta division so that missing dates stay
    # NaN; an int64 cast would turn NaT into the int64-minimum sentinel.
    epoch = pd.Timestamp("1970-01-01")
    one_second = pd.Timedelta(seconds=1)
    for feature in (max_col, min_col):
        data[feature] = (data[feature] - epoch) / one_second

    return data

In [None]:
# Load the historical transactions, downcast them, then run the row-level
# preprocessing. pro_trans returns the frame it was given, so `hist_trans` and
# `hist_trans_pro` are two names for the same DataFrame.
hist_trans = reduce_mem_usage(pd.read_csv('../data/historical_transactions.csv'))
hist_trans_pro = pro_trans(hist_trans)

In [None]:
# Card-level aggregation of the historical transactions, left-joined onto `data`.
hist_merge_trans1 = trans_agg(hist_trans_pro, nunique_col="card_id" ,prefix='hist_')
data = data.merge(hist_merge_trans1, on='card_id', how='left')
# data.to_csv("../data_feat/his_data_card.csv", index=False)
# NOTE(review): `hist_trans_pro` / `hist_trans` are deleted here, yet the cells
# that follow call trans_agg(hist_trans_pro, ...) again. On a fresh top-to-bottom
# run they would hit a NameError unless the transactions are reloaded first.
del hist_merge_trans1
del hist_trans_pro
del hist_trans
gc.collect()

In [None]:
# Merchant-level aggregation: aggregate per merchant_id, then attach the merchant
# statistics to every transaction row.
# NOTE(review): needs `hist_trans_pro` to still be in memory; the preceding cell
# deletes it.
# NOTE(review): uses the same 'hist_' prefix as the card-level aggregation, so the
# generated column names are identical to the card-level ones.
hist_merge_trans2 = trans_agg(hist_trans_pro, nunique_col="merchant_id", prefix='hist_')
hist_merge_trans2 = hist_trans_pro.merge(hist_merge_trans2, how="left", on="merchant_id")

In [None]:
# Cache the row-level frame with merchant statistics on disk.
# NOTE(review): this writes to "../feat_data/" while other cells use
# "../data_feat/" - confirm which directory is intended.
hist_merge_trans2.to_csv("../feat_data/hist_merge_trans2.csv", index=False)

In [None]:
# Reload the cached frame written by the previous cell.
# NOTE(review): no parse_dates is passed, so date columns presumably come back
# as plain strings - confirm this is acceptable for the steps that follow.
hist_merge_trans2 = pd.read_csv("../feat_data/hist_merge_trans2.csv")

In [None]:
# Average every column of the row-level frame per card and join onto `data`.
# NOTE(review): if the card-level 'hist_' merge has already run, `data` holds
# columns with the same names and pandas will disambiguate them with its
# default _x/_y suffixes.
hist_gby = hist_merge_trans2.groupby("card_id",as_index=False).mean()
data = data.merge(hist_gby, on="card_id", how="left")
data.to_csv("../data_feat/hist_data_merchant.csv", index=False)

In [None]:
# Merchant-level aggregation, per-card averaging and merge in a single cell.
# NOTE(review): this repeats the steps of the preceding cells (minus the CSV
# round trip); running both merges the same columns into `data` twice.
# NOTE(review): `hist_trans_pro` and `hist_trans` are deleted by an earlier cell,
# so this cell needs them reloaded to run.
hist_merge_trans2 = trans_agg(hist_trans_pro, nunique_col="merchant_id", prefix='hist_')
hist_merge_trans2 = hist_trans_pro.merge(hist_merge_trans2, how="left", on="merchant_id")
hist_gby = hist_merge_trans2.groupby("card_id",as_index=False).mean()
data = data.merge(hist_gby, on="card_id", how="left")
data.to_csv("../data_feat/hist_data_merchant.csv", index=False)
del hist_merge_trans2
del hist_trans_pro
del hist_trans
gc.collect()

In [None]:
# Load the new-merchant transactions, downcast them, then run the row-level
# preprocessing. pro_trans returns the frame it was given, so `new_trans` and
# `new_trans_pro` are two names for the same DataFrame.
new_trans = reduce_mem_usage(pd.read_csv('../data/new_merchant_transactions.csv'))
new_trans_pro = pro_trans(new_trans)

In [None]:
# Card-level aggregation of the new-merchant transactions, left-joined onto `data`.
new_merge_trans1 = trans_agg(new_trans_pro,nunique_col="card_id" ,prefix='new_')
data = data.merge(new_merge_trans1, on='card_id', how='left')
# data.to_csv("../data_feat/new_data_card.csv", index=False)
del new_merge_trans1
gc.collect()

In [None]:
# Merchant-level aggregation of the new-merchant transactions, attached to each
# row, averaged per card and joined onto `data`.
# NOTE(review): the 'new_' prefix is shared with the card-level aggregation, so
# columns merged here presumably clash with the card-level ones already in
# `data` and receive pandas' default _x/_y suffixes - verify.
new_merge_trans2 = trans_agg(new_trans_pro, nunique_col="merchant_id", prefix='new_')
new_merge_trans2 = new_trans_pro.merge(new_merge_trans2, how="left", on="merchant_id")
new_gby = new_merge_trans2.groupby("card_id",as_index=False).mean()
data = data.merge(new_gby, on="card_id", how="left")
# data.to_csv("../data_feat/new_data_merchant.csv", index=False)
del new_merge_trans2
gc.collect()

In [None]:
# Build the date features for both transaction sources and join them per card.
# NOTE(review): `hist_data_card`, `new_data_card` and `new_data_merchant` are not
# assigned anywhere in the visible notebook; presumably they were loaded from the
# "../data_feat/*.csv" caches in a cell that no longer exists. On a fresh kernel
# this cell raises NameError.
# hist_data = hist_data_card.merge(hist_data_merchant, on="card_id", how="left")
data1 = data_add_feat(hist_data_card, prefix="hist_")
new_data = new_data_card.merge(new_data_merchant, on="card_id", how="left")
data2 = data_add_feat(new_data, prefix="new_")

data = data1.merge(data2, on="card_id", how="left")


In [None]:
# Load the historical transactions, aggregate them per card, merge the result
# into `data` and derive the 'hist_' purchase-date features.
# NOTE(review): if an earlier cell already merged card-level 'hist_' columns into
# `data`, this merge adds them a second time with _x/_y suffixes.
hist_trans = reduce_mem_usage(pd.read_csv('../data/historical_transactions.csv'))
hist_trans_pro = pro_trans(hist_trans)
hist_merge_trans1 = trans_agg(hist_trans_pro, nunique_col="card_id", prefix='hist_')
# pro_trans returns the frame it was given, so both names point at the same
# object; deleting only one of them would leave the transactions in memory.
del hist_trans_pro, hist_trans
gc.collect()

data = pd.merge(data, hist_merge_trans1, on='card_id', how='left')
del hist_merge_trans1
gc.collect()

data = data_add_feat(data, prefix="hist_")
gc.collect()


In [None]:
# New-merchant transactions: card-level aggregation plus per-card averages of the
# merchant-level statistics, merged into `data`, followed by the 'new_'
# purchase-date features.
new_trans = reduce_mem_usage(pd.read_csv('../data/new_merchant_transactions.csv'))
new_trans_pro = pro_trans(new_trans)
new_merge_trans1 = trans_agg(new_trans_pro, nunique_col="card_id", prefix='new_')

new_merge_trans2 = trans_agg(new_trans_pro, nunique_col="merchant_id", prefix='new_')
new_merge_trans2 = new_trans_pro.merge(new_merge_trans2, how="left", on="merchant_id")
# numeric_only=True: the frame also holds string/date columns, which cannot be
# averaged (recent pandas raises instead of silently dropping them).
new_gby = new_merge_trans2.groupby("card_id", as_index=False).mean(numeric_only=True)
del new_merge_trans2
gc.collect()

# Both aggregations use the 'new_' prefix and therefore share column names. Keep
# the card-level names untouched (later cells read e.g.
# "new_purchase_amount_mean") and mark the merchant-derived duplicates.
new_data = new_merge_trans1.merge(new_gby, on="card_id", how="left", suffixes=("", "_merchant"))

# Columns already present in `data` keep their names; clashing incoming columns
# (e.g. year/month averages) get the "_new_trans" suffix.
data = data.merge(new_data, on="card_id", how="left", suffixes=("", "_new_trans"))
# The interaction features built later read "new_purchase_date_diff", which only
# exists once the 'new_' date features have been derived.
data = data_add_feat(data, prefix="new_")
gc.collect()

In [None]:
# Additive interaction features: for every statistic, the 'new_' value plus the
# 'hist_' value. Sum-based "*_total" features are not built because trans_agg
# does not compute a "sum" statistic.
for field in ("purchase_amount", "installments"):
    for stat in ("mean", "max", "min", "std"):
        combined = "{}_{}".format(field, stat)
        data[combined] = data["new_" + combined] + data["hist_" + combined]

gc.collect()

In [None]:
# Pairwise arithmetic interactions. For each (left, right) pair four columns are
# created, named "<left>_<right>_add|sub|mul|div". The pair order below fixes the
# order in which the columns are appended to `data`.
interaction_pairs = [
    ("hist_month_nunique", "hist_month_diff_mean"),
    ("hist_month_nunique", "hist_authorized_flag_mean"),
    ("hist_month_diff_mean", "hist_authorized_flag_mean"),
    ("hist_month_nunique", "new_purchase_date_diff"),
    ("hist_month_diff_mean", "new_purchase_date_diff"),
    ("hist_authorized_flag_mean", "new_purchase_date_diff"),
    ("hist_month_nunique", "hist_month_lag_mean"),
    ("hist_month_diff_mean", "hist_month_lag_mean"),
    ("hist_authorized_flag_mean", "hist_month_lag_mean"),
    ("new_purchase_date_diff", "hist_month_lag_mean"),
]

for left, right in interaction_pairs:
    stem = left + "_" + right
    data[stem + "_add"] = data[left] + data[right]
    data[stem + "_sub"] = data[left] - data[right]
    data[stem + "_mul"] = data[left] * data[right]
    data[stem + "_div"] = data[left] / data[right]

gc.collect()

In [None]:
# Missing-value overview of the combined frame: null count and dtype for every
# column that has at least one null, sorted by null count.
# DataFrame.sum() is explicit about reducing per column; np.sum(DataFrame) is
# axis-ambiguous on recent pandas versions.
nulls = data.isnull().sum()
nullcols = nulls.loc[(nulls != 0)]
dtypes = data.dtypes
dtypes2 = dtypes.loc[(nulls != 0)]
info = pd.concat([nullcols, dtypes2], axis=1).sort_values(by=0, ascending=False)

numeric_dtypes = ['float64']
numerics = [col for col in data.columns if data[col].dtype in numeric_dtypes]

# Split back into train/test using the sentinel target. Explicit copies are
# taken so the column assignments below never write into a slice of `data`.
is_test = data["target"] == -9999
train = data[~is_test].copy()
test = data[is_test].drop(["target"], axis=1)

# Rows with target below -30 are flagged as outliers.
train['outliers'] = (train['target'] < -30).astype(int)
print(train['outliers'].value_counts())

# Replace each feature_* level with the outlier rate observed for that level.
# NOTE(review): the rates are computed on all training rows, including the rows
# later used for validation - confirm this leakage is acceptable.
for feature in ['feature_1', 'feature_2', 'feature_3']:
    order_label = train.groupby(feature)['outliers'].mean()
    train[feature] = train[feature].map(order_label)
    test[feature] = test[feature].map(order_label)

In [None]:
# Regression stage: fit only on rows not flagged as outliers. Identifier, date
# and outlier-flag columns are excluded from the feature list.
test_df = test
train_df = train.loc[train["outliers"] == 0]
target = train_df["target"]
train_df = train_df.drop(columns=["target"])

excluded = {"card_id", "first_active_month", "outliers"}
features = [name for name in train_df.columns if name not in excluded]
categorical_feats = [name for name in features if "feature_" in name]

In [None]:
%%time
param = {'num_leaves': 31,
         'min_data_in_leaf': 32, 
         'objective':'regression',
         'max_depth': -1,
         'learning_rate': 0.001,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 8,
         "bagging_fraction": 0.9,
         "bagging_seed": 10,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,
         "nthread": -1}

folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2018)
oof = np.zeros(len(train_df))
predictions = np.zeros(len(test))
feature_importance_df = pd.DataFrame()

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_df,train_df['outliers'].values)):
    print("fold {}".format(fold_))
    trn_data = lgb.Dataset(train_df.iloc[trn_idx][features], label=target.iloc[trn_idx])
    val_data = lgb.Dataset(train_df.iloc[val_idx][features], label=target.iloc[val_idx])

    num_round = 10000
    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval= -1,
                    early_stopping_rounds = 200)
    oof[val_idx] = clf.predict(train_df.iloc[val_idx][features], num_iteration=clf.best_iteration)
    
    fold_importance_df = pd.DataFrame()
    fold_importance_df["Feature"] = features
    fold_importance_df["importance"] = clf.feature_importance()
    fold_importance_df["fold"] = fold_ + 1
    feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    
    predictions += clf.predict(test[features], num_iteration=clf.best_iteration) / folds.n_splits

print("CV score: {:<8.5f}".format(mean_squared_error(oof, target)**0.5))
    

In [None]:
# Write the regression predictions to a submission file named after the current
# local timestamp (YYYYmmddHHMMSS).
model_without_outliers = pd.DataFrame({
    "card_id": test["card_id"].values,
    "target": predictions,
})
submission_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
model_without_outliers.to_csv("../submission/" + str(int(submission_stamp)) + ".csv", index=False)

In [None]:
# Classification stage: predict the outlier flag on all training rows.
# drop() returns a new frame. The earlier `train_df = train` followed by `del`
# removed the columns from `train` itself, because both names referred to the
# same object, so this cell (and the regression setup) could not be re-run.
test_df = test
target = train["outliers"]
train_df = train.drop(columns=["outliers", "target"])

features = [c for c in train_df.columns if c not in ["card_id", "first_active_month"]]
categorical_feats = [c for c in features if "feature_" in c]

In [None]:
# 5-fold LightGBM binary classifier for the outlier flag. Produces out-of-fold
# probabilities (`oof`), fold-averaged test probabilities (`predictions2`) and
# the per-fold feature importances (`feature_importance_df`).
param = {'num_leaves': 31,
         'min_data_in_leaf': 30,
         'objective': 'binary',
         'max_depth': -1,
         'learning_rate': 0.001,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "bagging_seed": 10,
         "metric": 'binary_logloss',
         "lambda_l1": 0.1,
         "verbosity": -1,
         "nthread": -1}

folds = KFold(n_splits=5, shuffle=True, random_state=15)
oof = np.zeros(len(train_df))
predictions2 = np.zeros(len(test_df))
fold_importances = []
num_round = 10000

# The splitter only needs the number of rows, so the frame is passed directly
# instead of materialising `train_df.values` as one large array.
for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_df)):
    print("fold n°{}".format(fold_))
    # The feature_* columns were replaced by per-level outlier rates (floats)
    # earlier in the notebook. LightGBM expects integer codes for categorical
    # features and truncates floats, so they are passed as ordinary numeric
    # columns here, as in the regression model.
    trn_data = lgb.Dataset(train_df.iloc[trn_idx][features], label=target.iloc[trn_idx])
    val_data = lgb.Dataset(train_df.iloc[val_idx][features], label=target.iloc[val_idx])

    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data], verbose_eval=-1,
                    early_stopping_rounds=200)
    oof[val_idx] = clf.predict(train_df.iloc[val_idx][features], num_iteration=clf.best_iteration)

    fold_importance_df = pd.DataFrame({
        "feature": features,
        "importance": clf.feature_importance(),
        "fold": fold_ + 1,
    })
    fold_importances.append(fold_importance_df)

    # Every fold contributes an equal share to the test prediction.
    predictions2 += clf.predict(test_df[features], num_iteration=clf.best_iteration) / folds.n_splits

feature_importance_df = pd.concat(fold_importances, axis=0)
print("CV score: {:<8.5f}".format(log_loss(target, oof)))


In [None]:
# Rank the test cards by predicted outlier probability, keep the 30000 highest,
# and look up their targets in a previously saved submission file.
df_outlier_prob = pd.DataFrame({
    "card_id": test_df["card_id"].values,
    "target": predictions2,
})
ranked_by_prob = df_outlier_prob.sort_values(by="target", ascending=False)
outlier_id = ranked_by_prob.head(30000)[["card_id"]]
best_submission = pd.read_csv("../submission/6911+6912+26165731.csv")
# Right join: one row per selected card, with the target from the saved file.
most_likely_liers = best_submission.merge(outlier_id, how="right")

In [None]:
# For the cards selected as likely outliers, replace the regression prediction
# with the target from `most_likely_liers`, then write the blended submission.
# A single vectorised lookup replaces the former per-card loop, which re-scanned
# both frames for every card_id.
override_target = most_likely_liers.set_index("card_id")["target"]
to_replace = model_without_outliers["card_id"].isin(override_target.index)
model_without_outliers.loc[to_replace, "target"] = (
    model_without_outliers.loc[to_replace, "card_id"].map(override_target)
)
model_without_outliers.to_csv("../submission/"+str(int(time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))))+".csv", index=False)