# Datathon

In [1]:
# %pip install pandas numpy matplotlib seaborn scikit-learn xgboost tqdm

## Importing Modules

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings

warnings.filterwarnings('ignore')

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Loading Dataset

In [3]:
# Load the training table from the working directory and summarise its numeric columns.
df = pd.read_csv('train.csv')
df.describe()

Unnamed: 0,month_id,net_payment_count
count,291142.0,291142.0
mean,202159.68086,415.3842
std,104.214948,10820.67
min,202001.0,-1433.0
25%,202103.0,4.0
50%,202201.0,6.0
75%,202211.0,14.0
max,202309.0,1160429.0


In [4]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,merchant_id,month_id,merchant_source_name,settlement_period,working_type,mcc_id,merchant_segment,net_payment_count
0,merchant_43992,202307,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,15106
1,merchant_43992,202301,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16918
2,merchant_43992,202305,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,13452
3,merchant_43992,202308,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16787
4,merchant_43992,202302,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,12428


## Adding Extra Features

In [5]:
# Month-level extra features, keyed by month_id.
# A forward slash is used as the separator: it works on every OS and keeps the
# literal free of backslash escape sequences.
extra_data = pd.read_csv('ExtraData/ExtraData.csv')
extra_data.head()

Unnamed: 0,month_id,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction
0,202001,5.92,31,1,14.52,3.3,0
1,202002,6.04,29,0,13.94,4.9,0
2,202003,6.31,31,0,13.33,9.5,0
3,202004,6.82,30,1,12.66,12.1,1
4,202005,6.95,31,6,12.1,17.6,1


### Merge

In [6]:
# Left join: every training row is kept and receives the features of its month.
df = pd.merge(df, extra_data, how='left', on='month_id')
df.head()

Unnamed: 0,merchant_id,month_id,merchant_source_name,settlement_period,working_type,mcc_id,merchant_segment,net_payment_count,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction
0,merchant_43992,202307,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,15106,26.42,31,2,57.45,25.7,0
1,merchant_43992,202301,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16918,18.76,31,1,72.45,5.3,0
2,merchant_43992,202305,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,13452,19.68,31,2,63.72,16.4,0
3,merchant_43992,202308,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16787,26.95,31,1,56.28,27.1,0
4,merchant_43992,202302,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,12428,18.82,28,0,71.83,3.4,0


## Preprocessing

### Reformatting Date and Merchant ID

In [7]:
# month_id is a YYYYMM number: the first four digits are the year, the rest the month.
df['year'] = df['month_id'].astype(str).str[0:4].astype(int)
df['month'] = df['month_id'].astype(str).str[4:].astype(int)
df['date'] = pd.to_datetime(df['month_id'], format='%Y%m')

# Index the frame by date in chronological order; the raw key is then redundant.
df = df.set_index('date').sort_index().drop(columns='month_id')

# 'merchant_<n>' -> n: skip the 9-character 'merchant_' prefix.
df['merchant_id'] = df['merchant_id'].str[9:].astype(int)

df['net_payment_count'] = df['net_payment_count'].astype(float)

df.head()

Unnamed: 0_level_0,merchant_id,merchant_source_name,settlement_period,working_type,mcc_id,merchant_segment,net_payment_count,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction,year,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-01,66740,Merchant Source - 1,Settlement Period - 1,Working Type - 6,mcc_130,Segment - 4,6.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,12444,Merchant Source - 1,Settlement Period - 1,Working Type - 2,mcc_153,Segment - 4,3.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,40154,Merchant Source - 2,Settlement Period - 1,Working Type - 5,mcc_168,Segment - 4,3.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,33179,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_25,Segment - 2,787.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,16977,Merchant Source - 2,Settlement Period - 1,Working Type - 6,mcc_31,Segment - 4,5.0,5.92,31,1,14.52,3.3,0,2020,1


### Add Total Month Column

In [8]:
# df['total_month'] = (df['year'] - 2020) * 12 + df['month']
# df.head()

### Label Encoding

In [9]:
# Categorical columns to replace with integer codes.
cols = ['merchant_source_name', 'settlement_period', 'working_type', 'mcc_id', 'merchant_segment']
from sklearn.preprocessing import LabelEncoder
# One encoder instance is refit on every column, so after the loop `le` only
# holds the classes of the last column in `cols`.
le = LabelEncoder()
for col in cols:
    df[col] = le.fit_transform(df[col])

df.head()

Unnamed: 0_level_0,merchant_id,merchant_source_name,settlement_period,working_type,mcc_id,merchant_segment,net_payment_count,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction,year,month
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-01,66740,0,0,5,29,3,6.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,12444,0,0,1,49,3,3.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,40154,1,0,4,63,3,3.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,33179,2,2,1,98,1,787.0,5.92,31,1,14.52,3.3,0,2020,1
2020-01-01,16977,1,0,5,105,3,5.0,5.92,31,1,14.52,3.3,0,2020,1


In [10]:
# Remove the encoded categoricals and the calendar parts in a single drop,
# leaving the merchant id, the target and the month-level numeric features.
df = df.drop(columns=cols + ['year', 'month'])
df.head()

Unnamed: 0_level_0,merchant_id,net_payment_count,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,66740,6.0,5.92,31,1,14.52,3.3,0
2020-01-01,12444,3.0,5.92,31,1,14.52,3.3,0
2020-01-01,40154,3.0,5.92,31,1,14.52,3.3,0
2020-01-01,33179,787.0,5.92,31,1,14.52,3.3,0
2020-01-01,16977,5.0,5.92,31,1,14.52,3.3,0


In [11]:
# Rows of one example merchant (id 66740), in chronological order.
merchant_df = df[df['merchant_id'] == 66740].sort_index()

# Cast the month-level features to float in one pass.
merchant_df = merchant_df.astype(
    dict.fromkeys(['usdtry', 'number_of_days', 'holiday_number', 'inflation', 'temperature'], float)
)

merchant_df.head(50)




Unnamed: 0_level_0,merchant_id,net_payment_count,usdtry,number_of_days,holiday_number,inflation,temperature,covid_era_restriction
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,66740,6.0,5.92,31.0,1.0,14.52,3.3,0
2020-02-01,66740,4.0,6.04,29.0,0.0,13.94,4.9,0
2020-03-01,66740,4.0,6.31,31.0,0.0,13.33,9.5,0
2020-04-01,66740,3.0,6.82,30.0,1.0,12.66,12.1,1
2020-05-01,66740,5.0,6.95,31.0,6.0,12.1,17.6,1
2020-06-01,66740,5.0,6.81,30.0,0.0,11.88,21.7,0
2020-07-01,66740,4.0,6.85,31.0,3.0,11.51,25.9,0
2020-08-01,66740,5.0,7.25,31.0,5.0,11.27,25.2,0
2020-09-01,66740,4.0,7.51,30.0,0.0,11.47,23.9,1
2020-10-01,66740,3.0,7.87,31.0,1.0,11.74,18.4,1


In [12]:
def merchant_df_to_window_df(merchant_df, beginning, end, n_months):
    """Turn one merchant's monthly rows into sliding windows of ``n_months`` months.

    One output row is produced for every window of ``n_months`` consecutive
    months that fits between ``beginning`` and ``end`` (both inclusive). For
    each month of the window the six source columns are copied into
    ``<prefix>_<k>`` columns, where ``k`` (1-based) is the month's position in
    the window. Columns are grouped by feature: all ``payment_*`` columns
    first, then ``usdtry_*``, ``number_of_days_*``, ``holiday_number_*``,
    ``inflation_*`` and ``temperature_*``.

    Months that are absent from ``merchant_df.index`` keep the placeholder
    value 0 for every feature, including the month-level ones.

    Parameters
    ----------
    merchant_df : pandas.DataFrame
        Rows of a single merchant, indexed by date, holding the columns
        ``net_payment_count``, ``usdtry``, ``number_of_days``,
        ``holiday_number``, ``inflation`` and ``temperature``.
    beginning, end : str or datetime-like
        First month of the first window and last month of the last window;
        anything accepted by ``pd.to_datetime``.
    n_months : int
        Window length in months.

    Returns
    -------
    pandas.DataFrame
        Float frame indexed by ``window_beginning`` with ``6 * n_months``
        columns; it has no rows when the range is shorter than one window.
    """
    # (source column, output prefix) pairs, in output column order.
    feature_prefixes = [
        ('net_payment_count', 'payment'),
        ('usdtry', 'usdtry'),
        ('number_of_days', 'number_of_days'),
        ('holiday_number', 'holiday_number'),
        ('inflation', 'inflation'),
        ('temperature', 'temperature'),
    ]

    beginning = pd.to_datetime(beginning)
    end = pd.to_datetime(end)
    total_months = (end.year - beginning.year) * 12 + end.month - beginning.month

    rows = []
    # The range spans total_months + 1 months, hence total_months - n_months + 2 windows.
    for window_offset in range(total_months - n_months + 2):
        window_beginning = beginning + pd.DateOffset(months=window_offset)

        # Slot 0 holds the window start; the rest is zero-filled so that
        # missing months keep 0 for every feature.
        row_data = [0] * (n_months * len(feature_prefixes) + 1)
        row_data[0] = window_beginning

        for month_offset in range(n_months):
            month = window_beginning + pd.DateOffset(months=month_offset)
            if month not in merchant_df.index:
                continue
            month_row = merchant_df.loc[month]
            for feature_pos, (source_column, _) in enumerate(feature_prefixes):
                row_data[feature_pos * n_months + month_offset + 1] = month_row[source_column]

        rows.append(row_data)

    columns = ['window_beginning'] + [
        prefix + '_' + str(position + 1)
        for _, prefix in feature_prefixes
        for position in range(n_months)
    ]

    window_df = pd.DataFrame(columns=columns, data=rows).set_index('window_beginning')
    return window_df.astype(float)


# Show the 7-month windows of the sample merchant with every column visible.
with pd.option_context('display.max_rows', 50, 'display.max_columns', None):
    display(merchant_df_to_window_df(merchant_df, '2020-01-01', '2023-09-01', 7).head(100))

Unnamed: 0_level_0,payment_1,payment_2,payment_3,payment_4,payment_5,payment_6,payment_7,usdtry_1,usdtry_2,usdtry_3,usdtry_4,usdtry_5,usdtry_6,usdtry_7,number_of_days_1,number_of_days_2,number_of_days_3,number_of_days_4,number_of_days_5,number_of_days_6,number_of_days_7,holiday_number_1,holiday_number_2,holiday_number_3,holiday_number_4,holiday_number_5,holiday_number_6,holiday_number_7,inflation_1,inflation_2,inflation_3,inflation_4,inflation_5,inflation_6,inflation_7,temperature_1,temperature_2,temperature_3,temperature_4,temperature_5,temperature_6,temperature_7
window_beginning,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
2020-01-01,6.0,4.0,4.0,3.0,5.0,5.0,4.0,5.92,6.04,6.31,6.82,6.95,6.81,6.85,31.0,29.0,31.0,30.0,31.0,30.0,31.0,1.0,0.0,0.0,1.0,6.0,0.0,3.0,14.52,13.94,13.33,12.66,12.1,11.88,11.51,3.3,4.9,9.5,12.1,17.6,21.7,25.9
2020-02-01,4.0,4.0,3.0,5.0,5.0,4.0,5.0,6.04,6.31,6.82,6.95,6.81,6.85,7.25,29.0,31.0,30.0,31.0,30.0,31.0,31.0,0.0,0.0,1.0,6.0,0.0,3.0,5.0,13.94,13.33,12.66,12.1,11.88,11.51,11.27,4.9,9.5,12.1,17.6,21.7,25.9,25.2
2020-03-01,4.0,3.0,5.0,5.0,4.0,5.0,4.0,6.31,6.82,6.95,6.81,6.85,7.25,7.51,31.0,30.0,31.0,30.0,31.0,31.0,30.0,0.0,1.0,6.0,0.0,3.0,5.0,0.0,13.33,12.66,12.1,11.88,11.51,11.27,11.47,9.5,12.1,17.6,21.7,25.9,25.2,23.9
2020-04-01,3.0,5.0,5.0,4.0,5.0,4.0,3.0,6.82,6.95,6.81,6.85,7.25,7.51,7.87,30.0,31.0,30.0,31.0,31.0,30.0,31.0,1.0,6.0,0.0,3.0,5.0,0.0,1.0,12.66,12.1,11.88,11.51,11.27,11.47,11.74,12.1,17.6,21.7,25.9,25.2,23.9,18.4
2020-05-01,5.0,5.0,4.0,5.0,4.0,3.0,0.0,6.95,6.81,6.85,7.25,7.51,7.87,0.0,31.0,30.0,31.0,31.0,30.0,31.0,0.0,6.0,0.0,3.0,5.0,0.0,1.0,0.0,12.1,11.88,11.51,11.27,11.47,11.74,0.0,17.6,21.7,25.9,25.2,23.9,18.4,0.0
2020-06-01,5.0,4.0,5.0,4.0,3.0,0.0,0.0,6.81,6.85,7.25,7.51,7.87,0.0,0.0,30.0,31.0,31.0,30.0,31.0,0.0,0.0,0.0,3.0,5.0,0.0,1.0,0.0,0.0,11.88,11.51,11.27,11.47,11.74,0.0,0.0,21.7,25.9,25.2,23.9,18.4,0.0,0.0
2020-07-01,4.0,5.0,4.0,3.0,0.0,0.0,6.0,6.85,7.25,7.51,7.87,0.0,0.0,7.39,31.0,31.0,30.0,31.0,0.0,0.0,31.0,3.0,5.0,0.0,1.0,0.0,0.0,1.0,11.51,11.27,11.47,11.74,0.0,0.0,12.53,25.9,25.2,23.9,18.4,0.0,0.0,5.4
2020-08-01,5.0,4.0,3.0,0.0,0.0,6.0,6.0,7.25,7.51,7.87,0.0,0.0,7.39,7.07,31.0,30.0,31.0,0.0,0.0,31.0,28.0,5.0,0.0,1.0,0.0,0.0,1.0,0.0,11.27,11.47,11.74,0.0,0.0,12.53,12.81,25.2,23.9,18.4,0.0,0.0,5.4,6.2
2020-09-01,4.0,3.0,0.0,0.0,6.0,6.0,4.0,7.51,7.87,0.0,0.0,7.39,7.07,7.63,30.0,31.0,0.0,0.0,31.0,28.0,31.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,11.47,11.74,0.0,0.0,12.53,12.81,13.18,23.9,18.4,0.0,0.0,5.4,6.2,7.0
2020-10-01,3.0,0.0,0.0,6.0,6.0,4.0,5.0,7.87,0.0,0.0,7.39,7.07,7.63,8.16,31.0,0.0,0.0,31.0,28.0,31.0,30.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,11.74,0.0,0.0,12.53,12.81,13.18,13.7,18.4,0.0,0.0,5.4,6.2,7.0,13.4


### Train & Test Split

In [13]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)
# train = df[df.index <= '2023-06-01']
# test = df[df.index >= '2023-04-01']

## Model Training

In [14]:
def evaluate(model, X_train, X_test, y_train, y_test):
    """Print hold-out and cross-validated error metrics for a fitted model.

    Parameters
    ----------
    model : estimator
        Already fitted estimator exposing ``predict`` and ``score``; it is
        also passed to ``cross_val_score``.
    X_train, y_train : array-like
        Training features and targets, used for the cross-validated scores.
    X_test, y_test : array-like
        Hold-out features and targets, used for the hold-out metrics.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    # Hold-out predictions from the fitted model.
    y_p = model.predict(X_test)

    # 5-fold cross-validation on the training split only, so the function
    # depends on its arguments rather than on notebook-level X / y variables.
    cv_mse_scores = cross_val_score(model, X_train, y_train, scoring='neg_mean_squared_error', cv=5)
    cv_rmse_score = np.sqrt(np.abs(np.mean(cv_mse_scores)))

    cv_mae_scores = cross_val_score(model, X_train, y_train, scoring='neg_mean_absolute_error', cv=5)
    cv_mae_score = np.abs(np.mean(cv_mae_scores))

    test_rmse = np.sqrt(mean_squared_error(y_test, y_p))
    # MAE is already in the units of the target: no square root.
    test_mae = mean_absolute_error(y_test, y_p)

    print("Results")
    print("Training score:", np.mean(cross_val_score(model, X_train, y_train)))
    print("Test score:", np.mean(cross_val_score(model, X_test, y_test)))
    print("RMSE:", test_rmse)
    print(f"Model Score: {model.score(X_test, y_test)}")
    print("RMSE CV Score:", cv_rmse_score)
    print("MAE CV Score:", cv_mae_score)
    print("")
    print("RMSE:", test_rmse)
    print("MAE:", test_mae)

In [15]:
def evaluate_merchant(y_test, y_p_test):
    """Return the ``(RMSE, MAE)`` pair of the predictions against the true targets."""
    squared_error = mean_squared_error(y_test, y_p_test)
    absolute_error = mean_absolute_error(y_test, y_p_test)
    return np.sqrt(squared_error), absolute_error

In [16]:
def plot_feature_importance(model_importance, features, model_name):
    """Plot feature importances as a bar chart, most important feature first.

    Parameters
    ----------
    model_importance : array-like
        Importance value of each feature.
    features : array-like
        Feature names, aligned with ``model_importance``.
    model_name : str
        Text placed in front of the plot title.
    """
    # Pair names with importances and rank them in descending order.
    ranked = pd.DataFrame({
        'features': np.array(features),
        'importance': np.array(model_importance),
    }).sort_values(by=['importance'], ascending=False)

    plt.figure(figsize=(10, 8))

    # Importance on the x axis, feature names on the y axis.
    sns.barplot(x=ranked['importance'], y=ranked['features'])

    # NOTE(review): name and suffix are joined without a separator, so the
    # caller has to supply any trailing space in model_name.
    plt.title(model_name + 'Feature Importance')
    plt.xlabel('Feature Importance')
    plt.ylabel('Features')

### XGBRegressor

In [17]:
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

# Back-test: fit one model per merchant on the first 1000 distinct merchant ids
# (in order of appearance in df) and collect the hold-out errors.
test_merchant_ids = df['merchant_id'].unique()[:1000]

test_rmse_list = []
test_mae_list = []

for merchant_id in tqdm(test_merchant_ids):
    merchant_df = df[df['merchant_id'] == merchant_id]
    # Training windows lie within 2020-01..2023-01; the test range 2023-02..2023-09
    # holds 8 months, i.e. two 7-month windows per merchant.
    train_merchant_df = merchant_df_to_window_df(merchant_df, beginning='2020-01-01', end='2023-01-01', n_months=7)
    test_merchant_df = merchant_df_to_window_df(merchant_df, beginning='2023-02-01', end='2023-09-01', n_months=7)

    # The payment of the 7th (last) month of each window is the target;
    # every other window column is a feature.
    X_train = train_merchant_df.drop('payment_7', axis=1)
    y_train = train_merchant_df['payment_7']

    X_test = test_merchant_df.drop('payment_7', axis=1)
    y_test = test_merchant_df['payment_7']

    # A fresh model with default hyper-parameters is trained for every merchant.
    model = XGBRegressor()
    model.fit(X_train, y_train)
    y_p_test = model.predict(X_test)
    test_rmse, test_mae = evaluate_merchant(y_test, y_p_test)
    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)

# Per-merchant errors (one entry per merchant id), followed by their means.
print(test_rmse_list)
print(test_mae_list)

print(np.mean(test_rmse_list))
print(np.mean(test_mae_list))

In [18]:
# NOTE(review): no `train` function is defined in the visible cells of this
# notebook, so this call presumably raises NameError on a fresh kernel —
# TODO define it or remove this cell.
train()

In [19]:
# Load the submission template; the cells below fill in its net_payment_count column.
submission = pd.read_csv('sample_submission.csv')
submission.head()

Unnamed: 0,id,net_payment_count
0,202311merchant_36004,0
1,202312merchant_36004,0
2,202310merchant_36004,0
3,202311merchant_23099,0
4,202312merchant_23099,0


In [20]:
# An id looks like '<YYYYMM>merchant_<n>': the first 6 characters are the month
# and everything after the 15-character '<YYYYMM>merchant_' prefix is the merchant number.
month_id = submission['id'].str[:6]
submission['date'] = pd.to_datetime(month_id, format='%Y%m')
submission = submission.set_index('date')
submission['merchant_id'] = submission['id'].astype(str).str[15:].astype(int)
submission = submission.drop(columns='id')
submission.head()

Unnamed: 0_level_0,net_payment_count,merchant_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-11-01,0,36004
2023-12-01,0,36004
2023-10-01,0,36004
2023-11-01,0,23099
2023-12-01,0,23099


In [21]:
# Summary statistics of the reformatted template, before any prediction is filled in.
submission.describe()

Unnamed: 0,net_payment_count,merchant_id
count,78180.0,78180.0
mean,0.0,33870.055986
std,0.0,19593.630993
min,0.0,1.0
25%,0.0,16893.75
50%,0.0,33856.5
75%,0.0,50883.5
max,0.0,67816.0


In [23]:
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

# Distinct merchants of the submission, in order of first appearance; the
# `predictions` list below follows the same order.
submission_merchant_ids = submission['merchant_id'].unique()
predictions = []

for merchant_id in tqdm(submission_merchant_ids):
    merchant_df = df[df['merchant_id'] == merchant_id]
    # Train on every 7-month window between 2020-01 and 2023-09.
    train_merchant_df = merchant_df_to_window_df(merchant_df, beginning='2020-01-01', end='2023-09-01', n_months=7)
    # 2023-03..2023-09 is exactly one 7-month window, so this frame has a single row.
    # NOTE(review): after dropping payment_7 the features are the payments of
    # 2023-03..2023-08, so the model output corresponds to the 2023-09 slot, a
    # month that is already inside the training range — confirm this is the
    # intended input for the submission months.
    submission_merchant_df = merchant_df_to_window_df(merchant_df, beginning='2023-03-01', end='2023-09-01', n_months=7).drop('payment_7', axis=1)

    X_train = train_merchant_df.drop('payment_7', axis=1)
    y_train = train_merchant_df['payment_7']

    X_submission = submission_merchant_df
    
    # Initialize XGBRegressor model
    model = XGBRegressor()
    
    # Fit the model with training data
    model.fit(X_train, y_train)
    
    # Make predictions
    # The same prediction array is stored three times per merchant.
    predictions.append([model.predict(X_submission)] * 3)


  2%|▏         | 506/26060 [00:25<21:54, 19.44it/s]

In [None]:
# Flatten to one value per submission row: each merchant's predicted value,
# repeated three times in a row.
finalpredictions = []
for prediction in predictions:
    finalpredictions.extend([prediction[1][0]] * 3)

In [None]:
# `finalpredictions` holds three identical values per merchant, in the order of
# `submission_merchant_ids`. Assign by merchant id instead of by row position so
# the result does not depend on how the submission rows are ordered.
assert len(finalpredictions) == 3 * len(submission_merchant_ids), \
    "expected three predictions per submission merchant (was the prediction loop interrupted?)"
prediction_by_merchant = dict(zip(submission_merchant_ids, finalpredictions[::3]))
submission['net_payment_count'] = submission['merchant_id'].map(prediction_by_merchant)
assert submission['net_payment_count'].notna().all(), "some submission rows received no prediction"
submission.head()

In [None]:
# Summary statistics after the predictions have been filled in.
submission.describe()


In [None]:
# Rebuild the '<YYYYMM>merchant_<n>' id from the date index and the numeric merchant id.
submission['month_id'] = submission.index.strftime('%Y%m')
submission['id'] = submission['month_id'].astype(str) + 'merchant_' + submission['merchant_id'].astype(str)

In [None]:
# Keep the two columns of the submission template and round the predictions
# to zero decimals.
submission = submission[['id', 'net_payment_count']]
submission = submission.round({'net_payment_count': 0})
submission.head(600)


In [None]:
# Write the submission file without the DataFrame index.
submission.to_csv('utkullah.csv', index=False)