## <b>1. Import Packages</b>

In [1]:
import pandas as pd
import sys
import hashlib
import numpy as np
from tqdm import tqdm

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
from keras.utils import to_categorical

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from src import SRC_DIR

2024-02-02 15:10:31.285093: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<br></br>
## <b>2. Data Preprocessing</b>

In [50]:
# Import data and keep the target pages
event_log = pd.read_csv(SRC_DIR / 'Datasets' / 'Real' / 'Customer_Journey' / 'Website Event Log.csv')
target_page = pd.read_csv(SRC_DIR / 'Datasets' / 'Real' / 'Customer_Journey' / 'Target_Pages.csv')
event_log = event_log.loc[event_log.Visited_Page.isin(target_page.Pages)].reset_index(drop=True)

In [51]:
event_log.head()

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,hv5xru,12/30/2021 23:58:00,12/30/2021 23:58:00,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,168
1,hv5xru,12/30/2021 23:58:00,12/31/2021 0:00:48,ChromeMobile,Android,Mobile,Iran,Visited:.../online-issuance-and-cancellation/,153
2,hv5xru,12/30/2021 23:58:00,12/31/2021 0:03:21,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,5
3,hv5xru,12/30/2021 23:58:00,12/31/2021 0:04:07,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,26
4,92c26h,12/30/2021 23:57:00,12/30/2021 23:57:00,Firefox,Windows,PC,Iran,Visited:learning.emofid.com,129


In [52]:
# Create unique user id
event_log.User_ID = (event_log['User_ID'] + '_' + event_log['Case_Start_Date']).apply(hash)

In [53]:
event_log.head()

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,-8166798384875866007,12/30/2021 23:58:00,12/30/2021 23:58:00,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,168
1,-8166798384875866007,12/30/2021 23:58:00,12/31/2021 0:00:48,ChromeMobile,Android,Mobile,Iran,Visited:.../online-issuance-and-cancellation/,153
2,-8166798384875866007,12/30/2021 23:58:00,12/31/2021 0:03:21,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,5
3,-8166798384875866007,12/30/2021 23:58:00,12/31/2021 0:04:07,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,26
4,-8909181270288674887,12/30/2021 23:57:00,12/30/2021 23:57:00,Firefox,Windows,PC,Iran,Visited:learning.emofid.com,129


In [54]:
# sort rows
event_log = event_log.sort_values(['User_ID', 'Activity_Start_Date'])
event_log.reset_index(drop=True, inplace=True)
event_log.head(6)

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,-9221018531700382662,12/15/2021 16:14:00,12/15/2021 16:14:00,SamsungInternet,Android,Mobile,Iran,Visited:learning.emofid.com,1
1,-9219326855617226648,12/27/2021 15:14:00,12/27/2021 15:14:00,Chrome,Android,Tablet,Iran,Visited:learning.emofid.com,105
2,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:09:27,Edge,Windows,PC,Iran,Visited:.../investment-fund/,213
3,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:13:00,Edge,Windows,PC,Iran,Visited:.../dictionary/,4
4,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:13:11,Edge,Windows,PC,Iran,Visited:.../dictionary/,2
5,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:13:17,Edge,Windows,PC,Iran,Visited:.../dictionary/,357


In [68]:
def deplicate_check(data, cols):
    d_temp = data[cols[0]].astype(str) + data[cols[1]].astype(str)
    d_temp = d_temp.map(hash)
    d_temp = d_temp.diff()
    res = d_temp[d_temp != 0].index
    return d_temp

deplicate_check(event_log, ['User_ID','Visited_Page'])

Index([    4,     5,    10,    12,    18,    22,    23,    25,    26,    28,
       ...
       40002, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014],
      dtype='int64', length=11573)

In [56]:
# Even     Odd
# 0        1
# 2        3
# 4        5
# ...

# 1 -> 0 then 1 ×
# 5 -> 4 then 5 ×


# 0
# 1
# 2
# 3
# 4

# Even     Odd
# 0        1
# 2        3
# 4        5
# ...

In [45]:
# %%timeit
# event_log['Time_on_Page'] + 1

In [46]:
# %%timeit
# for time in event_log.Time_on_Page:
#     time = time + 1

In [47]:
# Merging refreshed pages

class IncorrectMethodError(Exception):
    def __init__(self, method):
        super().__init__(f"Method should be 'event_with_odd' or 'odd_with_even', {method} is incorrect")


def merge_refreshed_pages(event_log, method='even_with_odd') -> pd.DataFrame:
    if method == 'even_with_odd':
        chunk_1 = event_log[::2]   # 0   2   4   6   ...   40010
        chunk_2 = event_log[1::2]  # 1   3   5   7   ...   40011
    elif method == 'odd_with_even':
        chunk_1 = event_log[1::2]  # 1   3   5   7   ...   40011
        chunk_2 = event_log[2::2]  # 2   4   6   8   ...   40010
    else:
        raise IncorrectMethodError(method)

    chunk_1 = chunk_1[['User_ID', 'Visited_Page']]
    chunk_2 = chunk_2[['User_ID', 'Visited_Page']]
    chunk_2.rename(columns={'User_ID': 'User_ID_chunk2',
                            'Visited_Page': 'Visited_Page_chunk2'},
                   inplace=True)
    

    chunk_2.index = chunk_2.index - 1
    merged_df = pd.concat([chunk_1, chunk_2], axis=1)

    merged_df = merged_df.loc[(merged_df.User_ID == merged_df.User_ID_chunk2) & 
                              (merged_df.Visited_Page == merged_df.Visited_Page_chunk2)]

    index = merged_df.index
    index_rm = merged_df.index + 1

    event_log.loc[event_log.index.isin(index), 'Time_on_Page'] += list(event_log.loc[event_log.index.isin(index_rm)]['Time_on_Page'])
    event_log = event_log.loc[~event_log.index.isin(index_rm)]
    return event_log.reset_index(drop=True)

In [48]:
while True:
    event_log_len = len(event_log)
    event_log = merge_refreshed_pages(event_log, method='even_with_odd')
    event_log = merge_refreshed_pages(event_log, method='odd_with_even')

    if event_log_len == len(event_log):
        break

In [49]:
event_log

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,-9221018531700382662,12/15/2021 16:14:00,12/15/2021 16:14:00,SamsungInternet,Android,Mobile,Iran,Visited:learning.emofid.com,1
1,-9219326855617226648,12/27/2021 15:14:00,12/27/2021 15:14:00,Chrome,Android,Tablet,Iran,Visited:learning.emofid.com,105
2,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:09:27,Edge,Windows,PC,Iran,Visited:.../investment-fund/,213
3,-9214627976030825068,12/21/2021 13:54:00,12/21/2021 14:13:00,Edge,Windows,PC,Iran,Visited:.../dictionary/,363
4,-9212965318323144091,12/6/2021 10:47:00,12/6/2021 11:17:58,Chrome,Windows,PC,Iran,Visited:.../bourse-view-in-one-look-introduction/,34
...,...,...,...,...,...,...,...,...,...
28437,9215216476432254526,12/12/2021 10:11:00,12/12/2021 10:57:10,Chrome,Windows,PC,Iran,Visited:.../index-fund/,345
28438,9217420127799053394,12/14/2021 21:44:00,12/14/2021 21:44:00,ChromeMobile,Android,Mobile,Iran,Visited:learning.emofid.com,52
28439,9217420127799053394,12/14/2021 21:44:00,12/14/2021 21:45:27,ChromeMobile,Android,Mobile,Iran,Visited:.../fundamental-analysis/,3
28440,9222803154101322825,12/24/2021 12:18:00,12/24/2021 12:18:00,ChromeMobile,Android,Mobile,Iran,Visited:.../all-you-need-to-know-about-equities/,536


In [12]:
# Function to create code mappings
def create_code_mapping(column, prefix):
    unique_items = column.unique()
    code_map = {item: f"{prefix}_{idx}" for idx, item in enumerate(unique_items)}
    return code_map

# Creating code mappings for each column
user_code = create_code_mapping(event_log['User_ID'], 'U')
activities_code = create_code_mapping(event_log['Visited_Page'], 'Page')
device_code = create_code_mapping(event_log['Device'], 'D')
os_code = create_code_mapping(event_log['Operating_System'], 'OS')
browser_code = create_code_mapping(event_log['Browser'], 'B')

# Create a copy of the DataFrame for modification
mapped_event_log = event_log.copy()

# Applying the code mappings using .loc
mapped_event_log.replace(
    {'User_ID': user_code,
     'Visited_Page': activities_code,
     'Device': device_code,
     'Operating_System': os_code,
     'Browser': browser_code,
    },
    inplace=True)

# Displaying the updated DataFrame
mapped_event_log.head()

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,U_0,12/15/2021 16:14:00,12/15/2021 16:14:00,B_0,OS_0,D_0,Iran,Page_0,1
1,U_1,12/27/2021 15:14:00,12/27/2021 15:14:00,B_1,OS_0,D_1,Iran,Page_0,105
2,U_2,12/21/2021 13:54:00,12/21/2021 14:09:27,B_2,OS_1,D_2,Iran,Page_1,213
3,U_2,12/21/2021 13:54:00,12/21/2021 14:13:00,B_2,OS_1,D_2,Iran,Page_2,363
4,U_3,12/6/2021 10:47:00,12/6/2021 11:17:58,B_1,OS_1,D_2,Iran,Page_3,34


In [13]:
mapped_event_log.to_csv(SRC_DIR / 'Datasets' / 'Real' / 'Customer_Journey' / 'Website_EventLog_Preprossed_With_Python.csv', index=False)

<br></br>
## <b>3. Prediction with vanilla method</b>

In [14]:
# Import Eventlog
event_log = pd.read_csv(SRC_DIR / 'Datasets' / 'Real' / 'Customer_Journey' / 'Website_EventLog_Preprossed_With_Python.csv')

<br></br>
### <b>3.1. Filter Visited Pages</b>

In [15]:
# Filter Activities based on duration
event_log = event_log.loc[event_log["Time_on_Page"] > 10]
event_log = event_log.loc[event_log["Time_on_Page"] <= 600]
event_log.reset_index(drop=True, inplace=True)

In [16]:
user_id_visited_pages_count = event_log.groupby('User_ID').agg(
                                        Count_Page=('User_ID', 'count')
                              )

filtered_users = user_id_visited_pages_count.loc[user_id_visited_pages_count['Count_Page'] >= 3].index

event_log = event_log[event_log['User_ID'].isin(filtered_users)].reset_index(drop=True)
event_log

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Operating_System,Device,Country,Visited_Page,Time_on_Page
0,U_11,12/17/2021 22:42:00,12/17/2021 22:42:00,B_3,OS_1,D_2,Iran,Page_13,133
1,U_11,12/17/2021 22:42:00,12/17/2021 23:21:00,B_3,OS_1,D_2,Iran,Page_14,571
2,U_11,12/17/2021 22:42:00,12/17/2021 23:39:02,B_3,OS_1,D_2,Iran,Page_15,11
3,U_11,12/17/2021 22:42:00,12/17/2021 23:39:13,B_3,OS_1,D_2,Iran,Page_16,68
4,U_11,12/17/2021 22:42:00,12/17/2021 23:40:21,B_3,OS_1,D_2,Iran,Page_14,20
...,...,...,...,...,...,...,...,...,...
13882,U_7899,12/20/2021 23:44:00,12/20/2021 23:44:00,B_3,OS_1,D_2,Iran,Page_13,472
13883,U_7899,12/20/2021 23:44:00,12/20/2021 23:51:52,B_3,OS_1,D_2,Iran,Page_15,300
13884,U_7899,12/20/2021 23:44:00,12/20/2021 23:56:52,B_3,OS_1,D_2,Iran,Page_23,104
13885,U_7899,12/20/2021 23:44:00,12/20/2021 23:58:36,B_3,OS_1,D_2,Iran,Page_25,79


<br></br>
### <b>3.2. Making dummy variables for Device and OS</b>

In [17]:
event_log = pd.get_dummies(
    event_log,
    columns=['Device', 'Operating_System'],
    prefix=["Device", 'Operating_System'])

event_log.head()

Unnamed: 0,User_ID,Case_Start_Date,Activity_Start_Date,Browser,Country,Visited_Page,Time_on_Page,Device_D_0,Device_D_1,Device_D_2,Device_D_3,Operating_System_OS_0,Operating_System_OS_1,Operating_System_OS_2,Operating_System_OS_3,Operating_System_OS_4,Operating_System_OS_5,Operating_System_OS_6
0,U_11,12/17/2021 22:42:00,12/17/2021 22:42:00,B_3,Iran,Page_13,133,False,False,True,False,False,True,False,False,False,False,False
1,U_11,12/17/2021 22:42:00,12/17/2021 23:21:00,B_3,Iran,Page_14,571,False,False,True,False,False,True,False,False,False,False,False
2,U_11,12/17/2021 22:42:00,12/17/2021 23:39:02,B_3,Iran,Page_15,11,False,False,True,False,False,True,False,False,False,False,False
3,U_11,12/17/2021 22:42:00,12/17/2021 23:39:13,B_3,Iran,Page_16,68,False,False,True,False,False,True,False,False,False,False,False
4,U_11,12/17/2021 22:42:00,12/17/2021 23:40:21,B_3,Iran,Page_14,20,False,False,True,False,False,True,False,False,False,False,False


<br></br>
### <b>3.3. Reshape Eventlog</b>

In [18]:
from tqdm import tqdm

In [19]:
reshaped_event_log = pd.DataFrame()

for group_name, group in tqdm(event_log.groupby('User_ID')):
    group.sort_values("Activity_Start_Date", inplace=True)
    group.reset_index(drop=True, inplace=True)

    # Input Variables
    prefix = [tuple(group["Visited_Page"].values[:i+1]) for i in range(len(group))]
    elapsed_time = [sum(group["Time_on_Page"].values[:i+1]) for i in range(len(group))]
    number_of_visited_page = [len(group["Visited_Page"].values[:i+1]) for i in range(len(group))]

    operating_systems = {}
    devices = {}
    for col in group.columns:
        if 'OS_' in col:
            operating_systems[col] = [max(group[col]) for i in range(len(group))]
        if 'Device_' in col:
            devices[col] = [max(group[col]) for i in range(len(group))]

    # Output Variable
    next_page = [group["Visited_Page"].values[i+1] for i in range(len(group) - 1)] + ["End"]

    reshaped_group = pd.DataFrame({
        'Visited_Page': prefix,
        'Elapsed_Time': elapsed_time,
        '#Visited_Page': number_of_visited_page,
        'Next_Page': next_page,
        **operating_systems,
        **devices,
    })
    reshaped_event_log = pd.concat([reshaped_event_log, reshaped_group], axis=0)

100%|██████████████████████████████████████████████████████████████████████████████| 2804/2804 [00:05<00:00, 529.66it/s]


In [20]:
reshaped_event_log.reset_index(drop=True, inplace=True)
reshaped_event_log.head()

Unnamed: 0,Visited_Page,Elapsed_Time,#Visited_Page,Next_Page,Operating_System_OS_0,Operating_System_OS_1,Operating_System_OS_2,Operating_System_OS_3,Operating_System_OS_4,Operating_System_OS_5,Operating_System_OS_6,Device_D_0,Device_D_1,Device_D_2,Device_D_3
0,"(Page_0,)",55,1,Page_69,True,False,False,False,False,False,False,True,False,False,False
1,"(Page_0, Page_69)",416,2,Page_0,True,False,False,False,False,False,False,True,False,False,False
2,"(Page_0, Page_69, Page_0)",439,3,Page_1,True,False,False,False,False,False,False,True,False,False,False
3,"(Page_0, Page_69, Page_0, Page_1)",460,4,End,True,False,False,False,False,False,False,True,False,False,False
4,"(Page_0,)",37,1,Page_12,False,True,False,False,False,False,False,False,False,True,False


<br></br>
### <b>3.4. Making dummy variables for Visited Pages</b>

In [21]:
reshaped_event_log = pd.get_dummies(reshaped_event_log, columns=['Visited_Page'], prefix="Visited")

reshaped_event_log.head()

Unnamed: 0,Elapsed_Time,#Visited_Page,Next_Page,Operating_System_OS_0,Operating_System_OS_1,Operating_System_OS_2,Operating_System_OS_3,Operating_System_OS_4,Operating_System_OS_5,Operating_System_OS_6,...,"Visited_('Page_99', 'Page_8')","Visited_('Page_99', 'Page_8', 'Page_0')","Visited_('Page_99', 'Page_8', 'Page_56')","Visited_('Page_99', 'Page_8', 'Page_63')","Visited_('Page_99', 'Page_8', 'Page_63', 'Page_317')","Visited_('Page_99', 'Page_8', 'Page_63', 'Page_317', 'Page_99')","Visited_('Page_99', 'Page_8', 'Page_99')","Visited_('Page_99', 'Page_99')","Visited_('Page_99', 'Page_99', 'Page_1')","Visited_('Page_99', 'Page_99', 'Page_57')"
0,55,1,Page_69,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,416,2,Page_0,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,439,3,Page_1,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,460,4,End,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,37,1,Page_12,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


<br></br>
### <b>3.5. Train Models</b>

In [22]:
x_columns_name = ["Elapsed_Time", "#Visited_Page"] + [item for item in reshaped_event_log.columns if "Visited_" in item or
                                                                                                     "Device_" in item or 
                                                                                                     "OS_" in item]
y_column_name = ["Next_Page"]

In [23]:
# Train Test Split
x = reshaped_event_log[x_columns_name]
y = reshaped_event_log[y_column_name]

y = np.array(y).flatten()

label_encoder = LabelEncoder()

# Fit and transform the y array to integer labels
y = label_encoder.fit_transform(y)
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

# Train Test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

<br></br>
#### <b>3.5.1. Logistic Regression</b>

In [None]:
# Fit Mode
model = LogisticRegression(solver='liblinear', random_state=0)

# model = LogisticRegression(solver='liblinear', C=0.05, multi_class='ovr', random_state=0)
model.fit(x_train, y_train)

In [None]:
y_pred = model.predict(x_test)

print(model.score(x_train, y_train))
print(model.score(x_test, y_test))

<br></br>
#### <b>3.5.2. KNN</b>

In [31]:
knn_model = KNeighborsClassifier(n_neighbors=15)
knn_model.fit(x_train, y_train)

In [32]:
pred = knn_model.predict(x_test)
accuracy_score(pred, y_test)

0.09287257019438445

<br></br>
#### <b>3.5.3. Neural Network</b>

In [24]:
# Train Test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

y_encoded = to_categorical([*y_train, *y_test])

y_train_encoded = y_encoded[:y_train.shape[0]]
y_test_encoded = y_encoded[y_train.shape[0]:]

In [25]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


# Creating the neural network model
model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(x_train.shape[1],)),
    Dropout(0.3),
    layers.Dense(32, activation='relu'),
    Dropout(0.2),
    layers.Dense(y_encoded.shape[1], activation='softmax')
])

# Compiling the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# # Early stopping callback
# early_stopping = EarlyStopping(patience=20, restore_best_weights=True)

# Training the model
epochs = 10
batch_size = 32
model.fit(x_train, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_split=0.2)

# # Evaluating the model on the test set
# test_loss, test_accuracy = model.evaluate(x_test, y_test_encoded)
# print(f"Test Accuracy: {test_accuracy}")

2024-02-02 15:19:53.945977: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f3de5930eb0>

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                284576    
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 376)               12408     
                                                                 
Total params: 298,040
Trainable params: 298,040
Non-trainable params: 0
_________________________________________________________________


<br></br>
#### <b>3.5.4. Calculate recommendation accuracy</b>

In [35]:
def get_top_n_classes(probs, n):
    top_n_indices = np.argsort(probs)[-n:]
    return top_n_indices


# Make predictions on the test set
predictions = model.predict(x_test)

# predictions

# Initialize variables for accuracy calculation
num_correct = 0
total_samples = len(x_test)

# Evaluate accuracy based on recommending the top five classes
for i in range(total_samples):
    true_class = y_test[i]
    predicted_probs = predictions[i]
    top_classes = get_top_n_classes(predicted_probs, n=5)
    if true_class in top_classes:
        num_correct += 1

# Calculate and print the accuracy
accuracy = num_correct / total_samples
print(f"Accuracy based on recommending top five classes: {accuracy}")

Accuracy based on recommending top five classes: 0.2498200143988481


In [29]:
true_class

0

In [31]:
top_classes

array([ 18, 367, 375,   2,   0])