In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily flatten the directory walk into one stream of full file paths,
# then print each path on its own line.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/final-csv/final_4.2.csv


In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Read the input CSV into a DataFrame
csv_path = '/kaggle/input/final-csv/final_4.2.csv'
data = pd.read_csv(csv_path)

In [4]:
# Parse 'action_id' and 'day_minutes' as sequences
def safe_eval_string_to_list(s):
    """Parse a bracketed string of integers into a list of ints.

    Both whitespace-separated ("[1 2 3]") and comma-separated ("[1, 2, 3]")
    encodings are accepted, with optional surrounding whitespace.

    Args:
        s: The cell value to parse; expected to be a string.

    Returns:
        The parsed integers as a list. An empty list is returned (and a
        message is printed) when ``s`` is not a string or contains a token
        that is not an integer, so a single bad row does not abort the
        surrounding ``apply``.
    """
    try:
        # Commas are treated like whitespace so either list encoding parses;
        # the outer strip() lets the bracket strip work on padded strings.
        tokens = s.strip().strip('[]').replace(',', ' ').split()
        return [int(token) for token in tokens]
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError: non-string input such as None or NaN.
        # ValueError: a token that int() cannot convert.
        print(f"Error parsing {s}: {e}")
        return []

# Map each raw string column to the column that will hold its parsed list
sequence_columns = {
    'action_id': 'action_id_seq',
    'day_minutes': 'day_minutes_seq',
}
for raw_column, parsed_column in sequence_columns.items():
    data[parsed_column] = data[raw_column].apply(safe_eval_string_to_list)

In [5]:
# Pad every sequence with trailing zeros to a common length, then min-max scale.
# Each feature gets its OWN scaler: re-fitting one shared instance would
# overwrite the action_id statistics, so new data could not be transformed
# (nor reconstructions inverted) consistently afterwards.
action_id_scaler = MinMaxScaler()
day_minutes_scaler = MinMaxScaler()

action_id_padded = pad_sequences(data['action_id_seq'], padding='post', dtype='float32')
day_minutes_padded = pad_sequences(data['day_minutes_seq'], padding='post', dtype='float32')

# NOTE(review): MinMaxScaler scales each column independently, so every
# time-step position gets its own min/max (padding zeros included) - confirm
# that per-position scaling is intended rather than one range per feature.
action_id_scaled = action_id_scaler.fit_transform(action_id_padded)
day_minutes_scaled = day_minutes_scaler.fit_transform(day_minutes_padded)

# `scaler` used to end up fitted on day_minutes; keep the name bound the same
# way for any later cell that still references it.
scaler = day_minutes_scaler

# Combine action_id and day_minutes as separate features in the sequence data.
# np.stack requires both padded arrays to have the same shape; the result has
# the two features on the last axis.
X_sequences = np.stack((action_id_scaled, day_minutes_scaled), axis=-1)

In [6]:
# Build the Conv1D + LSTM autoencoder: the input is one (timesteps, features)
# sample and the output is that same sample flattened into a single vector.
input_shape = X_sequences.shape[1:]
n_timesteps, n_features = input_shape
input_layer = Input(shape=input_shape)

# Layers are listed in the order they are applied to the input tensor
hidden_layers = [
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    LSTM(50, return_sequences=True),
    Flatten(),
    Dense(50, activation='relu'),
]
x = input_layer
for hidden_layer in hidden_layers:
    x = hidden_layer(x)

# Sigmoid keeps every reconstructed value inside [0, 1]
output_layer = Dense(n_timesteps * n_features, activation='sigmoid')(x)  # Flattened output

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')
autoencoder.summary()


In [7]:
# Flatten each sample to one vector so the training targets match the
# model's flattened Dense output
n_samples = X_sequences.shape[0]
X_train = X_sequences.reshape(n_samples, -1)

In [10]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: write the model to disk only when validation loss
# reaches a new minimum
best_model_path = 'lstm_cnn_autoencoder_best.keras'  # Save with .keras extension
checkpoint_callback = ModelCheckpoint(
    filepath=best_model_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Training settings gathered in one place
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
    callbacks=[checkpoint_callback],
)

# Inputs are the 3-D sequences; targets are the same data flattened
history = autoencoder.fit(x=X_sequences, y=X_train, **fit_options)

print("Model training complete. The best model is saved as 'lstm_cnn_autoencoder_best.keras'")


Epoch 1/50
34851/34851 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.0114
Epoch 1: val_loss improved from inf to 0.00084, saving model to lstm_cnn_autoencoder_best.keras
34851/34851 ━━━━━━━━━━━━━━━━━━━━ 241s 7ms/step - loss: 0.0114 - val_loss: 8.4133e-04
Epoch 2/50
34843/34851 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 7.0783e-04
Epoch 2: val_loss improved from 0.00084 to 0.00051, saving model to lstm_cnn_autoencoder_best.keras
34851/34851 ━━━━━━━━━━━━━━━━━━━━ 235s 7ms/step - loss: 7.0781e-04 - val_loss: 5.0778e-04
Epoch 3/50
34845/34851 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 4.6543e-04
Epoch 3: val_loss improved from 0.00051 to 0.00040, saving model to lstm_cnn_autoencoder_best.keras
34851/34851 ━━━━━━━━━━━━━━━━━━━━ 235s 7ms/step - loss: 4.6543e-04 - val_loss: 3.9597e-04
Epoch 4/50
[1m3