In [1]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from tensorflow.random import set_seed

# Seed TensorFlow's global random generator (set_seed is imported from tensorflow.random above).
set_seed(42)

# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Slice a univariate sequence into supervised-learning samples.

    Every sample pairs a window of ``n_steps`` consecutive values with the
    single value that immediately follows that window.

    Parameters
    ----------
    sequence : sequence
        Ordered observations (list, tuple or 1-D array).
    n_steps : int
        Window length; must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Input windows, shape ``(n_samples, n_steps)``.
    y : numpy.ndarray
        Value following each window, shape ``(n_samples,)``.
        ``n_samples`` equals ``max(len(sequence) - n_steps, 0)``.

    Raises
    ------
    ValueError
        If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError(f"n_steps must be a positive integer, got {n_steps!r}")

    # a window needs n_steps inputs plus one target, hence len - n_steps samples
    n_samples = len(sequence) - n_steps
    if n_samples <= 0:
        # keep X two-dimensional so callers that read X.shape[1] still work
        return np.empty((0, n_steps)), np.empty((0,))

    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]
    return np.array(X), np.array(y)

# toy series used throughout the notebook
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# window length and number of features per time step
n_steps = 3
n_features = 1
# cut the series into (window, next value) pairs
X, y = split_sequence(raw_seq, n_steps)
# go from [samples, timesteps] to [samples, timesteps, features]
n_samples, n_timesteps = X.shape
X = X.reshape(n_samples, n_timesteps, n_features)
# show the transposed array as the cell output
np.transpose(X)

array([[[10, 20, 30, 40, 50, 60],
        [20, 30, 40, 50, 60, 70],
        [30, 40, 50, 60, 70, 80]]])

In [2]:
# stack the network: a 50-unit LSTM layer followed by a single-unit dense output
model = Sequential(
    [
        LSTM(50, activation='relu', input_shape=(n_steps, n_features)),
        Dense(1),
    ]
)
model.compile(loss='mse', optimizer='adam')

# train silently for 200 epochs on the windowed samples
model.fit(X, y, verbose=0, epochs=200)


<keras.callbacks.History at 0x19224dc5030>

In [3]:
def get_predictions(n_periods: int, x_input: np.ndarray) -> np.ndarray:
    """Forecast ``n_periods`` values by feeding each prediction back into the model.

    Relies on the notebook-level ``model``, ``n_steps`` and ``n_features``.

    Parameters
    ----------
    n_periods : int
        Number of successive predictions to make.
    x_input : numpy.ndarray
        Starting window; it is reshaped to ``(1, n_steps, n_features)``, so it
        must hold exactly ``n_steps * n_features`` values.

    Returns
    -------
    numpy.ndarray
        1-D array with the predictions in the order they were produced
        (the accumulator starts out as ``float32``).
    """
    predicted_arr = np.array([], dtype='float32')
    window = np.asarray(x_input).reshape((1, n_steps, n_features))

    for _ in range(n_periods):
        yhat = model.predict(window, verbose=0)
        predicted_arr = np.append(predicted_arr, yhat.squeeze())

        # Roll the window: append the new prediction, then drop the oldest step.
        # The row count comes from n_steps (it used to be a literal 3), so the
        # rollout works for any window length.
        rolled = np.vstack((window.reshape(n_steps, -1), yhat))[1:]
        window = rolled.reshape((1, n_steps, n_features))

    return predicted_arr
    
# Roll the model forward four steps, starting from the last training window X[-1].
pred = get_predictions(n_periods=4, x_input=X[-1])
pred

array([ 90.61119, 101.902  , 113.83073, 126.62605], dtype=float32)

In [32]:
# last training window with an extra axis inserted at position 1
X[-1][:, np.newaxis]

array([[[60]],

       [[70]],

       [[80]]])

In [96]:
# NOTE(review): `yhat` is only assigned at notebook scope in the In [79] cell further down
# (inside get_predictions it is a local), so this cell relies on out-of-order execution.
yhat#.shape

array([[333]])

In [99]:
# Stack the last window on top of the (1, 1) prediction and report the resulting shape.
# X[-1] is already 2-D after the reshape in the first cell; wrapping it in
# np.expand_dims(..., axis=1) made it 3-D, and np.vstack cannot combine a 3-D array
# with the 2-D `yhat`.
# NOTE(review): `yhat` must already exist at notebook scope (it is assigned in the In [79] cell).
np.vstack((X[-1], yhat)).shape

(4, 1)

In [78]:
# start from an empty accumulator array
predicted_arr = np.array([])

In [79]:
# Hand-made 2-D array of shape (1, 1); presumably a stand-in for a model prediction
# so the append/roll steps can be tried without running the network — confirm.
yhat = np.array([[333]])  # shape (1, 1)
yhat

array([[333]])

In [83]:
# Append the flattened prediction to the accumulator.
# NOTE(review): this cell reassigns predicted_arr from itself, so every re-run adds another
# value; the recorded output holds two entries, which suggests it was executed twice.
predicted_arr = np.append(predicted_arr, yhat.reshape(-1,))
predicted_arr

array([333., 333.])

In [76]:
# append element: stack one extra value onto x_input with np.hstack
# NOTE(review): `x_input` is not assigned at notebook scope in any visible cell (it is only a
# parameter of get_predictions); the recorded output suggests it held the window
# [[[60], [70], [80]]] with shape (1, 3, 1) — confirm where it came from.
xxx = np.hstack((x_input, np.array([[[333]]])))
xxx

array([[[ 60],
        [ 70],
        [ 80],
        [333]]])

In [77]:
# drop 1st element
# np.delete is called without `axis`, so it operates on the flattened array and the
# result is 1-D (the 3-D layout of xxx is not preserved).
np.delete(xxx, 0)

array([ 70,  80, 333])

In [13]:
import pandas as pd

# small demo frame: a running number, a customer id and a one-letter type code
df = pd.DataFrame(
    {
        'number': list(range(5)),
        'cust_id': [128, 1201, 9832, 4392, 7472],
        'type': list('abcde'),
    }
)
df

Unnamed: 0,number,cust_id,type
0,0,128,a
1,1,1201,b
2,2,9832,c
3,3,4392,d
4,4,7472,e


In [14]:
# cap cust_id at the ceiling: any id above it is replaced by the ceiling itself
limit_1 = 1202
df['cust_id'] = df['cust_id'].clip(upper=limit_1)
df

Unnamed: 0,number,cust_id,type
0,0,128,a
1,1,1201,b
2,2,1202,c
3,3,1202,d
4,4,1202,e


In [15]:
from datetime import datetime

# Read "today" once so every row is offset from the same reference date
# (calling datetime.now() per row could straddle midnight).
today = datetime.now().date()

# One date per row, oldest first and ending today; the count follows len(df)
# rather than a literal 5 so the column always matches the frame's length.
x = [today - pd.Timedelta(days=offset) for offset in reversed(range(len(df)))]

# to_datetime turns the plain date objects into a datetime64 column
df['dates'] = pd.to_datetime(x)
df

Unnamed: 0,number,cust_id,type,dates
0,0,128,a,2024-02-08
1,1,1201,b,2024-02-09
2,2,1202,c,2024-02-10
3,3,1202,d,2024-02-11
4,4,1202,e,2024-02-12


In [16]:
# summarise the frame: columns, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   number   5 non-null      int64         
 1   cust_id  5 non-null      int64         
 2   type     5 non-null      object        
 3   dates    5 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 288.0+ bytes


In [22]:
# overwrite cust_id for every row dated strictly after the cutoff
cutoff = pd.Timestamp('2024-02-10')
is_after_cutoff = df['dates'] > cutoff
df.loc[is_after_cutoff, 'cust_id'] = 1500
df

Unnamed: 0,number,cust_id,type,dates
0,0,128,a,2024-02-08
1,1,1201,b,2024-02-09
2,2,1202,c,2024-02-10
3,3,1500,d,2024-02-11
4,4,1500,e,2024-02-12
