# Consolidated Training Notebook

## Data Loading & Preprocessing

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import matplotlib.pyplot as plt

# Load and merge datasets
data_dir = os.path.join(os.path.dirname(os.getcwd()), 'Practice_Level_Crosstab_Jan_24')
files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]
dfs = [pd.read_csv(os.path.join(data_dir, f)) for f in files]
merged = pd.concat(dfs)

# Convert date
merged['Date'] = pd.to_datetime(merged['APPOINTMENT_MONTH_START_DATE'], format='%d%b%Y')
merged = merged.sort_values('Date')

# Feature engineering
merged['day_of_week'] = merged['Date'].dt.dayofweek
merged['is_weekend'] = merged['day_of_week'].isin([5,6]).astype(int)
features = ['COUNT_OF_APPOINTMENTS', 'day_of_week', 'is_weekend']
target = 'COUNT_OF_APPOINTMENTS'

# Data visualization
plt.figure(figsize=(12,6))
plt.plot(merged['Date'], merged[target])
plt.title('Appointments Over Time')
plt.xlabel('Date')
plt.ylabel('Appointments Count')
plt.show()

ValueError: No objects to concatenate

## Sequence Creation & Normalization

In [None]:
def create_sliding_windows(data, window_size=14):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:(i + window_size)])
        y.append(data[i + window_size, 0])
    return np.array(X), np.array(y)

# Normalize data
scaler = RobustScaler()
scaled_data = scaler.fit_transform(merged[features].values)

# Create sequences
X, y = create_sliding_windows(scaled_data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

## Model Architecture

In [None]:
def build_lstm_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(64, return_sequences=True, input_shape=input_shape),
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(1)
    ])
    return model

model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
model.compile(optimizer='adam', 
              loss=tf.keras.losses.Huber(),
              metrics=['mae'])
model.summary()

## Training & Evaluation

In [None]:
# Callbacks
callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ModelCheckpoint('best_model.h5', save_best_only=True)
]

# Train model
history = model.fit(X_train, y_train,
                   epochs=100,
                   batch_size=32,
                   validation_split=0.1,
                   callbacks=callbacks)

# Plot training history
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.legend()
plt.show()

# Evaluate
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae:.2f}")