In [1]:
import pandas as pd
import numpy as np
from data_preprocessor import DataPreprocessor
from lstm_model import LSTMForecaster
from random_forest_model import RandomForestForecaster
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import joblib
import os
# Make sure the directory that main() saves its comparison plot into exists
# before the pipeline runs; exist_ok avoids an error on repeated runs.
os.makedirs("results", exist_ok=True)

def main():
    """Run the sales-forecasting pipeline end to end.

    Steps:
      1. Load ``data/historical_sales.csv`` and enrich it through the
         ``DataPreprocessor`` helpers (weather, time features, holidays).
      2. Build a numeric feature matrix and the ``quantity_sold`` target.
      3. Split without shuffling, so the last 20% of rows form the test set.
      4. Train an LSTM forecaster and a random-forest forecaster.
      5. Save both fitted models under ``models/``.
      6. Write a comparison plot to ``results/forecast_comparison.png``.

    The function takes no arguments and returns nothing; its results are the
    files written to disk.
    """
    lstm_path = 'models/lstm_model.h5'
    rf_path = 'models/random_forest.pkl'
    plot_path = 'results/forecast_comparison.png'

    # Create every output directory up front so a missing folder fails fast
    # instead of after the (slow) training has already completed.
    output_dirs = {os.path.dirname(p) for p in (lstm_path, rf_path, plot_path)}
    for out_dir in sorted(output_dirs):
        os.makedirs(out_dir, exist_ok=True)

    # Initialize components
    preprocessor = DataPreprocessor()

    # 1. Load and prepare data
    df = preprocessor.load_sales_data('data/historical_sales.csv')
    df = preprocessor.add_weather_data(df, 'New York')
    df = preprocessor.add_time_features(df)
    df = preprocessor.add_holidays(df)
    df = preprocessor.preprocess_data(df)

    # 2. Prepare features and target
    features = df.drop(['quantity_sold', 'date', 'product_id'], axis=1)
    # Remove non-numeric features
    features = features.select_dtypes(include=[np.number])
    target = df['quantity_sold']

    # 3. Train-test split (shuffle=False keeps the original row order)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, shuffle=False
    )

    # Option 1: Train LSTM model
    lstm = LSTMForecaster(time_steps=7, n_features=features.shape[1])
    X_train_seq, y_train_seq = lstm.create_sequences(X_train.values, y_train.values)
    lstm.train(X_train_seq, y_train_seq)

    # Evaluate LSTM (the sequence targets are not needed for plotting)
    X_test_seq, _ = lstm.create_sequences(X_test.values, y_test.values)
    lstm_preds = lstm.predict(X_test_seq)

    # Option 2: Train Random Forest
    rf = RandomForestForecaster()
    rf.train(X_train, y_train)
    rf_preds = rf.predict(X_test)

    # 4. Save models
    lstm.model.save(lstm_path)
    joblib.dump(rf.model, rf_path)

    # 5. Visualize results
    # Smooth actual values for better visualization
    smoothed_actual = pd.Series(y_test.values).rolling(window=7).mean()

    # The LSTM can return fewer predictions than there are test rows, since
    # each input window consumes several rows. Shift its curve so the last
    # prediction lines up with the last test row.
    # NOTE(review): assumes create_sequences drops rows from the start of the
    # series (end-aligned windows) — confirm against LSTMForecaster.
    lstm_offset = len(y_test) - len(lstm_preds)
    lstm_x = np.arange(lstm_offset, len(y_test))

    plt.figure(figsize=(12, 6))
    plt.plot(smoothed_actual, label='Smoothed Actual', color='blue', linewidth=1)
    plt.plot(lstm_x, lstm_preds, label='LSTM Predictions', color='orange', linewidth=1)
    plt.plot(rf_preds, label='RF Predictions', color='green', linewidth=1)
    plt.title("Forecast Comparison")
    plt.xlabel("Time")
    plt.ylabel("Sales (Normalized)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(plot_path)
    plt.close()


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()




  super().__init__(**kwargs)


Epoch 1/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - loss: 3.6986 - val_loss: 1.9483
Epoch 2/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 2.1562 - val_loss: 2.0203
Epoch 3/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 2.0758 - val_loss: 1.9940
Epoch 4/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 2.1238 - val_loss: 1.9459
Epoch 5/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 2.1615 - val_loss: 1.9450
Epoch 6/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.0648 - val_loss: 1.9427
Epoch 7/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 2.0093 - val_loss: 1.9465
Epoch 8/50
100/100 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 2.0761 - val_loss: 1.9444
Epoch 9/50
100/100 ━━━━

