In [None]:
import pandas as pd
from scripts.data_processing import clean_and_prepare_data
from scripts.sentiment_analysis import perform_sentiment_analysis
from scripts.sentiment_transformation import transform_sentiment_scores
from scripts.feature_engineering import calculate_hourly_sentiment
from scripts.process_xauusd_data import process_xauusd_data
from scripts.merge_xauusd_and_sentiment import merge_xauusd_and_sentiment
from scripts.process_crude_oil_data import process_crude_oil_data
from scripts.process_vix_data import process_vix_data
from scripts.process_sp500_data import process_sp500_data
from scripts.merge_all_data import merge_datasets
from scripts.xgboost_model import train_and_evaluate_xgboost_model

def main():
    """Run every pipeline step in order, from the JSON input folder to model training.

    Steps 1-10 each call one project function with explicit input/output
    paths under ``data/``; the output path given to one step is passed as an
    input path to a later step. Step 11 loads the final merged workbook with
    pandas and hands it, together with the model output path, to the XGBoost
    training routine. One progress line is printed before each step.
    """
    # Date range shared by every step that accepts start_date / end_date.
    date_window = {"start_date": '2020-07-01', "end_date": '2023-12-31'}

    # Paths that are produced by one step and consumed by a later one.
    cleaned_xlsx = "data/cleaned_data.xlsx"
    scores_xlsx = "data/sentiment_scores.xlsx"
    single_score_xlsx = "data/single_sentiment_score_file.xlsx"
    hourly_sentiment_xlsx = "data/hourly_sentiment.xlsx"
    xauusd_xlsx = "data/processed_hourly_xauusd_data.xlsx"
    xau_sentiment_xlsx = "data/merged_data.xlsx"
    crude_xlsx = "data/crudeOil.xlsx"
    vix_xlsx = "data/vix.xlsx"
    sp500_xlsx = "data/S&P500_hourly.xlsx"
    final_xlsx = "data/final_merged_data.xlsx"

    # --- Sentiment branch (steps 1-4) ---
    print("Step 1: Cleaning and preparing data...")
    clean_and_prepare_data(
        input_folder="data/json_files",
        output_file=cleaned_xlsx,
    )

    print("Step 2: Performing sentiment analysis...")
    perform_sentiment_analysis(
        input_file=cleaned_xlsx,
        output_file=scores_xlsx,
    )

    print("Step 3: Transforming sentiment scores...")
    transform_sentiment_scores(
        input_file=scores_xlsx,
        output_file=single_score_xlsx,
        **date_window,
    )

    print("Step 4: Calculating hourly sentiment...")
    calculate_hourly_sentiment(
        input_file=single_score_xlsx,
        output_file=hourly_sentiment_xlsx,
    )

    # --- XAU/USD prices and their merge with sentiment (steps 5-6) ---
    print("Step 5: Processing XAU/USD data...")
    process_xauusd_data(
        input_file="data/xauusd_hourly.xlsx",
        output_file=xauusd_xlsx,
    )

    print("Step 6: Merging XAU/USD data with sentiment data...")
    merge_xauusd_and_sentiment(
        price_file=xauusd_xlsx,
        sentiment_file=hourly_sentiment_xlsx,
        output_file=xau_sentiment_xlsx,
    )

    # --- Additional market series (steps 7-9) ---
    print("Step 7: Processing crude oil data...")
    process_crude_oil_data(
        input_file="data/crudeOil.csv",
        output_file=crude_xlsx,
        **date_window,
    )

    print("Step 8: Processing VIX data...")
    process_vix_data(
        input_file="data/vix-1h.csv",
        output_file=vix_xlsx,
        **date_window,
    )

    print("Step 9: Processing S&P 500 data...")
    process_sp500_data(
        input_file="data/es-1h.csv",
        output_file=sp500_xlsx,
        **date_window,
    )

    # --- Final merge and model training (steps 10-11) ---
    print("Step 10: Merging all datasets...")
    merge_datasets(
        crude_file=crude_xlsx,
        xau_usd_file=xau_sentiment_xlsx,
        vix_file=vix_xlsx,
        sp_file=sp500_xlsx,
        output_file=final_xlsx,
    )

    print("Step 11: Training and evaluating the XGBoost model...")
    train_and_evaluate_xgboost_model(
        pd.read_excel(final_xlsx),
        "models/xgboost_model.json",
    )

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
