In [1]:
# Import the necessary library
from general_used_functions import *

### Model Training 

In [2]:
# Load training data
# NOTE(review): load_config_file and load_training_data are not defined in this
# notebook; presumably they arrive via the star import of general_used_functions.
# config_data is indexed by string keys here ('stock_dict') and later in this
# cell ('selected_features_dict').
config_data = load_config_file()
stock_list = config_data['stock_dict']
# training_stock_df is later indexed by ticker symbol and then by a list of
# column names, so it is presumably a mapping of ticker -> DataFrame (TODO confirm).
training_stock_df = load_training_data()

# Import required modules
from jumpmodels.sparse_jump import SparseJumpModel
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd

def SJM_training(stock, train_df, n_components=2, max_feats=3, jump_penalty=50.0):
    """Fit a SparseJumpModel on one stock's training frame and plot the regimes.

    Every column of ``train_df`` except ``'date'`` is used as a model input;
    this includes the ``stock`` column itself, which is also handed to the
    model as ``ret_ser``. Inputs are passed through ``DataClipperStd(mul=3.0)``
    and then ``StandardScalerPD`` before fitting.

    Args:
        stock: Name of the column in ``train_df`` holding the series passed as
            ``ret_ser`` (presumably the stock's returns -- confirm with the
            data-preparation code). Also used as a column name in the output.
        train_df: DataFrame containing a ``'date'`` column, a ``stock`` column
            and any further feature columns.
        n_components: Forwarded to ``SparseJumpModel``.
        max_feats: Forwarded to ``SparseJumpModel``.
        jump_penalty: Forwarded to ``SparseJumpModel``.

    Returns:
        A ``(model, states)`` tuple: the fitted ``SparseJumpModel`` and a
        DataFrame with columns ``'Date'``, ``stock`` and ``'states'`` (the
        model's ``labels_``).

    Note:
        The fitted clipper and scaler are local to this function and are not
        returned, so callers cannot reuse them to preprocess new data.
        ``plot_market_regime(states, stock)`` is called as a side effect.
    """
    # Preprocessing steps: clip first, then standardise
    outlier_clipper = DataClipperStd(mul=3.0)
    standardizer = StandardScalerPD()

    # Only the date column is excluded from the model inputs
    raw_inputs = train_df.drop(columns=["date"])
    clipped_inputs = outlier_clipper.fit_transform(raw_inputs)
    model_inputs = standardizer.fit_transform(clipped_inputs)

    # Caller-tunable settings plus the fixed iteration cap and seed
    sjm_settings = dict(
        n_components=n_components,
        max_feats=max_feats,
        jump_penalty=jump_penalty,
        max_iter=10000,
        random_state=42,
    )
    model = SparseJumpModel(**sjm_settings)

    # The stock column is supplied as ret_ser together with sort_by="cumret"
    # (per the original author's note, this is used for regime sorting)
    model.fit(model_inputs, ret_ser=train_df[stock], sort_by="cumret")

    # One row per training observation: date, stock value, assigned regime
    states = pd.DataFrame({
        "Date": train_df["date"],
        stock: train_df[stock],
        "states": model.labels_,
    })

    # Plot the labelled regimes
    plot_market_regime(states, stock)

    return model, states

# Example: fit the regime model for a single ticker
stock = 'GOOGL'
target_columns = config_data['selected_features_dict'][stock]
train_df = training_stock_df[stock][target_columns]

# Hyperparameters are passed explicitly. n_components was reduced from 5 to 2
# (author's note: search results show better performance with 2).
model, states = SJM_training(stock, train_df, n_components=2, max_feats=3, jump_penalty=50.0)


ModuleNotFoundError: No module named 'jumpmodels'

In [None]:
# # Save the model and states
# save_HMM_states_excel(stock, states)
# save_HMM_model(stock, model)

In [None]:
# def SJM_testing(stock, test_df, model):
#     """Test a trained SJM model on new data"""
#     # Exclude the date column when testing the model
#     features = test_df.drop(columns=['date'])
    
#     # Predict hidden states using the trained model
#     hidden_states = model.predict_hidden_states(features)

#     # Build the states DataFrame
#     states = pd.DataFrame({
#         'Date': test_df['date'],
#         stock: test_df[stock],
#         'states': hidden_states
#     })

#     # Visualize the results
#     plot_market_regime(states, stock, True)

#     return states


# # Load testing data
# config_data = load_config_file()
# stock_list = config_data['stock_dict']
# testing_stock_df = load_testing_data()
# target_columns = config_data['selected_features_dict'][stock]
# test_df = testing_stock_df[stock][target_columns]

# model = joblib.load(f'model/{stock}_HMM_model.joblib')
# states = SJM_testing(stock, test_df, model)
