In [None]:
# Load the FTSE-JSE All Share Index dataset; 'Date' is parsed to datetimes
# because it is the merge key below.
jse_index_data = pd.read_csv("data/FTSE-JSEAllShareIndex.csv", parse_dates=["Date"])

# Keep only the date and the closing level, exposing the close as 'JSEIndex'
jse_index_data = jse_index_data[['Date', 'Close']].rename(columns={'Close': 'JSEIndex'})

# Left merge: rows of filtered_data are kept, and dates that have no index
# quote end up with NaN in 'JSEIndex'.
filtered_data = pd.merge(filtered_data, jse_index_data, on="Date", how="left")

# Impute missing JSE Index values with the column mean.
# The result is assigned back rather than using fillna(inplace=True) on the
# selected column: that form is chained assignment, which pandas 2.x warns
# about and which leaves filtered_data unchanged under Copy-on-Write.
filtered_data['JSEIndex'] = filtered_data['JSEIndex'].fillna(filtered_data['JSEIndex'].mean())

# Discretize JSE Index into three quantile-based categories (Low, Medium, High)
filtered_data['JSEIndex'] = pd.qcut(filtered_data['JSEIndex'], q=3, labels=['Low', 'Medium', 'High'])

# Encode the JSE Index categorical variable as integer codes.
# NOTE: LabelEncoder numbers classes in sorted label order, so the codes are
# High=0, Low=1, Medium=2 (not ordinal); use le_jse_index.inverse_transform
# to map codes back to labels.
le_jse_index = LabelEncoder()
filtered_data['JSEIndex'] = le_jse_index.fit_transform(filtered_data['JSEIndex'])

# Update the list of variables to include JSE Index
variables = ['MarketCondition', 'SectorPerformance', 'SharePerformance', 'ExchangeRate', 'JSEIndex']

# Automatically create time slice columns: `_0` is the current row's value and
# `_1` is the value from the following row.
# NOTE(review): shift(-1) pairs each row with the next row of the frame, so
# this presumes filtered_data is in chronological order - confirm upstream.
for var in variables:
    filtered_data[f'{var}_0'] = filtered_data[var]
    filtered_data[f'{var}_1'] = filtered_data[var].shift(-1)

# Drop rows with NaN values in slice_1 columns (at least the last row, which
# has no successor).
filtered_data.dropna(subset=[f'{var}_1' for var in variables], inplace=True)

# shift() introduced NaN, which turns integer columns into floats; convert the
# `slice_1` columns back to integer type.
for var in variables:
    filtered_data[f'{var}_1'] = filtered_data[f'{var}_1'].astype(int)

# Create the Dynamic Bayesian Network (DBN) with JSE Index and Exchange Rate
def initialize_dbn(include_exchange_rate=True, include_jse_index=True):
    """Build the two-slice network structure and bind it to the global ``dbn``.

    MarketCondition, SectorPerformance and SharePerformance are always
    included; ExchangeRate and JSEIndex are optional. For every included
    variable a ``<var>_0`` and a ``<var>_1`` node are created, with state
    counts taken from the matching columns of the global ``filtered_data``.

    Args:
        include_exchange_rate: If true, add the ExchangeRate nodes and the
            ExchangeRate_0 -> MarketCondition_0 arc.
        include_jse_index: If true, add the JSEIndex nodes and the
            JSEIndex_0 -> MarketCondition_0 arc.

    Returns:
        The newly created network, i.e. the same object that is stored in
        the global ``dbn`` (existing callers may keep ignoring it).
    """
    global dbn
    dbn = gum.BayesNet("Dynamic_BDN")

    # Slicing copies the list, so the appends below never modify the global
    # `variables`. The first three entries are the always-present variables.
    active_vars = variables[:3]

    if include_exchange_rate:
        active_vars.append('ExchangeRate')
    if include_jse_index:
        active_vars.append('JSEIndex')

    # One state count per variable, computed over BOTH slice columns, so that
    # <var>_0 and <var>_1 always share the same number of states. Counting
    # each column on its own can give the two slices different cardinalities
    # when a state only occurs in the first or the last observation.
    state_counts = {
        var: len(pd.unique(filtered_data[[f"{var}_0", f"{var}_1"]].to_numpy().ravel()))
        for var in active_vars
    }

    # Add all slice-0 nodes first, then all slice-1 nodes
    for slice_num in (0, 1):
        for var in active_vars:
            dbn.add(
                gum.LabelizedVariable(
                    f"{var}_{slice_num}",
                    f"{var} at time slice {slice_num}",
                    state_counts[var],
                )
            )

    # Intra-slice arcs for slice 0. No arcs are added between slice-1 nodes;
    # they are linked to the rest of the network only by the temporal arcs.
    dbn.addArc("MarketCondition_0", "SectorPerformance_0")
    dbn.addArc("SectorPerformance_0", "SharePerformance_0")

    if 'ExchangeRate' in active_vars:
        dbn.addArc("ExchangeRate_0", "MarketCondition_0")

    if 'JSEIndex' in active_vars:
        dbn.addArc("JSEIndex_0", "MarketCondition_0")

    # Temporal arcs connecting each variable in slice 0 to itself in slice 1
    for var in active_vars:
        dbn.addArc(f"{var}_0", f"{var}_1")

    return dbn

# Build the network with every external factor enabled (Exchange Rate and
# JSE Index); the call stores the result in the module-level `dbn`.
initialize_dbn(include_jse_index=True, include_exchange_rate=True)

# Learn the parameters from the prepared data, then render the learned network
learned_dbn_with_all_factors = learn_parameters_from_data(
    filtered_data,
    dbn,
    variables,
)
gnb.showBN(learned_dbn_with_all_factors)

# Similar steps for making predictions with different combinations of external factors...
