# DO NOT RERUN THIS FILE

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Locations of the prepared train/test splits
base_path = './data/prepped_data/'  # directory that holds the prepped CSV files
X_train_path = base_path + 'X_train.csv'
X_test_path = base_path + 'X_test.csv'
y_train_path = base_path + 'y_train.csv'
y_test_path = base_path + 'y_test.csv'

# Read features and targets (features first, then targets)
X_train, X_test = pd.read_csv(X_train_path), pd.read_csv(X_test_path)
y_train, y_test = pd.read_csv(y_train_path), pd.read_csv(y_test_path)

# Rebuild a timestamp from the year/month/day/hour columns of each feature frame
# and reuse it as the index of the matching target frame
datetime_parts = ['year', 'month', 'day', 'hour']
X_train['datetime'] = pd.to_datetime(X_train[datetime_parts])
X_test['datetime'] = pd.to_datetime(X_test[datetime_parts])
y_train.index = pd.to_datetime(X_train['datetime'])
y_test.index = pd.to_datetime(X_test['datetime'])

# Everything except the calendar parts and the rebuilt timestamp gets scaled.
# Index.difference returns the remaining column names in sorted order.
columns_to_scale = X_train.columns.difference(datetime_parts + ['datetime'])

# Feature scaler: fitted on the training split only, then applied to the test split
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train[columns_to_scale])
X_test_scaled = scaler_X.transform(X_test[columns_to_scale])

# Target scaler: kept separate from the feature scaler, also fitted on training data only
scaler_y = MinMaxScaler()
y_train_column = y_train.values.reshape(-1, 1)
y_test_column = y_test.values.reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_column).flatten()
y_test_scaled = scaler_y.transform(y_test_column).flatten()


In [2]:
import pandas as pd

# Scaled feature names followed by one placeholder entry for the scaled target
feature_names = [*columns_to_scale, "Scaled Target Variable (y)"]

# One record per name; this becomes the rows of the display table
table_data = [{"Feature Name": label} for label in feature_names]

# Only the target placeholder carries a description, since it has no column name of its own
table_data[-1].update({"Description": "Represents the scaled target variable"})

# Render the table as plain text
table_df = pd.DataFrame(table_data)
print(table_df)


                                         Feature Name  \
0                      central_bank_policy_rate_prcnt   
1                            classification_numerical   
2                                         cpi_monthly   
3                                           dia_close   
4                                          dia_volume   
5                                 domestic_credit_gdp   
6                                    durables_monthly   
7                            federal_funds_rate_daily   
8                     foreign_direct_investment_value   
9                             gdp_growth_annual_prcnt   
10                                          gld_close   
11                                         gld_volume   
12  individuals_using_the_internet_prcnt_of_popula...   
13                               inflation_rate_value   
14                                               macd   
15       mobile_cellular_subscriptions_per_100_people   
16                            n

In [3]:
import numpy as np
import random

def select_random_features(X_scaled, feature_names, num_features, num_rows=30, seed=None):
    """
    Select a random subset of feature columns and return a preview of their leading rows.

    Args:
    - X_scaled: 2-D numpy array of scaled features (indexed as [rows, columns]).
    - feature_names: sequence of all feature names; must support len() and integer
      indexing. Assumed to be in the same order as the columns of X_scaled.
    - num_features: number of features to select; capped at len(feature_names).
    - num_rows: number of leading rows kept in the returned subset. Defaults to 30,
      the value that was previously hard-coded.
    - seed: optional seed. When given, a private random.Random(seed) is used so the
      selection is reproducible and the global random state is left untouched.
      When None, the module-level random generator is used, as before.

    Returns:
    - X_subset: the first num_rows rows of X_scaled restricted to the selected columns.
    - selected_feature_names: names of the selected features, in selection order.
    - selected_indices: column indices of the selected features, in selection order.
    """
    # Ensure num_features does not exceed the number of available features
    num_features = min(num_features, len(feature_names))

    # Use a dedicated generator only when the caller asks for reproducibility
    rng = random if seed is None else random.Random(seed)

    # Draw distinct column positions (sampling without replacement)
    selected_indices = rng.sample(range(len(feature_names)), num_features)

    # Keep only the selected columns and the leading num_rows rows
    X_subset = X_scaled[:num_rows, selected_indices]

    # Look up the names that correspond to the drawn positions
    selected_feature_names = [feature_names[i] for i in selected_indices]

    return X_subset, selected_feature_names, selected_indices

# Draw the random feature subset; the column indices are kept as well so that
# later cells can slice the same columns out of the scaled arrays
num_features_to_select = 4
X_subset, selected_feature_names, feature_indices = select_random_features(
    X_scaled=X_train_scaled,
    feature_names=columns_to_scale,
    num_features=num_features_to_select,
)
print("Selected features:", selected_feature_names)
print(feature_indices)


Selected features: ['gld_close', 'spy_close', 'spy_volume', 'trade_balance_value']
[10, 24, 25, 29]


In [4]:
# Display the preview rows of the randomly selected feature columns
X_subset

array([[0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e-05, 1.00000000e+00],
       [0.00000000e+00, 6.41738531e-02, 6.51443418e

In [5]:
def generate_dynamic_fuzzy_rules(selected_feature_names, num_clusters=3):
    """
    Build one fuzzy rule string for every (feature, cluster) combination.

    Rules are emitted feature by feature, and within a feature in ascending
    cluster order, with clusters numbered from 1 to num_clusters.

    Args:
    - selected_feature_names: List of names of the selected features.
    - num_clusters: Number of clusters used for each feature.

    Returns:
    - List of rule strings, len(selected_feature_names) * num_clusters long.
    """
    return [
        f"IF ({feature} IS cluster_{cluster_id}) THEN (PricePrediction IS PricePrediction)"
        for feature in selected_feature_names
        for cluster_id in range(1, num_clusters + 1)
    ]

# Build the rule strings for the randomly selected features,
# using three clusters per feature
dynamic_rules = generate_dynamic_fuzzy_rules(
    selected_feature_names=selected_feature_names,
    num_clusters=3,
)

# Print the rules one per line so they can be checked by eye
print("Dynamically generated fuzzy rules:")
for rule in dynamic_rules:
    print(rule)


Dynamically generated fuzzy rules:
IF (gld_close IS cluster_1) THEN (PricePrediction IS PricePrediction)
IF (gld_close IS cluster_2) THEN (PricePrediction IS PricePrediction)
IF (gld_close IS cluster_3) THEN (PricePrediction IS PricePrediction)
IF (spy_close IS cluster_1) THEN (PricePrediction IS PricePrediction)
IF (spy_close IS cluster_2) THEN (PricePrediction IS PricePrediction)
IF (spy_close IS cluster_3) THEN (PricePrediction IS PricePrediction)
IF (spy_volume IS cluster_1) THEN (PricePrediction IS PricePrediction)
IF (spy_volume IS cluster_2) THEN (PricePrediction IS PricePrediction)
IF (spy_volume IS cluster_3) THEN (PricePrediction IS PricePrediction)
IF (trade_balance_value IS cluster_1) THEN (PricePrediction IS PricePrediction)
IF (trade_balance_value IS cluster_2) THEN (PricePrediction IS PricePrediction)
IF (trade_balance_value IS cluster_3) THEN (PricePrediction IS PricePrediction)


In [6]:
import numpy as np
from simpful import *
from sklearn.cluster import KMeans

num_clusters = 3

print("Using feature indices:", list(feature_indices))

# Cluster the training rows using only the randomly selected feature columns
selected_columns = list(feature_indices)
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(X_train_scaled[:, selected_columns])
centers = kmeans.cluster_centers_

print("KMeans centers calculated.")

# Start from an empty fuzzy inference system
FS = FuzzySystem()

# One linguistic variable per selected feature: each cluster center's coordinate
# along that feature becomes the mean of a Gaussian fuzzy set (fixed sigma of 0.1),
# with terms named cluster_1 .. cluster_N to match the generated rule strings
for i, feature_name in enumerate(selected_feature_names):
    fuzzy_sets = [
        GaussianFuzzySet(mu=center, sigma=0.1, term=f"cluster_{cluster_index+1}")
        for cluster_index, center in enumerate(centers[:, i])
    ]
    LV = LinguisticVariable(fuzzy_sets, concept=feature_name)
    FS.add_linguistic_variable(feature_name, LV)

print("Linguistic variables and fuzzy sets added.")

# Register the output variable; it only carries a single placeholder fuzzy set
dummy_fs = GaussianFuzzySet(mu=0, sigma=1, term="dummy")
dummy_lv = LinguisticVariable([dummy_fs], concept="dummy_output")
FS.add_linguistic_variable("PricePrediction", dummy_lv)

print("Output variable defined.")

# Output function: a weighted sum of the selected features.
# NOTE: the 0.25 weight is hard-coded; it is an equal-weight average only
# when exactly four features are selected.
output_terms = [f"0.25*{name}" for name in selected_feature_names]
FS.set_output_function("PricePrediction", " + ".join(output_terms))

print("Output function set.")

# Attach the rule strings generated earlier in the notebook
FS.add_rules(dynamic_rules)

print("Fuzzy rules added:", dynamic_rules)

# Prediction helper for the fuzzy inference system
def predict_with_fis(FS, input_features):
    """
    Feed one set of feature values to the fuzzy system and return its prediction.

    The values in input_features are paired positionally with the module-level
    selected_feature_names, set on FS one by one, and inference is then run for
    the "PricePrediction" output. Each step is printed.

    Args:
    - FS: fuzzy system object providing set_variable() and inference().
    - input_features: iterable of values, in the same order as selected_feature_names.

    Returns:
    - The "PricePrediction" entry of the inference result.
    """
    print(f"Predicting with input features: {input_features}")
    pairs = zip(selected_feature_names, input_features)
    for variable, reading in pairs:
        print(f"Setting variable {variable} to {reading}")
        FS.set_variable(variable, reading)
    outputs = FS.inference(["PricePrediction"])
    print(f"Prediction result: {outputs}")
    return outputs["PricePrediction"]

# Show the head of the scaled test set for reference
print("First 5 rows of X_test_scaled:")
print(X_test_scaled[:5])

# Predict for the first test row, restricted to the selected feature columns
first_test_row = X_test_scaled[0]
input_features = first_test_row[list(feature_indices)]
prediction = predict_with_fis(FS=FS, input_features=input_features)
print("Fuzzy prediction:", prediction)


Using feature indices: [10, 24, 25, 29]
KMeans centers calculated.
  ____  __  _  _  ____  ____  _  _  __   
 / ___)(  )( \/ )(  _ \(  __)/ )( \(  ) v2.12.0 
 \___ \ )( / \/ \ ) __/ ) _) ) \/ (/ (_/\ 
 (____/(__)\_)(_/(__)  (__)  \____/\____/

 https://github.com/aresio/simpful

Linguistic variables and fuzzy sets added.
Output variable defined.
 * Detected Sugeno model type
Output function set.
Fuzzy rules added: ['IF (gld_close IS cluster_1) THEN (PricePrediction IS PricePrediction)', 'IF (gld_close IS cluster_2) THEN (PricePrediction IS PricePrediction)', 'IF (gld_close IS cluster_3) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS cluster_1) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS cluster_2) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS cluster_3) THEN (PricePrediction IS PricePrediction)', 'IF (spy_volume IS cluster_1) THEN (PricePrediction IS PricePrediction)', 'IF (spy_volume IS cluster_2) THEN (PricePrediction IS PricePredictio

# Pilot with get_firing_strengths

In [9]:
import numpy as np
from simpful import *
from sklearn.cluster import KMeans

num_clusters = 3

print("Using feature indices:", list(feature_indices))

# Cluster the training rows using only the randomly selected feature columns
selected_columns = list(feature_indices)
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(X_train_scaled[:, selected_columns])
centers = kmeans.cluster_centers_
print("Centers:", centers)

print("KMeans centers calculated.")

# Start from an empty fuzzy inference system
FS = FuzzySystem()

# One linguistic variable per selected feature: each cluster center's coordinate
# along that feature becomes the mean of a Gaussian fuzzy set (fixed sigma of 0.1),
# with terms named cluster_1 .. cluster_N to match the generated rule strings
for i, feature_name in enumerate(selected_feature_names):
    fuzzy_sets = [
        GaussianFuzzySet(mu=center, sigma=0.1, term=f"cluster_{cluster_index+1}")
        for cluster_index, center in enumerate(centers[:, i])
    ]
    LV = LinguisticVariable(fuzzy_sets, concept=feature_name)
    FS.add_linguistic_variable(feature_name, LV)

print("Linguistic variables and fuzzy sets added.")

# Register the output variable; it only carries a single placeholder fuzzy set
dummy_fs = GaussianFuzzySet(mu=0, sigma=1, term="dummy")
dummy_lv = LinguisticVariable([dummy_fs], concept="dummy_output")
FS.add_linguistic_variable("PricePrediction", dummy_lv)

print("Output variable defined.")

# Output function: a weighted sum of the selected features.
# NOTE: the 0.25 weight is hard-coded; it is an equal-weight average only
# when exactly four features are selected.
output_terms = [f"0.25*{name}" for name in selected_feature_names]
FS.set_output_function("PricePrediction", " + ".join(output_terms))

print("Output function set.")

# Attach the rule strings generated earlier in the notebook
FS.add_rules(dynamic_rules)

print("Fuzzy rules added:", dynamic_rules)

# Prediction helper for the fuzzy inference system that also reports firing strengths
def predict_with_fis(FS, input_features):
    """
    Feed one set of feature values to the fuzzy system, print the rule firing
    strengths, and return the prediction.

    The values in input_features are paired positionally with the module-level
    selected_feature_names and set on FS one by one. The same values are also
    collected into a dict of single-element lists, which is handed to
    FS.get_firing_strengths() after inference has run.

    Args:
    - FS: fuzzy system object providing set_variable(), inference() and
      get_firing_strengths().
    - input_features: iterable of values, in the same order as selected_feature_names.

    Returns:
    - The "PricePrediction" entry of the inference result.
    """
    print(f"Predicting with input features: {input_features}")
    values_by_feature = {}
    for variable, reading in zip(selected_feature_names, input_features):
        print(f"Setting variable {variable} to {reading}")
        FS.set_variable(variable, reading)
        # Each value is wrapped in a list, the form passed to get_firing_strengths below
        values_by_feature[variable] = [reading]
    outputs = FS.inference(["PricePrediction"])
    # Firing strengths are queried after inference, from the collected inputs
    strengths = FS.get_firing_strengths(input_values=values_by_feature)
    print(f"Firing strengths: {strengths}")
    print(f"Prediction result: {outputs}")
    return outputs["PricePrediction"]


# Show the head of the scaled test set for reference
print("First 5 rows of X_test_scaled:")
print(X_test_scaled[:5])

# Predict for the first test row, restricted to the selected feature columns
first_test_row = X_test_scaled[0]
input_features = first_test_row[list(feature_indices)]
prediction = predict_with_fis(FS=FS, input_features=input_features)
print("Fuzzy prediction:", prediction)


Using feature indices: [10, 24, 25, 29]
Centers: [[6.20574500e-01 4.59706135e-01 2.40951266e-02 6.92164760e-01]
 [7.42012127e-01 8.27708605e-01 2.24187039e-02 3.95239397e-14]
 [2.06487920e-01 2.17997402e-01 2.61895084e-02 8.62469513e-01]]
KMeans centers calculated.
  ____  __  _  _  ____  ____  _  _  __   
 / ___)(  )( \/ )(  _ \(  __)/ )( \(  ) v2.12.0 
 \___ \ )( / \/ \ ) __/ ) _) ) \/ (/ (_/\ 
 (____/(__)\_)(_/(__)  (__)  \____/\____/

 https://github.com/aresio/simpful

Linguistic variables and fuzzy sets added.
Output variable defined.
 * Detected Sugeno model type
Output function set.
Fuzzy rules added: ['IF (gld_close IS cluster_1) THEN (PricePrediction IS PricePrediction)', 'IF (gld_close IS cluster_2) THEN (PricePrediction IS PricePrediction)', 'IF (gld_close IS cluster_3) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS cluster_1) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS cluster_2) THEN (PricePrediction IS PricePrediction)', 'IF (spy_close IS

In [10]:
import numpy as np

# Hand-computed Gaussian membership of one gld_close value in cluster_1
x = 0.7389532389532392   # input value being evaluated
mu_center = 0.6205745    # mean of the Gaussian fuzzy set
sigma = 0.1              # same spread as the fuzzy sets built for the FIS

# exp(-(x - mu)^2 / (2 * sigma^2)), computed in two steps
squared_distance = (x - mu_center) ** 2
membership_value = np.exp(-squared_distance / (2 * sigma ** 2))
print("Membership value for gld_close in cluster_1:", membership_value)


Membership value for gld_close in cluster_1: 0.49624958075466924


In [11]:
# Hand-computed value of the FIS output function for one set of inputs
gld_close = 0.7389532389532392
spy_close = 0.7252572156969862
spy_volume = 0.0005847548080539313
trade_balance_value = 0.0

# Same weighted sum as the output function string: 0.25 times each input,
# added left to right
weight = 0.25
PricePrediction = (
    weight * gld_close
    + weight * spy_close
    + weight * spy_volume
    + weight * trade_balance_value
)

print("Fuzzy prediction:", PricePrediction)


Fuzzy prediction: 0.36619880236456986
