In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Features and target for the decision tree. 'Growth rate' is deliberately
# left out of the feature list.
features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Norm return']
X = df[features]
Y = df['Action']

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Train the decision tree. The estimator is seeded as well: without a
# random_state the fitted tree (and therefore the metrics below) can change
# from one run to the next even though the split is fixed.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, Y_train)

# Predict the held-out rows
Y_pred = clf.predict(X_test)

# Evaluate the model; the results are kept in variables so that later cells
# can print them again next to the other models.
dec_tree_confusion = confusion_matrix(Y_test, Y_pred)
dec_tree_report = classification_report(Y_test, Y_pred)
print("Confusion Matrix: ")
print(dec_tree_confusion)
print("\nReport: ")
print(dec_tree_report)

In [None]:
def Model_invest(predictions, investment, growth_rates=None, sell_percentage=0.25):
    """Simulate how an investment evolves when following a sequence of predictions.

    The first prediction is skipped (the initial state is the untouched
    ``investment`` with no cash on the side). For every following prediction
    the invested amount is first grown by the matching growth rate and then
    the action is applied:

    * ``1``  - invest: all cash on the side is added to the investment.
    * ``-1`` - stop loss: ``sell_percentage`` of the investment is moved to cash.
    * anything else - hold: nothing moves.

    Parameters
    ----------
    predictions : sequence
        Predicted actions, one per step.
    investment : number
        Amount invested at step 0.
    growth_rates : sequence or pandas.Series, optional
        Per-step growth rates, read by position. Defaults to the notebook's
        global ``df['Growth rate']`` (the original behaviour).
    sell_percentage : float, optional
        Fraction of the investment sold on a stop-loss signal (default 0.25,
        the value that used to be hard-coded).

    Returns
    -------
    list of tuple
        ``(invested, cash)`` pairs, one per step including the initial state.
    """
    if growth_rates is None:
        growth_rates = df['Growth rate']
    # Read rates strictly by position. Integer keys passed to Series[...] are
    # treated as labels (or go through a deprecated positional fallback) when
    # the index is not a plain range, so use .iloc whenever it is available.
    rate_at = growth_rates.iloc if hasattr(growth_rates, 'iloc') else growth_rates

    ivs = [investment]  # invested value after each step
    left = [0]          # cash on the side after each step

    # NOTE(review): the i-th prediction after the first is paired with growth
    # row i-1 (k starts at 0), whereas InvestmentModel.Model_invest starts its
    # counter at 1 - confirm which alignment is intended.
    for k, p in enumerate(predictions[1:]):
        growth = rate_at[k]
        current_investment = ivs[-1] + (ivs[-1] * growth)
        current_left = left[-1]

        if p == 1:
            # Invest: put all available cash back to work
            ivs.append(current_investment + current_left)
            left.append(0)
        elif p == -1:
            # Stop loss: move a fraction of the position into cash
            left.append(current_investment * sell_percentage + current_left)
            ivs.append(current_investment * (1 - sell_percentage))
        else:
            # Hold: carry both balances forward unchanged
            ivs.append(current_investment)
            left.append(current_left)

    return list(zip(ivs, left))

In [None]:
def _print_model_summary(banner, model_name, confusion, report):
    """Print one model's banner, name, confusion matrix and classification report."""
    print(banner)
    print(model_name)
    print(f"Confusion Matrix:\n{confusion}")
    print(f"Report:\n{report}")


# Side-by-side recap of the two classifiers' evaluation results
_print_model_summary(
    "###########################################################",
    "Decision tree",
    dec_tree_confusion,
    dec_tree_report,
)
_print_model_summary(
    "\n#########################################################",
    "Random Forest",
    rand_forest_confusion,
    rand_forest_report,
)

In [None]:
# Draw both models' total portfolio value on a single set of axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rf_total, label='Random Forest', marker='o')
ax.plot(dt_total, label='Decision Tree', marker='x')
ax.set_xlabel('Time')
ax.set_ylabel('Total Investment Value')
ax.set_title('Investment Value Over Time: Random Forest vs Decision Tree')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
feature_importances = rf_clf.feature_importances_

# Take the feature names from the fitted model when it recorded them, so the
# labels always line up with the importances. The global `features` is
# rebound by several cells in this notebook and may no longer describe the
# columns rf_clf was trained on; it is only used as a fallback.
feature_names = getattr(rf_clf, 'feature_names_in_', features)

# Pair every feature name with its importance, most important first
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importances
})
importance_df = importance_df.sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the importances
plt.figure(figsize=(10, 6))
plt.barh(importance_df['Feature'], importance_df['Importance'])
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importances from Random Forest')
plt.gca().invert_yaxis()  # Invert y-axis to have the most important feature on top
plt.show()

In [None]:
# Features and target.
# Every column of robust_df is used as a feature except the target ('Action')
# and the closely related 'Growth rate'.
features = robust_df.columns.difference(['Growth rate', 'Action'])
X = robust_df[features]
Y = robust_df['Action']

# 80/20 train/test split with a fixed seed
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training rows
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, Y_train)

# Predicted actions for the held-out rows
Y_pred = rf_clf.predict(X_test)

In [None]:
feature_importances = rf_clf.feature_importances_

# Table of feature names and importances, ordered from most to least important
importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

# Horizontal bars; the y-axis is flipped so the top-ranked feature is drawn first
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance_df['Feature'], importance_df['Importance'])
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Feature Importances from Random Forest')
ax.invert_yaxis()
plt.show()

In [None]:
# Run the investment simulation on the latest predictions, starting from 10000
investment_data = Model_invest(Y_pred, 10000)

# One row per step: invested amount and cash on the side
investment_df = pd.DataFrame(investment_data, columns=['Investment', 'Left'])

# Portfolio value = invested amount + cash
investment_df["Total"] = investment_df["Investment"].add(investment_df["Left"])

# Buy-and-hold baseline: the normalized return scaled to the same starting amount
hold_baseline = df.reset_index()["Norm return"].mul(10000)
investment_df["Hold"] = hold_baseline

investment_df.plot()

In [None]:
rf_total_expanded = investment_df['Total']

# Overlay the three portfolio value series on one set of axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rf_total, label='Random Forest', marker='o')
ax.plot(rf_total_expanded, label='Random Forest Augmented Variables', marker='+')
ax.plot(dt_total, label='Decision Tree', marker='x')
ax.set_xlabel('Time')
ax.set_ylabel('Total Investment Value')
ax.set_title('Investment Value Over Time: Random Forest vs Decision Tree')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Features and target.
# This run trains on the pre-selected `filtered_features` columns only.
features = filtered_features
X = robust_df[features]
Y = robust_df['Action']

# 80/20 train/test split (seed 24)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=24,
)

# Fit a random forest with default hyper-parameters (seed 40)
rf_clf = RandomForestClassifier(random_state=40)
rf_clf.fit(X_train, Y_train)

# Predicted actions for the held-out rows
Y_pred = rf_clf.predict(X_test)

In [None]:
# Run the investment simulation on the latest predictions, starting from 10000
investment_data = Model_invest(Y_pred, 10000)

# Build the per-step table, then derive the two comparison columns:
#   Total - invested amount plus cash on the side
#   Hold  - buy-and-hold baseline (normalized return scaled to the same start)
investment_df = pd.DataFrame(investment_data, columns=['Investment', 'Left'])
investment_df = investment_df.assign(
    Total=lambda frame: frame["Investment"] + frame["Left"],
    Hold=df.reset_index()["Norm return"] * 10000,
)
investment_df.plot()

In [None]:
rf_total_filtered = investment_df['Total']

# Overlay all four portfolio value series on one set of axes.
# The augmented-variables series uses the same legend label as in the
# previous comparison plot, so the two figures can be read together.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rf_total, label='Random Forest', marker='o')
ax.plot(rf_total_expanded, label='Random Forest Augmented Variables', marker='+')
ax.plot(rf_total_filtered, label='Random Forest Filtered Variables', marker='*')
ax.plot(dt_total, label='Decision Tree', marker='x')
ax.set_xlabel('Time')
ax.set_ylabel('Total Investment Value')
ax.set_title('Investment Value Over Time: Random Forest vs Decision Tree')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import warnings
# Silence all warnings: installs a process-wide "ignore" filter that matches
# every warning category, message and module.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below - consider narrowing the filter.
warnings.filterwarnings('ignore')

class InvestmentModel:
    """Random-forest driven buy / hold / sell simulation for a weighted basket of tickers.

    Workflow (see ``run_model``): download daily prices for every asset, derive
    rolling features, label each row with a combined action code (one base-3
    digit per asset: 0 = sell, 1 = hold, 2 = buy), train a random forest to
    predict that code, and replay the predicted codes through ``Model_invest``.

    Parameters
    ----------
    assets : list of str
        Ticker symbols, in the order used for the action-code digits.
    portfolio : list of float
        Initial weight of each asset (same order as ``assets``).
    start_date, end_date : str
        Date range passed to ``yfinance.download``.
    initial_investment : number
        Total amount split across the assets according to ``portfolio``.
    buy_threshold : float
        A growth rate above this value is labelled "buy".
    sell_threshold : float
        A growth rate below ``-sell_threshold`` is labelled "sell".
    sell_percentage : float
        Fraction of a position moved to cash on a sell signal.
    rs : int
        Random seed of the random forest.
    """

    # Fraction of rows held out by run_model; also the fallback sample size of
    # the hold baseline when no simulation has been run yet.
    TEST_SIZE = 0.2
    # Look-back length (in rows) of every rolling feature.
    ROLLING_WINDOW = 3

    def __init__(self, assets, portfolio, start_date, end_date, initial_investment=1000, buy_threshold=0.001, sell_threshold=0.002, sell_percentage=0.25, rs = 42):
        self.assets = assets
        self.rs = rs
        self.portfolio = portfolio
        self.start_date = start_date
        self.end_date = end_date
        self.initial_investment = initial_investment
        self.buy_threshold = buy_threshold
        self.sell_threshold = sell_threshold
        self.sell_percentage = sell_percentage
        self.result_df = None      # feature table, filled by fetch_and_process_data
        self.investment_df = None  # simulation output, filled by run_model
        self.rf_total = None       # 'Total Investments' series, set by plot_results
        self.df_hold = None        # hold baseline, set by plot_results / return_total

    def fetch_and_process_data(self):
        """Download prices for every asset and build the combined feature table.

        Each asset's columns are suffixed with its ticker and the per-asset
        frames are joined side by side into ``self.result_df``.

        Raises
        ------
        ValueError
            If no price data is returned for an asset.
        """
        window = self.ROLLING_WINDOW
        dfs = []
        for asset in self.assets:
            # auto_adjust=False keeps the 'Adj Close' column this method relies
            # on, independent of the installed yfinance version's default.
            data = yf.download(tickers=asset, start=self.start_date, end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(data.columns, pd.MultiIndex):
                # Some yfinance versions return (field, ticker) column pairs
                # even for a single ticker; keep the field level only.
                data.columns = data.columns.get_level_values(0)
            if data.empty:
                raise ValueError(f"No price data returned for {asset}")

            data['Norm return'] = data['Adj Close'] / data['Adj Close'].iloc[0]
            data['Growth rate'] = data['Adj Close'].pct_change()
            data['Volume_Dif'] = data['Volume'].diff() / data['Volume']
            data['V_MA'] = data['Volume'].rolling(window=window).mean()
            data['Price_Rstd'] = data['Close'].rolling(window=window).std()
            data['Price_MA'] = data['Close'].rolling(window=window).mean()
            data['Price_Rvariance'] = data['Close'].rolling(window=window).var()
            data['Change_Rstd'] = data['Growth rate'].rolling(window=window).std()
            data['Change_MA'] = data['Growth rate'].rolling(window=window).mean()
            data['Change_Rvariance'] = data['Growth rate'].rolling(window=window).var()
            data['Last_Rstd'] = data['Adj Close'].rolling(window=window).std()
            data['Last_MA'] = data['Adj Close'].rolling(window=window).mean()
            data['Last_Rvariance'] = data['Adj Close'].rolling(window=window).var()
            data['V_Rvariance'] = data['Volume'].rolling(window=window).var()
            data['V_Rstd'] = data['Volume'].rolling(window=window).std()
            # NOTE(review): the next two columns repeat 'V_MA' and 'Change_MA'
            # (same source column, same window); kept so the feature set is
            # unchanged.
            data['Volume_MA3'] = data['Volume'].rolling(window=window).mean()
            data['PCh_MA3'] = data['Growth rate'].rolling(window=window).mean()
            data = data.dropna()
            data.columns = [f"{col} {asset}" for col in data.columns]
            dfs.append(data)
        # The side-by-side join keeps the union of all dates; drop the rows
        # where any asset has no data so the model never sees NaN.
        self.result_df = pd.concat(dfs, axis=1).dropna()

    @staticmethod
    def ternary_to_decimal(ternary_str):
        """Return the integer value of a base-3 digit string."""
        return int(ternary_str, 3)

    def label_action(self, row, growth_rate_columns):
        """Encode the per-asset actions of one row as a single integer.

        For each column in ``growth_rate_columns`` the digit is 2 (buy) when
        the value exceeds ``buy_threshold``, 0 (sell) when it is below
        ``-sell_threshold`` and 1 (hold) otherwise. The digits are read as a
        base-3 number, first column most significant.
        """
        actions = []
        for col in growth_rate_columns:
            if row[col] > self.buy_threshold:
                actions.append(2)
            elif row[col] < -self.sell_threshold:
                actions.append(0)
            else:
                actions.append(1)
        ternary_str = ''.join(map(str, actions))
        return self.ternary_to_decimal(ternary_str)

    @staticmethod
    def decimal_to_ternary_array(decimal, length):
        """Return the base-3 digits of ``decimal``, left-padded with zeros to ``length``.

        Raises
        ------
        ValueError
            If ``decimal`` is negative (floor division would never reach zero).
        """
        value = int(decimal)
        if value < 0:
            raise ValueError(f"decimal must be non-negative, got {decimal}")
        digits = []
        while value:
            value, remainder = divmod(value, 3)
            digits.append(remainder)
        digits.reverse()
        return [0] * (length - len(digits)) + digits

    def Model_invest(self, predictions):
        """Replay predicted action codes and track the value of every position.

        The first prediction is skipped: step 0 is the initial allocation
        (``initial_investment`` split by ``portfolio``) with no cash. At each
        later step ``k`` every position first grows by row ``k`` of its
        'Growth rate' column (read by position), then the decoded actions are
        applied:

        * buy (2)  - the cash available at the start of the step is split
          equally between all assets with a buy signal;
        * sell (0) - ``sell_percentage`` of the position is moved to cash,
          which becomes available from the next step on;
        * hold (1) - the position is left alone.

        Returns
        -------
        tuple
            ``(total_investments, left, asset_values)``: the summed invested
            value per step (cash excluded), the cash balance per step, and one
            list of per-step values for each asset.
        """
        num_assets = len(self.portfolio)
        ivs = [[self.initial_investment * weight for weight in self.portfolio]]
        left = [0]
        asset_values = [[value] for value in ivs[0]]
        # Column lookups do not depend on the step, so do them once.
        growth_series = [self.result_df[f'Growth rate {asset}'] for asset in self.assets]

        for k, p in enumerate(predictions[1:], start=1):
            ternary_array = self.decimal_to_ternary_array(p, num_assets)
            # .iloc: k is a row position, not an index label (the index holds dates).
            current_investments = [inv + (inv * series.iloc[k]) for inv, series in zip(ivs[-1], growth_series)]

            cash_available = left[-1]
            buy_count = ternary_array.count(2)
            # Fix the per-asset share before the loop so that every buy signal
            # receives the same amount and all of the cash is deployed.
            buy_amount = cash_available / buy_count if buy_count else 0
            sale_proceeds = 0
            new_investments = []

            for idx, action in enumerate(ternary_array):
                if action == 2:
                    new_investments.append(current_investments[idx] + buy_amount)
                elif action == 0:
                    sale_proceeds += current_investments[idx] * self.sell_percentage
                    new_investments.append(current_investments[idx] * (1 - self.sell_percentage))
                else:
                    new_investments.append(current_investments[idx])

            # Any buy signal spends the whole starting balance; this step's
            # sale proceeds are carried into the next step.
            new_left = (0 if buy_count else cash_available) + sale_proceeds

            ivs.append(new_investments)
            left.append(new_left)
            for idx, value in enumerate(new_investments):
                asset_values[idx].append(value)

        total_investments = [sum(inv) for inv in ivs]
        return total_investments, left, asset_values

    def run_model(self):
        """Fetch data, train the random forest and simulate the predicted actions.

        Stores the simulation in ``self.investment_df`` with the columns
        'Total Investments', 'Left' and one 'Asset <ticker>' column per asset.
        """
        self.fetch_and_process_data()
        growth_rate_columns = [col for col in self.result_df.columns if 'Growth rate' in col]
        self.result_df['Action_Code'] = self.result_df.apply(lambda row: self.label_action(row, growth_rate_columns), axis=1)

        # Everything except the target and the growth-rate columns it is derived from.
        features = [col for col in self.result_df.columns.difference(['Action_Code']) if 'Growth rate' not in col]
        X = self.result_df[features]
        Y = self.result_df['Action_Code']

        # NOTE(review): train_test_split shuffles rows by default, so Y_pred is
        # not in chronological order, while Model_invest pairs prediction k
        # with row k of result_df - confirm this pairing is intended.
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=self.TEST_SIZE, random_state=24)
        rf_clf = RandomForestClassifier(random_state=self.rs)
        rf_clf.fit(X_train, Y_train)
        Y_pred = rf_clf.predict(X_test)

        total_investments, left, asset_values = self.Model_invest(Y_pred)
        investment_df = pd.DataFrame({'Total Investments': total_investments, 'Left': left})
        for idx, asset in enumerate(self.assets):
            investment_df[f'Asset {asset}'] = asset_values[idx]
        self.investment_df = investment_df

    def _hold_values(self):
        """Return the buy-and-hold value of the initial allocation, one entry per simulated step."""
        norm_columns = [f'Norm return {asset}' for asset in self.assets]
        hold = (self.result_df.reset_index()[norm_columns] * self.portfolio).sum(axis=1) * self.initial_investment
        simulated = getattr(self, 'investment_df', None)
        if simulated is not None:
            # Match the simulation length exactly; rounding TEST_SIZE * len
            # down can come out one row short of the number of predictions.
            num_samples = len(simulated)
        else:
            num_samples = int(self.TEST_SIZE * len(hold))
        return hold.iloc[:num_samples]

    def plot_results(self):
        """Plot the simulated portfolio and compare it with the hold baseline.

        Top panel: every column of ``self.investment_df``. Bottom panel:
        'Total Investments' against the buy-and-hold value. Also stores both
        series in ``self.rf_total`` and ``self.df_hold``.
        """
        fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 10))

        # Plot portfolio values
        self.investment_df.plot(ax=axes[0])
        axes[0].set_title('Portfolio Values Over Time')
        axes[0].set_xlabel('Time')
        axes[0].set_ylabel('Value')
        axes[0].grid(True)

        # Plot final performance comparison.
        # NOTE(review): 'Total Investments' is the invested value only; the
        # cash in 'Left' is not part of this curve.
        self.rf_total = self.investment_df['Total Investments']
        self.df_hold = self._hold_values()
        axes[1].plot(self.rf_total, label='Random Forest', marker='o')
        axes[1].plot(self.df_hold, label='Hold Strategy', marker='x')
        axes[1].set_title('Investment Value Over Time: Random Forest vs Hold Strategy')
        axes[1].set_xlabel('Time')
        axes[1].set_ylabel('Total Investment Value')
        axes[1].legend()
        axes[1].grid(True)

        plt.tight_layout()
        plt.show()

    def return_total(self):
        """Return ``(total_investments, hold_values)`` for the simulated steps.

        Also stores the hold baseline in ``self.df_hold``.
        """
        self.df_hold = self._hold_values()
        return (self.investment_df['Total Investments'], self.df_hold)

In [None]:
# Scenario configuration
assets = ['TSLA', 'GOOG', 'IBM']
portfolio = [0.35, 0.45, 0.2]  # initial weight of each asset, same order as `assets`
start_date = "2019-07-07"
end_date = "2024-07-07"
initial_investment = 1000
buy_threshold = 0.001
sell_threshold = 0.002
sell_percentage = 0.25

# Build the model, run the full pipeline and show the result plots
model = InvestmentModel(
    assets=assets,
    portfolio=portfolio,
    start_date=start_date,
    end_date=end_date,
    initial_investment=initial_investment,
    buy_threshold=buy_threshold,
    sell_threshold=sell_threshold,
    sell_percentage=sell_percentage,
)
model.run_model()
model.plot_results()

# return_total() gives a pair: (simulated totals, hold baseline)
total_investments = model.return_total()
