In [None]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from optStrat import OptStrat



# Location of the input CSV. Defaults to the original author's local path;
# set the ALGO_TRADING_DATA_CSV environment variable to run the notebook on
# another machine without editing this cell.
DATA_CSV_PATH = os.environ.get(
    'ALGO_TRADING_DATA_CSV',
    '/Users/hatim/Desktop/Applied Forecasting/Final Project/Algo Trading/Data.csv',
)

# The first CSV column becomes the index and is parsed as dates.
df = pd.read_csv(DATA_CSV_PATH, index_col=0, parse_dates=True)
df

# Finding the optimal trading strategy in hindsight
- 1 will be buy
- -1 will be sell
- 0 will be hold

The goal is to maximize return while accounting for transaction costs. We ignore the volatility adjustment here because it turns the problem into a non-convex optimization that is computationally infeasible. Furthermore, in hindsight the problem is deterministic, so the concept of risk is not meaningful and we ignore it.


## Defining the optimization problem
### Starting assumptions
we will reduce the labels to the following:
- 1 will be in the market (buy all that we can)
- 0 will be out of the market (sell all that we can)
This can then be transformed into our original labels easily.

We use dynamic programming for this



In [None]:
# Build the hindsight strategy object from the loaded price data.
# presumably this runs the dynamic-programming optimisation described above
# and exposes its results on `strat.df` — verify against optStrat.OptStrat.
strat = OptStrat(df=df)

In [None]:
# How often each value occurs in the strategy's 'Signals' column.
signal_counts = strat.df['Signals'].value_counts()
signal_counts

In [None]:
# Remove the 'Strategy Returns' column from `df` before building features.
# NOTE(review): presumably this column is added by OptStrat and is dropped so
# hindsight returns cannot leak into the features — confirm.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# has already been dropped no longer raises KeyError.
df.drop(columns='Strategy Returns', inplace=True, errors='ignore')

# Learning signals and predicting for the future

## Feature Engineering

## Getting Features

In [None]:
from features import FeatureEngineer


# Number of features requested from the selection step.
N_BEST_FEATURES = 25

# Build every engineered feature on top of `df`, then split the result into
# the feature matrix X and the target y.
engineer = FeatureEngineer(df)
engineer.add_all_features()
X, y = engineer.get_feature_target_split()

# NOTE(review): selection is called before any train/test split is made in
# this notebook; if it scores features against the target on all rows, the
# test period influences which features are kept — confirm in features.py.
X_best, scores, selected_features = engineer.select_best_features(k=N_BEST_FEATURES)

## Cleaning and Preprocessing

### Check Missing Values

In [None]:
# Restrict the feature matrix to the entries chosen by the selection step.
# presumably `selected_features` is a list of column names — verify against
# FeatureEngineer.select_best_features.
# NOTE(review): no missing-value check is performed in this cell.
X_selected = X[selected_features]



### Standardize Features

In [None]:
def test_train_split(X, y, test_size=0.2):
    assert len(X) == len(y)
    n = len(X)
    test_size = int(n * test_size)
    X_train = X[:-test_size]
    X_test = X[-test_size:]
    y_train = y[:-test_size]
    y_test = y[-test_size:]
    return X_train, X_test, y_train, y_test

# Fraction of rows (taken from the end of the data) held out for testing.
TEST_FRACTION = 0.2

X_train, X_test, y_train, y_test = test_train_split(
    X_selected,
    y,
    test_size=TEST_FRACTION,
)

In [None]:
from sklearn.preprocessing import StandardScaler


# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both splits so the test rows never influence it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



# Models

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Display names for the classes, in the order scikit-learn sorts them.
# Assumes the target is encoded -1 (sell), 0 (hold), 1 (buy) as stated in the
# notebook's introduction — TODO confirm against the FeatureEngineer target.
CLASS_NAMES = ['Sell', 'Hold', 'Buy']

rf = RandomForestClassifier(
    n_estimators=500,
    random_state=42,  # fixed seed so a re-run trains the same forest
)
rf.fit(X_train_scaled, y_train)

y_pred = rf.predict(X_test_scaled)
y_pred_proba = rf.predict_proba(X_test_scaled)

# One probability column per class, indexed like the test rows so the frame
# can later be joined to price data by date.
decisions = pd.DataFrame(y_pred_proba, columns=CLASS_NAMES, index=X_test.index)

# Pin the matrix to the classes the model was trained on. Without `labels`,
# confusion_matrix only uses classes that occur in y_test/y_pred, so a class
# missing from the test window would shrink the matrix and misalign it with
# the tick labels below.
conf_matrix = confusion_matrix(y_test, y_pred, labels=rf.classes_)

# plot the confusion matrix
plt.figure(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=CLASS_NAMES,
    yticklabels=CLASS_NAMES,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Importance score the fitted forest assigns to each training column.
importances = rf.feature_importances_

# Horizontal bar chart: one bar per feature column of X_train.
fig, ax = plt.subplots(figsize=(10, 7))
ax.barh(X_train.columns, importances)
ax.set_xlabel('Feature Importance')
ax.set_title('Random Forest Feature Importances')
plt.show()

# Simulate Trade

In [None]:
# Market columns joined onto the predictions.
market_columns = ['Close', 'Risk Free Rate']
btc_data = df[market_columns]

# Left join on the index: keep every predicted row and attach the market
# columns for the same index value.
trade_data = decisions.merge(
    btc_data,
    how='left',
    left_index=True,
    right_index=True,
)
trade_data.head()



In [None]:
# Grouped histogram of the three predicted-probability columns.
proba_columns = ['Sell', 'Hold', 'Buy']
proba_hist = px.histogram(
    trade_data[proba_columns],
    barmode='group',
    title='Probabilities of Predictions',
)
proba_hist.show()

In [None]:
from trader import TradingSimulator

# Back-test settings passed to the simulator.
# NOTE(review): units are not visible here — presumably currency units for
# the capital and a per-trade fraction for the cost; confirm in trader.py.
INITIAL_CAPITAL = 100
TRANSACTION_COST = 0.005

simulator = TradingSimulator(
    trade_data,
    initial_capital=INITIAL_CAPITAL,
    transaction_cost=TRANSACTION_COST,
)

# Run the simulation first; the two plots are drawn from the simulator afterwards.
simulator.simulate(decision_method='highest_prob')
simulator.plot_portfolio_performance()
simulator.plot_performance_metrics()
