<a href="https://colab.research.google.com/github/JaimRM/Portfolio-Management/blob/main/algo1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Data Acquisition and Preprocessing
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# --- Configuration ---
ticker = "CL=F" # WTI Crude Oil Futures
start_date = "2023-01-01"
end_date = "2024-01-01"
prediction_horizon = 1 # Number of days ahead to predict movement

# Fetch historical data
print(f"Fetching data for {ticker} from {start_date} to {end_date}...")
data = yf.download(ticker, start=start_date, end=end_date)

# --- Column Flattening and Cleaning ---
# yfinance can return MultiIndex columns, which can cause issues with direct column access.
# This flattens the columns and ensures 'Close' refers to the 'Adj Close' price.
if isinstance(data.columns, pd.MultiIndex):
    # Drop the ticker level (level 1) if columns are like ('Attribute', 'Ticker')
    # This results in 'Close', 'Adj Close', 'Open', etc.
    data.columns = data.columns.droplevel(1)

# Ensure 'Close' column is available and represents the adjusted close price.
# 'Adj Close' is generally preferred for financial analysis as it's adjusted for splits/dividends.
if 'Adj Close' in data.columns:
    data['Close'] = data['Adj Close']
# Drop 'Adj Close' if it exists as a separate column to avoid ambiguity after copying to 'Close'.
data.drop(columns=['Adj Close'], errors='ignore', inplace=True)

# Calculate VWAP
data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum()

# Calculate Cumulative Delta (simplified)
# Assuming a close > open is a 'buy' day and close < open is a 'sell' day for simple delta
data['Delta'] = data['Volume'] * (data['Close'] > data['Open']).astype(int) - \
                data['Volume'] * (data['Close'] < data['Open']).astype(int)
data['CumulativeDelta'] = data['Delta'].cumsum()

# --- Market Profile (Simplified Proxy) ---
# Full Market Profile requires minute data and TPO (Time Price Opportunity) logic.
# Here we use Volatility and Range as proxies for Market Profile components.
def calculate_market_profile_proxy(df, window):
    """Calculates proxies for Market Profile features."""
    # Range is a proxy for the 'Profile' height
    df[f'Range_W{window}'] = (df['High'] - df['Low']).rolling(window=window).mean()

    # Calculate True Range (TR) first
    high_minus_low = df['High'] - df['Low']
    high_minus_prev_close = (df['High'] - df['Close'].shift()).abs()
    low_minus_prev_close = (df['Low'] - df['Close'].shift()).abs()

    # True Range is the maximum of these three, using numpy.maximum for element-wise comparison
    true_range = np.maximum(high_minus_low, np.maximum(high_minus_prev_close, low_minus_prev_close))

    # Average True Range (ATR) is the rolling mean of True Range
    df[f'ATR_W{window}'] = true_range.rolling(window=window).mean()
    return df

# Calculate market profile proxies for different timeframes
data = calculate_market_profile_proxy(data, window=5) # 5-day window (~1 week)
data = calculate_market_profile_proxy(data, window=20) # 20-day window (~1 month)


# --- Feature Engineering and Target Definition ---
# Feature engineering based on VWAP, cumulative delta, and MP proxies
data['VWAP_diff'] = data['Close'] - data['VWAP']
data['Range_diff'] = data['Range_W5'] - data['Range_W20'] # Comparing short-term vs long-term range
data['ATR_W5'] = data['ATR_W5'] # Volatility proxy

# Define the Target Variable (y): Binary Classification (Predicting next day's movement)
# Target = 1 (Price goes up tomorrow) or 0 (Price goes down/stays flat)
data['Target'] = np.where(data['Close'].shift(-prediction_horizon) > data['Close'], 1, 0)

# --- Final Preprocessing ---
# Handle remaining NaN values created by rolling windows and target shifting
data.fillna(method='ffill', inplace=True)
data.dropna(inplace=True) # Drop any remaining NaNs

# --- Machine Learning Model Training ---
print("\nTraining Model...")

# Split data into features (X) and target variable (y)
X = data[['VWAP_diff', 'CumulativeDelta', 'Range_W5', 'Range_W20', 'ATR_W5']]  # Features
y = data['Target']

# Check if we have enough data points
if len(X) < 100:
    print(f"Warning: Only {len(X)} data points available. Need more data for robust training.")
    # Exit or use a smaller test_size

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Choose a suitable machine learning algorithm
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# --- Trading Strategy: Model Prediction and Evaluation ---
print("\nModel Evaluation (Out-of-Sample Test)")
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

def generate_signals(data, model, features):
    """
    Uses the trained model to generate trading signals (1=Buy/Hold, 0=Sell/Avoid).
    The last row of data is used for the current day's prediction.
    """
    # Use the last row for current prediction
    current_features = data[features].iloc[[-1]]

    # Model predicts the movement (1 for up, 0 for down/flat)
    signal = model.predict(current_features)[0]

    if signal == 1:
        return "BUY/LONG: Predicted price increase tomorrow."
    else:
        return "SELL/SHORT/HOLD: Predicted price decrease or flat tomorrow."

# Generate a final signal using the whole dataset's structure
features_list = ['VWAP_diff', 'CumulativeDelta', 'Range_W5', 'Range_W20', 'ATR_W5']
final_signal = generate_signals(data, model, features_list)

print(f"\n--- Final Trading Signal ---")
print(f"Based on the last available data point (as of {data.index[-1].strftime('%Y-%m-%d')}):")
print(final_signal)

#Important Considerations

#Data Quality:** Ensure accurate and reliable data.
#Feature Selection:** Choose features that have strong predictive power.
#Model Evaluation:** Continuously evaluate model performance and retrain as needed.
#Overfitting:** Avoid overfitting by using appropriate regularization techniques.
#Transaction Costs:** Consider transaction costs in your strategy.
#Risk Management:** Implement stop-loss and take-profit orders.

#Additional Enhancements

#Deep Learning:Explore deep learning models for complex pattern recognition.
#Reinforcement Learning:** Train an agent to learn optimal trading actions.
#Ensemble Methods: Combine multiple models for improved performance.
#Hyperparameter Tuning: Optimize model parameters for better results.

#Remember: This is a simplified example. Building a robust trading system requires significant effort and expertise.
#Would you like to focus on a specific part of this process, such as data acquisition, feature engineering, or model selection?**
#Support Vector Machine or LSTM and train it on the prepared data.

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed

Fetching data for CL=F from 2023-01-01 to 2024-01-01...

Training Model...



  data.fillna(method='ffill', inplace=True)



Model Evaluation (Out-of-Sample Test)
              precision    recall  f1-score   support

           0       0.59      0.59      0.59        27
           1       0.42      0.42      0.42        19

    accuracy                           0.52        46
   macro avg       0.51      0.51      0.51        46
weighted avg       0.52      0.52      0.52        46


--- Final Trading Signal ---
Based on the last available data point (as of 2023-12-29):
BUY/LONG: Predicted price increase tomorrow.
