In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from utility.fetch_ import download_stock_data
ticker = ["tatasteel.ns"]
i=0
dd , dm = download_stock_data(ticker=ticker)
daily_data = dd[:]

# Feature Engineering: Create lag features for previous day data
daily_data['Prev_Open'] = daily_data['Open'].shift(1)
daily_data['Prev_Close'] = daily_data['Close'].shift(1)
daily_data['Prev_High'] = daily_data['High'].shift(1)
daily_data['Prev_Low'] = daily_data['Low'].shift(1)
daily_data['Prev_Volume'] = daily_data['Volume'].shift(1)

# Drop rows with NaN values (as they don't have previous day data)
daily_data.dropna(inplace=True)

# Load the historical trade data (for training purposes)
trade_history_df = pd.read_csv(f'./database/{ticker[i]}/trade_history.csv')

# Feature Engineering: Extract useful features from the buy/sell data
trade_history_df['Buy Date'] = pd.to_datetime(trade_history_df['Buy Date'])
trade_history_df['Sell Date'] = pd.to_datetime(trade_history_df['Sell Date'])

# Merge daily data with trade history to incorporate lag and current day's features
trade_history_df = pd.merge(trade_history_df, daily_data, left_on='Buy Date', right_on='Date', how='inner')

# Create additional features based on the lag and current day data
trade_history_df['Price Change'] = trade_history_df['Sell Price'] - trade_history_df['Buy Price']
trade_history_df['Holding Period (Months)'] = (trade_history_df['Sell Date'] - trade_history_df['Buy Date']).dt.days / 30
trade_history_df['Price Change %'] = (trade_history_df['Price Change'] / trade_history_df['Buy Price']) * 100

# Target classification: Based on historical trade data, determine Buy (1), Sell (2), or Hold (0)
def classify_action(row):
    if row['Price Change %'] > 10:  # Condition for a profitable trade to Sell
        return 2  # Sell
    elif row['Price Change %'] < -5:  # Condition for a loss to Buy again (lower price)
        return 1  # Buy
    else:
        return 0  # Hold

trade_history_df['Action'] = trade_history_df.apply(classify_action, axis=1)

# Prepare the feature set and target
X = trade_history_df[['Buy Price', 'Open', 'Close', 'Prev_Open', 'Prev_Close', 'Prev_High', 'Prev_Low', 'Prev_Volume']]
y = trade_history_df['Action']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier for multi-class classification
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)



# Calculate the closest buy price
def calculate_closest_buy_price(row):
    low = row['Low']
    volume = row['Volume']
    open_price = row['Open']
    close_price = row['Close']
    
    # If today's low price is greater than the threshold, suggest a new buy price
    if open_price <= low:
        suggested_buy_price = low
    else:
        # Calculate a new price based on volume (example calculation, adjust as needed)
        suggested_buy_price = low + (volume / 100000)  # Example: Adding a small increment based on volume
    
    return suggested_buy_price



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['Prev_Open'] = daily_data['Open'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['Prev_Close'] = daily_data['Close'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['Prev_High'] = daily_data['High'].shift(1)
A value is trying to be set on a copy of 

In [24]:
# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Now, let's use the trained model to predict a new action (Buy, Hold, Sell) based on a new day's data

new_trade = pd.DataFrame({
    'Buy Price': [daily_data['Close'][len(daily_data) - 1]],
    'Open': [daily_data['Open'][len(daily_data) - 1]],
    'Close': [daily_data['Close'][len(daily_data) - 1]],
    'Prev_Open': [daily_data['Prev_Open'][len(daily_data) - 1]],
    'Prev_Close': [daily_data['Prev_Close'][len(daily_data) - 1]],
    'Prev_High': [daily_data['Prev_High'][len(daily_data) - 1]],
    'Prev_Low': [daily_data['Prev_Low'][len(daily_data) - 1]],
    'Prev_Volume': [daily_data['Prev_Volume'][len(daily_data) - 1]]
})
# Predict action (0: Hold, 1: Buy, 2: Sell)
predicted_action = clf.predict(new_trade)
action_dict = {0: 'Hold', 1: 'Buy', 2: 'Sell'}
print("Predicted Action:", action_dict[predicted_action[0]])


Model Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

           2       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Predicted Action: Sell


In [26]:
# Apply the function to daily data to get suggested buy prices
daily_data['Suggested_Buy_Price'] = daily_data.apply(calculate_closest_buy_price, axis=1)

# Display suggested buy prices for today
today_data = daily_data[daily_data['Date'] == pd.to_datetime('today')]
print("Suggested Buy Prices for Today:\n", today_data[['Date', 'Open', 'Close', 'Low', 'High', 'Volume', 'Suggested_Buy_Price']])


Suggested Buy Prices for Today:
 Empty DataFrame
Columns: [Date, Open, Close, Low, High, Volume, Suggested_Buy_Price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['Suggested_Buy_Price'] = daily_data.apply(calculate_closest_buy_price, axis=1)
