Preparing Dataset for Tata Motors 

Equity instrument name: NSE_EQ|INE155A01022

Futures instrument name:
June: NSE_FO|63871
July: NSE_FO|63853
August: NSE_FO|56532

Options Contract Instrument Name:
TATAMOTORS24JUN960CE : NSE_FO|124632
TATAMOTORS24JUN960PE : NSE_FO|124633
TATAMOTORS24JUN1000CE : NSE_FO|124641
TATAMOTORS24JUN1000PE : NSE_FO|124642



Upper Break and Buy Signal:

Checks if there was an overlap between the Upper Break and Buy Signal.

Lower Break and Sell Signal:

Checks if there was an overlap between the Lower Break and Sell Signal.

Upper BB values with Buy and Sell Signals:

Checks if there was an overlap between Upper BB and Buy Signal, and Upper BB and Sell Signal.

Lower BB values with Buy and Sell Signals:

Checks if there was an overlap between Lower BB and Buy Signal, and Lower BB and Sell Signal.

RSI values with Buy and Sell Signals:

Checks if there was an overlap between RSI values and Buy Signal, and RSI values and Sell Signal.

OBV values with Buy and Sell Signals:

Checks if there was an overlap between OBV values and Buy Signal, and OBV values and Sell Signal.

MACD values with Buy and Sell Signals:

Checks if there was an overlap between MACD values and Buy Signal, and MACD values and Sell Signal.

In [1]:
import pandas as pd
import json

# Read each indicator export for the instrument into its own DataFrame
ohlcv_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/ohlcv_data_NSE_EQ_INE155A01022_2024-06-14.csv')
hardy_indicator_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/hardy_indicator_data_NSE_EQ_INE155A01022_2024-06-14.csv')
obv_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/obv_data_NSE_EQ_INE155A01022_2024-06-14.csv')
rsi_histo_alert_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/rsiHistoAlert_data_NSE_EQ_INE155A01022_2024-06-14.csv')
trendlines_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/trendlines_data_NSE_EQ_INE155A01022_2024-06-14.csv')
volume_spike_data = pd.read_csv('../Charting System/IndicatorData/NSE_EQ_INE155A01022/volume_spike_data_NSE_EQ_INE155A01022_2024-06-14.csv')

# Left-join every indicator frame onto the OHLCV rows by 'Date', in a fixed
# order. When a column name already exists in the accumulated frame, the
# incoming column gets the frame-specific suffix and the existing one keeps
# its name.
merged_data = ohlcv_data
for indicator_frame, clash_suffix in (
    (hardy_indicator_data, '_hardy'),
    (obv_data, '_obv'),
    (rsi_histo_alert_data, '_rsi'),
    (trendlines_data, '_trendlines'),
    (volume_spike_data, '_volumespike'),
):
    merged_data = merged_data.merge(
        indicator_frame,
        on=['Date'],
        how='left',
        suffixes=('', clash_suffix),
    )

# Function to generate instruction-input-output triplets
# Indicator column pairs for which an "overlap" question is generated.
_OVERLAP_CHECKS = (
    ("BuySignal", "SellSignal"),
    ("MACD_Angle_Less_Than_5", "Crossed_LowerBB"),
    ("MACD_Angle_Greater_Than_Neg5", "Crossed_UpperBB"),
    ("Buy_Signal", "Short_Signal"),
    ("Upper_Break", "Buy_Signal"),
    ("Lower_Break", "Short_Signal"),
    ("UpperBB", "Buy_Signal"),
    ("UpperBB", "Short_Signal"),
    ("LowerBB", "BuySignal"),
    ("LowerBB", "Short_Signal"),
    ("RSI_Histo", "BuySignal"),
    ("RSI_Histo", "Short_Signal"),
    ("OBV", "BuySignal"),
    ("OBV", "Short_Signal"),
    ("MiddleBB", "BuySignal"),
    ("MiddleBB", "Short_Signal"),
    ("MACD", "BuySignal"),
    ("MACD", "Short_Signal"),
    # Add more combinations as needed
)


def _is_signal_set(value):
    """Return True when *value* is present (not None/NaN) and truthy."""
    # NaN is truthy in Python, so a plain truthiness test would count a
    # missing value as an active signal.
    if value is None or pd.isna(value):
        return False
    return bool(value)


def generate_qa_pairs(row):
    """Build the instruction-input-output triplets for one merged data row.

    Args:
        row: A mapping-like record (e.g. a pandas Series) supporting
            ``row[key]`` and ``row.get(key, default)``. It must contain the
            OHLCV, RSI Histo Alert, Volume Spike, OBV and Hardy indicator
            columns read below; the trendline and OBV EMA/Donchian columns
            are optional and rendered as 'N/A' when absent.

    Returns:
        A list of dicts with the keys "instruction", "input" and "output":
        six data triplets followed by one overlap triplet per entry in
        ``_OVERLAP_CHECKS``. Every triplet uses the row's date as its input.

    Raises:
        KeyError: If a required column is missing from ``row``.
    """
    date_input = f"Date: {row['Date']}"
    pairs = []

    def add_pair(instruction, output_text):
        pairs.append({"instruction": instruction, "input": date_input, "output": output_text})

    # OHLCV Data
    add_pair(
        "Provide the OHLCV data for the given date.",
        f"Open: {row['Open']}, High: {row['High']}, Low: {row['Low']}, Close: {row['Close']}, Volume: {row['Volume']}",
    )

    # RSI Histo Alert Data: the candle colour is reported as a sentiment label
    if row['Color'] == 'Green':
        color = 'Positive'
    elif row['Color'] == 'Blue':
        color = 'Neutral'
    else:
        color = 'Negative'
    add_pair(
        "Provide the RSI Histo Alert data for the given date.",
        f"RSI: {row['RSI_Histo']}, Candle: {color}",
    )

    # Volume Spike Data
    add_pair(
        "Provide the Volume Spike data for the given date.",
        f"HA_High: {row['HA_High']}, HA_Low: {row['HA_Low']}, Volume_Flow: {row['Volume_Flow']}, Volume_Spike: {row['Volume_Spike']}",
    )

    # Trendlines Data (all columns optional)
    add_pair(
        "Provide the Trendlines data for the given date.",
        f"Upper_Trendline: {row.get('Upper_Trendline', 'N/A')}, Lower_Trendline: {row.get('Lower_Trendline', 'N/A')}, Upper_Break: {row.get('Upper_Break', 'N/A')}, Lower_Break: {row.get('Lower_Break', 'N/A')}",
    )

    # OBV Data (only 'OBV' itself is required)
    add_pair(
        "Provide the OBV data for the given date.",
        f"OBV: {row['OBV']}, OBV_Fast_EMA: {row.get('OBV_Fast_EMA', 'N/A')}, OBV_Medium_EMA: {row.get('OBV_Medium_EMA', 'N/A')}, OBV_Slow_EMA: {row.get('OBV_Slow_EMA', 'N/A')}, Donchian_Baseline: {row.get('Donchian_Baseline', 'N/A')}",
    )

    # Hardy Indicator Data
    add_pair(
        "Provide the Hardy Indicator data for the given date.",
        (f"MiddleBB: {row['MiddleBB']}, StdDev: {row['StdDev']}, UpperBB: {row['UpperBB']}, LowerBB: {row['LowerBB']}, EMA_Fast: {row['EMA_Fast']}, EMA_Slow: {row['EMA_Slow']}, MACD: {row['MACD']}, Signal_Line: {row['Signal_Line']}, "
         f"BuySignal: {row['BuySignal']}, SellSignal: {row['SellSignal']}, MACD_Angle: {row['MACD_Angle']}, Crossed_LowerBB: {row['Crossed_LowerBB']}, MACD_Angle_Less_than_5: {row['MACD_Angle_Less_Than_5']}, "
         f"Final_Buy_Signal: {row['Buy_Signal']}, Crossed_UpperBB: {row['Crossed_UpperBB']}, MACD_Greater_than_Neg5: {row['MACD_Angle_Greater_Than_Neg5']}, Final_Short_Signal: {row['Short_Signal']}"),
    )

    # Overlap checks: "Yes" only when both columns hold a present, truthy
    # value. Missing values (None/NaN) count as "no signal".
    for indicator1, indicator2 in _OVERLAP_CHECKS:
        instruction = f"Was there an overlap of {indicator1} and {indicator2} on the given date?"
        if _is_signal_set(row.get(indicator1)) and _is_signal_set(row.get(indicator2)):
            output_text = f"Yes, there was an overlap of {indicator1} and {indicator2} on {row['Date']}."
        else:
            output_text = f"No, there was no overlap of {indicator1} and {indicator2} on {row['Date']}."
        add_pair(instruction, output_text)

    return pairs

# Collect the triplets produced for every row of the merged dataset
qa_dataset = []
for _, row in merged_data.iterrows():
    qa_dataset += generate_qa_pairs(row)

# Write the dataset to disk.
# NOTE: despite the .jsonl extension, the file holds a single JSON array whose
# elements are pretty-printed objects separated by ",\n" (not one object per line).
with open('qa_TATAMOTORS_dataset.jsonl', 'w') as out_file:
    out_file.write('[\n')
    separator = ''  # nothing precedes the first record
    for record in qa_dataset:
        out_file.write(separator)
        json.dump(record, out_file, indent=4)
        separator = ',\n'
    out_file.write('\n]')

print(f"Generated {len(qa_dataset)} instruction-input-output triplets.")


Generated 1139040 instruction-input-output triplets.


Completed Generation of Training Dataset from 2023-01-01 to 2024-06-15 for Equity TATA MOTORS

In [4]:
import torch

# Print the installed PyTorch version, then whether CUDA (GPU support) is
# available, each on its own line.
print(torch.__version__, torch.cuda.is_available(), sep='\n')


2.3.1+cpu
False


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Define the path where the model is saved
model_path = 'finetuned_llama3'  # Update this to the path where you saved the model

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Limit the number of threads used by PyTorch
torch.set_num_threads(4)
# NOTE(review): torch is already imported at this point, so this environment
# variable may be read too late to affect the current process; confirm whether
# it needs to be set before the import.
os.environ["OMP_NUM_THREADS"] = "4"

try:
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    print("Tokenizer loaded.")

    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True).to(device)
    print("Model loaded.")

    # Example usage
    input_text = "What is the latest price of TATA Motors?"
    # Calling the tokenizer (rather than .encode) also returns the attention
    # mask, which is passed to generate() explicitly below.
    encoded = tokenizer(input_text, return_tensors='pt', return_attention_mask=True).to(device)
    input_ids = encoded["input_ids"]

    # Generate a response from the model
    print("Generating response...")
    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_length=50,
        num_return_sequences=1,
    )
    print("Response generated.")

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Decoded generated text.")

    print(f"Generated text: {generated_text}")

except Exception as e:
    # Top-level notebook boundary: report the failure instead of raising
    print(f"An error occurred: {str(e)}")


  from .autonotebook import tqdm as notebook_tqdm


Using CPU
Loading tokenizer...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Tokenizer loaded.
Loading model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

: 