In [None]:
# Install every package the next cell imports (including `evaluate`) into the
# kernel's own environment; `%pip` targets the running kernel.
%pip install transformers datasets evaluate

In [4]:
import inspect
import json

import evaluate
import torch
from datasets import load_dataset, Dataset
from transformers import BertTokenizer, BertForQuestionAnswering, TrainingArguments, Trainer

# Load the dataset
with open('stock_market_qa.json', 'r') as f:
    qa_pairs = json.load(f)

# Convert to Hugging Face Dataset
dataset = Dataset.from_dict({
    'id': [item['id'] for item in qa_pairs],
    'question': [item['question'] for item in qa_pairs],
    'answers': [{'text': item['answers'], 'answer_start': [0]} for item in qa_pairs]  # Dummy answer_start
})



In [5]:
# Split the dataset into train (80%) and validation (20%) sets.
# The result is bound to a new name so `dataset` stays the original Dataset and
# this cell can be re-run safely; the fixed seed makes the split reproducible.
dataset_splits = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = dataset_splits['train']
val_dataset = dataset_splits['test']

In [None]:
# Pre-trained checkpoint shared by the tokenizer and the model.
model_name = "bert-base-uncased"

# Load the tokenizer and the question-answering model from that checkpoint.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)
model = BertForQuestionAnswering.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of questions for use with `Dataset.map(batched=True)`.

    Args:
        examples: A batch from the dataset; only the "question" column (a list
            of strings) is read.

    Returns:
        The tokenizer output for the stripped questions, truncated to at most
        512 tokens.

    No `return_tensors` is requested and sequences are not padded here: `map`
    stores plain lists, and padding every question to 512 tokens makes each
    training step far more expensive than needed. The Trainer pads each batch
    to its longest sequence when it is given the tokenizer.

    NOTE(review): no `start_positions` / `end_positions` labels are produced,
    so a question-answering model trained on this output has nothing to compute
    a loss from. Confirm where the labels are meant to come from.
    """
    questions = [q.strip() for q in examples["question"]]
    return tokenizer(
        questions,
        truncation=True,
        max_length=512,
    )

# Tokenize both splits with identical settings: batched calls spread over
# 4 worker processes (adjust num_proc to the number of CPU cores available).
map_options = {"batched": True, "num_proc": 4}
tokenized_train_dataset = train_dataset.map(preprocess_function, **map_options)
tokenized_val_dataset = val_dataset.map(preprocess_function, **map_options)

In [None]:
# `%pip` installs into the running kernel's environment and does not rely on
# IPython's automagic being enabled.
%pip install evaluate

In [None]:
import evaluate
# Define training arguments.
# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases; pass whichever field the installed TrainingArguments declares so
# the cell works on both.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    **{eval_strategy_kwarg: "epoch"},  # evaluate at the end of every epoch
)


# Load the evaluation metrics
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
bleu_metric = evaluate.load("bleu")

# Function to compute metrics
def _span_positions(start, end):
    """Return the token positions of an inclusive span, or [] if end < start."""
    return list(range(start, end + 1)) if end >= start else []


def _span_overlap_f1(pred_positions, gold_positions):
    """Return the F1 of the position overlap between a predicted and a gold span."""
    shared = len(set(pred_positions) & set(gold_positions))
    if shared == 0:
        return 0.0
    precision = shared / len(pred_positions)
    recall = shared / len(gold_positions)
    return 2 * precision * recall / (precision + recall)


def compute_metrics(eval_pred):
    """Compute span-level metrics for an extractive question-answering model.

    Args:
        eval_pred: A `(predictions, labels)` pair. `predictions` is expected to
            be `(start_logits, end_logits)` and `labels` to be
            `(start_positions, end_positions)`, which is what a
            question-answering head yields through the Trainer
            (NOTE(review): confirm against the model in use).

    Returns:
        A dict with:
            "accuracy": fraction of examples whose predicted span equals the
                gold span exactly.
            "f1": mean position-overlap F1 between predicted and gold spans.
            "bleu": BLEU over the spans' token positions rendered as
                whitespace-separated strings. Decoded text is not available
                here, so positions stand in for tokens. 0.0 when there is
                nothing to score.

    The previous version decoded `predictions` as token ids and passed strings
    to the integer-only accuracy/F1 metrics; neither works with logits.
    """
    predictions, labels = eval_pred
    start_logits, end_logits = predictions[0], predictions[1]
    gold_starts, gold_ends = labels[0], labels[1]

    # Most likely start/end token position for every example.
    pred_starts = start_logits.argmax(axis=-1)
    pred_ends = end_logits.argmax(axis=-1)

    pred_spans = [_span_positions(int(s), int(e)) for s, e in zip(pred_starts, pred_ends)]
    gold_spans = [_span_positions(int(s), int(e)) for s, e in zip(gold_starts, gold_ends)]

    total = len(gold_spans)
    if total == 0:
        return {"accuracy": 0.0, "f1": 0.0, "bleu": 0.0}

    exact_match = sum(pred == gold for pred, gold in zip(pred_spans, gold_spans)) / total
    mean_f1 = sum(_span_overlap_f1(pred, gold) for pred, gold in zip(pred_spans, gold_spans)) / total

    # The BLEU metric takes raw strings (one per prediction) and a list of
    # reference strings per prediction. It is skipped when every predicted or
    # every gold span is empty, because there is nothing to score.
    if any(pred_spans) and any(gold_spans):
        pred_texts = [" ".join(str(pos) for pos in span) for span in pred_spans]
        gold_texts = [" ".join(str(pos) for pos in span) for span in gold_spans]
        bleu_score = bleu_metric.compute(
            predictions=pred_texts,
            references=[[text] for text in gold_texts],
        )["bleu"]
    else:
        bleu_score = 0.0

    return {
        "accuracy": exact_match,
        "f1": mean_f1,
        "bleu": bleu_score
    }

# Initialize the Trainer with compute_metrics.
# Newer transformers releases take the tokenizer as `processing_class`; older
# ones only accept `tokenizer`. Pass whichever the installed Trainer declares.
trainer_init_params = inspect.signature(Trainer.__init__).parameters
tokenizer_kwarg = "processing_class" if "processing_class" in trainer_init_params else "tokenizer"

# NOTE(review): BertForQuestionAnswering presumably needs start_positions /
# end_positions columns to produce a training loss, and the tokenized datasets
# do not appear to contain them. Confirm before running.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer},
)

# Train the model
trainer.train()


# Evaluate the model on the validation split and show the metrics
eval_results = trainer.evaluate()
print(eval_results)



In [None]:
# `%pip` installs into the running kernel's environment and does not rely on
# IPython's automagic being enabled.
%pip install gradio

In [None]:
import gradio as gr

def answer_question(question):
    """Return the answer span the QA model extracts for `question`.

    Args:
        question: The user's question text; may be empty or None.

    Returns:
        The decoded text between the most likely start and end token
        positions, or "" when the question is blank or the predicted end
        precedes the predicted start.

    The model is an extractive question-answering head, so the answer is read
    from its start/end logits rather than from `generate`.

    NOTE(review): only the question is encoded (no context passage), so the
    extracted span can only come from the question itself. Confirm whether a
    context should be supplied.
    """
    if not question or not question.strip():
        return ""

    inputs = tokenizer(
        question.strip(),
        return_tensors="pt",
        truncation=True,  # keep the input within the 512-token limit used in training
        max_length=512,
    )
    # The model may sit on a GPU after training; inputs must be on the same device.
    inputs = inputs.to(model.device)

    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)

    start = int(outputs.start_logits.argmax(dim=-1)[0])
    end = int(outputs.end_logits.argmax(dim=-1)[0])
    if end < start:
        # Inverted span: the model found no usable answer.
        return ""

    answer = tokenizer.decode(inputs["input_ids"][0, start:end + 1], skip_special_tokens=True)
    return answer

# Build the web UI: one two-line text box in, plain text out.
# `gr.Textbox` is the top-level component; the legacy `gr.inputs` namespace is
# not available in current Gradio releases.
interface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="Financial Question Answering",
    description="Ask any financial question related to the stock market data."
)

# Launch the interface
interface.launch()