In [None]:
%pip install tf-keras
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

# FinBERT - Pre-trained on financial text
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
import pandas as pd
import json
import numpy as np
from sklearn.model_selection import train_test_split

# Load your dataset
df = pd.read_csv('dataset_model_1.csv')

# Fail fast with a readable message if the columns used further down
# ('Title' for the text, 'Decisions' for the labels) are not present.
missing_columns = {'Title', 'Decisions'} - set(df.columns)
if missing_columns:
    raise KeyError(
        f"dataset_model_1.csv is missing required column(s): {sorted(missing_columns)}"
    )

# The tokenizer only accepts strings: rows without a headline are dropped and
# the remaining titles are coerced to str so that a stray NaN / numeric cell
# cannot abort tokenization later with an opaque type error.
df = (
    df.dropna(subset=['Title'])
    .assign(Title=lambda frame: frame['Title'].astype(str))
    .reset_index(drop=True)
)

# Parse decisions to get labels
def get_sentiment_label(decision_str):
    """Convert a serialized decisions mapping into an integer class id.

    The sentiment of the *first* entry of the mapping decides the label:
    'negative' -> 0, 'neutral' -> 1, 'positive' -> 2.

    Args:
        decision_str: Text holding a dict, either as JSON
            (``{"AAPL": "positive"}``) or as a Python literal
            (``{'AAPL': 'positive'}``). Non-string values (e.g. NaN from a
            missing CSV cell) are accepted and treated as unparseable.

    Returns:
        int: The class id, or 1 (neutral) when the input cannot be parsed, is
        not a non-empty dict, or carries an unknown sentiment value.
    """
    import ast  # function-scope import: the block needs no new top-level import

    sentiment_map = {'positive': 2, 'neutral': 1, 'negative': 0}
    fallback_label = sentiment_map['neutral']

    if not isinstance(decision_str, str):
        return fallback_label

    # Try the strict parsers first so that apostrophes inside keys/values
    # (e.g. "Moody's") survive. The blanket quote swap is kept only as a last
    # resort for mixed inputs such as {'a': 'positive', 'b': null}.
    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    # Only parsing failures are swallowed; KeyboardInterrupt / SystemExit
    # propagate (json.JSONDecodeError is a ValueError subclass).
    parse_errors = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)

    decisions = None
    for parse in parsers:
        try:
            decisions = parse(decision_str)
        except parse_errors:
            continue
        break

    if not isinstance(decisions, dict) or not decisions:
        return fallback_label

    sentiment = next(iter(decisions.values()))
    if not isinstance(sentiment, str):
        return fallback_label
    return sentiment_map.get(sentiment, fallback_label)

# Derive an integer class id for every row from its 'Decisions' entry.
df['label'] = df['Decisions'].apply(get_sentiment_label)

# Hold out 20% of the rows. Stratifying on the label keeps the class
# proportions aligned between the two partitions; the fixed random_state
# makes the partitioning repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Tokenize the data
def _encode_titles(frame):
    """Tokenize the 'Title' column of `frame` with the shared tokenizer.

    Both partitions go through this single helper so they are guaranteed to
    use the same settings (truncation on, padding on, max_length 128,
    TensorFlow tensors as output).
    """
    return tokenizer(
        frame['Title'].tolist(),
        truncation=True,
        padding=True,
        max_length=128,
        return_tensors='tf',
    )


train_encodings = _encode_titles(train_df)
test_encodings = _encode_titles(test_df)

# Create TF datasets
# Each element pairs the dict of tokenizer outputs with its integer label.
#
# Keras does not shuffle `tf.data.Dataset` inputs on its own, so the training
# pipeline shuffles explicitly. The buffer spans the whole training set (a
# full shuffle), it is applied *before* batching so batch composition changes,
# and by default the order is re-drawn on every epoch. The seed keeps runs
# repeatable; max(..., 1) guards against a zero-sized buffer.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((
        dict(train_encodings),
        train_df['label'].values
    ))
    .shuffle(buffer_size=max(len(train_df), 1), seed=42)
    .batch(8)
)

# Evaluation data keeps its original order: shuffling would not change the
# metrics and a stable order makes per-row inspection easier.
test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    test_df['label'].values
)).batch(8)

# Compile model
# The loss is built with from_logits=True, i.e. it is told that the model
# output has not been passed through a softmax. Labels are plain integer
# class ids, hence the *sparse* categorical variant.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Adam with a learning rate of 2e-5.
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)

# NOTE(review): `model` comes from transformers' TF classes — confirm that
# `tf.keras` resolves to the same Keras implementation the model is built on.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Fine-tune the model
# NOTE(review): the split passed as validation_data here is the same
# `test_dataset` that is scored afterwards as the test set, so the reported
# test metrics are not from data unseen during training-time monitoring.
history = model.fit(
    train_dataset,
    epochs=3,  # 3-5 epochs is typical for fine-tuning
    validation_data=test_dataset,
)

# Evaluate
# `results` is indexed positionally: entry 0 is reported as the loss and
# entry 1 as the accuracy (the single metric passed to `compile`).
results = model.evaluate(test_dataset)
test_loss, test_accuracy = results[0], results[1]

print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Save the fine-tuned model and tokenizer
output_dir = 'finbert_finetuned'

# Persist the class-index <-> name mapping that was actually used for training
# (it must mirror `sentiment_map` in get_sentiment_label). The config loaded
# with the base checkpoint carries its own label order; if that is left in
# place, anything that decodes predictions through `config.id2label` (e.g. a
# text-classification pipeline on the saved model) can report the wrong class
# name for a given index.
label2id = {'negative': 0, 'neutral': 1, 'positive': 2}
model.config.label2id = label2id
model.config.id2label = {index: name for name, index in label2id.items()}

# Model weights/config and tokenizer files go to the same directory so the
# folder can be reloaded as one self-contained checkpoint.
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("Model saved successfully!")