# In [2]:
import re
import matplotlib.pyplot as plt # type: ignore
from transformers import T5ForConditionalGeneration, T5Tokenizer # type: ignore

# Function to extract revenue data from 10-K filing text
def extract_revenue_data(filing_text):
    """Extract the first number that follows each revenue keyword.

    A match is a whole-word keyword ("revenue", "revenues", "sales" or
    "income", case-insensitive), then the shortest possible run of
    characters on the same line, an optional "$", and a number. The number
    may be comma-grouped ("1,234,567.89") or a plain digit run ("1234567").

    Args:
        filing_text: Text to scan.

    Returns:
        A list of floats, one per match, in order of appearance. The list
        is empty when nothing matches.

    Note:
        The match is purely textual: the first number after a keyword is
        taken even if it is a year or a footnote reference.
    """
    # "\d+" (rather than "\d{1,3}") lets ungrouped numbers such as "12345"
    # match in full. The two lookbehinds stop a match from starting in the
    # middle of a longer number, so a number is either captured whole or
    # not at all.
    revenue_pattern = re.compile(
        r"\b(?:revenues?|sales|income)\b.*?\$?(?<!\d)(?<!\d,)"
        r"(\d+(?:,\d{3})*(?:\.\d+)?)\b",
        re.IGNORECASE,
    )

    # Strip thousands separators before converting to float.
    return [
        float(match.group(1).replace(",", ""))
        for match in revenue_pattern.finditer(filing_text)
    ]

# Function to generate insights from 10-K filings using T5 model
def generate_insight(filing_text):
    """Run the "t5-small" checkpoint over the start of a filing and return its output.

    The tokenizer and model are loaded on every call. Only the first 2048
    characters of ``filing_text`` are used, prefixed with a fixed
    instruction, and the encoded input is truncated to 512 tokens.

    Args:
        filing_text: Text of the 10-K filing.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    checkpoint = "t5-small"
    t5_tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    t5_model = T5ForConditionalGeneration.from_pretrained(checkpoint)

    # Character-level cut first; the tokenizer applies the token-level cut below.
    excerpt = filing_text[:2048]
    instruction = "Generate insight about revenue growth based on the provided 10-K filing text: "

    encoded = t5_tokenizer.encode(
        instruction + excerpt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # One sequence of at most 100 tokens is requested.
    sequences = t5_model.generate(encoded, max_length=100, num_return_sequences=1)

    return t5_tokenizer.decode(sequences[0], skip_special_tokens=True)

# Function to visualize revenue data for a specific year
def visualize_revenue_growth(year, revenues):
    """Show a line chart of ``revenues`` labelled with ``year``.

    The values are plotted against their position in the sequence as a blue
    line with circular markers. The x-axis is labelled 'Quarter' and the
    y-axis 'Revenue (USD)'.

    Args:
        year: Value interpolated into the chart title.
        revenues: Sequence of numbers to plot.
    """
    figure = plt.figure(figsize=(8, 6))
    axes = figure.add_subplot(1, 1, 1)

    axes.plot(revenues, linestyle='-', color='b', marker='o')

    chart_title = f'Company XYZ Revenue Growth ({year})'
    axes.set_title(chart_title)
    axes.set_xlabel('Quarter')
    axes.set_ylabel('Revenue (USD)')
    axes.grid(True)

    figure.tight_layout()
    plt.show()

# Load the 10-K filing text from the uploaded file
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding. Bytes that are not valid UTF-8 are replaced with U+FFFD
# instead of aborting the read with UnicodeDecodeError.
# The file must exist in the current working directory; otherwise open()
# raises FileNotFoundError.
with open("full-submission.txt", "r", encoding="utf-8", errors="replace") as file:
    filing_text = file.read()

# Generate insight from the filing text
insight = generate_insight(filing_text)
print("Generated Insight:", insight)

# Extract revenue data from the filing text
revenues = extract_revenue_data(filing_text)

# Plot the extracted figures; the year is only used in the chart title
visualize_revenue_growth("2022", revenues)


# Recorded cell output:
# FileNotFoundError: [Errno 2] No such file or directory: 'full-submission.txt'