In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# --- 1. Install Dependencies ---
# Using quiet mode to keep the output clean
!pip install -q \
    transformers \
    accelerate \
    bitsandbytes \
    sentence-transformers \
    pypdf \
    unstructured[html] \
    networkx \
    matplotlib \
    streamlit \
    langchain \
    langchain-community \
    langchain-huggingface \
    qdrant-client \
    altair \
    pandas \
    bitsandbytes \
    pyngrok

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m95.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.3/337.3 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m561.5/561.5 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
!pip install plotly



In [4]:
# --- 2. Load Models, Process Data, and Build Resources ---
import os
import torch
import warnings
from kaggle_secrets import UserSecretsClient
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
import networkx as nx
import re
import requests
import json
import altair as alt


# Suppress warnings
warnings.filterwarnings("ignore")

# --- Hugging Face Token Setup ---
try:
    user_secrets = UserSecretsClient()
    hf_token = user_secrets.get_secret("HF_TOKEN")
    os.environ['HUGGING_FACE_HUB_TOKEN'] = hf_token
except Exception as e:
    print("Could not retrieve Hugging Face token. Make sure it's set in Kaggle Secrets.", e)

# --- Document Download and Processing ---
# This uses the correct, modern HTML version of the SEC filing
file_url = "https://www.sec.gov/Archives/edgar/data/789019/000095017024087843/msft-20240630.htm"
file_path = "msft-20240630.htm"
headers = {'User-Agent': "MyKaggleProject myemail@example.com"}

print(f"Downloading file from {file_url}...")
response = requests.get(file_url, headers=headers)
if response.status_code == 200:
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(response.text)
    print("Download complete.")
    loader = UnstructuredHTMLLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    print(f"Document loaded and split into {len(docs)} chunks.")
else:
    print(f"Failed to download file. Status code: {response.status_code}")
    docs = []

# --- Model Loading ---
llm_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
print("Loading LLM...")
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(llm_model_name, quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

# --- Create the Transformers Pipeline ---
# This defines the `text_generation_pipeline`
print("Creating transformers pipeline...")
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    top_p=0.95,
    temperature=0.1,
    repetition_penalty=1.15,
    return_full_text=False # Important for clean Gradio/Streamlit output
)

# --- THIS IS THE CRITICAL STEP THAT FIXES THE ERROR ---
.
llm_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)
print("`llm_pipeline` created successfully.")

# --- Now you can safely use llm_pipeline ---

graph_prompt_template = "..." # Your prompt template here
graph_prompt = PromptTemplate.from_template(graph_prompt_template)

print("\nAll components are ready.")

2025-09-01 08:39:30.581166: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756715970.964254      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756715971.079842      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Downloading file from https://www.sec.gov/Archives/edgar/data/789019/000095017024087843/msft-20240630.htm...
Download complete.
Document loaded and split into 375 chunks.
Loading LLM...


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Device set to use cuda:0


Creating transformers pipeline...
`llm_pipeline` created successfully.

All components are ready.


In [5]:
%%writefile app.py
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.vectorstores import Qdrant
from threading import Thread
import pandas as pd
import io
import re
import plotly.express as px
import json

# --- Page Configuration ---
st.set_page_config(page_title="Kepler Financial Analyst", page_icon="🪐", layout="wide")

# --- Definitive UI Styling ---
st.markdown("""
<style>
    /* Set all text color to be visible on white background */
    body, .st-emotion-cache-1kyxreq, .st-emotion-cache-1y4p8pa, .st-emotion-cache-1629p8f, .st-emotion-cache-1wivap2, .st-emotion-cache-4oy321 p, .st-emotion-cache-1c7y2kd p {
        color: #111111 !important;
    }
    /* Titles and Headers */
    h1, h2, h3 {
        color: #000000;
    }
    /* Sidebar */
    .st-emotion-cache-163cm81 {
        background-color: #F0F2F6;
    }
    /* Chat Bubbles */
    .st-emotion-cache-1c7y2kd { /* Assistant bubble */
        background-color: #F0F2F6;
    }
    .st-emotion-cache-4oy321 { /* User bubble */
        background-color: #FFFBEA; /* Light yellow */
    }
    /* Buttons */
    .stButton > button {
        background-color: #FFD700; /* Yellow */
        color: #111111; /* Black text for buttons */
        border-radius: 12px;
        font-weight: bold;
    }
    .stButton > button:hover {
        background-color: #FFC700;
    }
    /* Stop button specific style */
    .st-emotion-cache-19n6bn1 { /* This targets the stop button specifically */
        background-color: #D32F2F !important; /* Red */
        color: white !important;
    }
</style>
""", unsafe_allow_html=True)

# --- State Management ---
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "Hello! I am an AI analyst. How can I help you analyze or visualize the loaded financial document?"}]
if "is_generating" not in st.session_state:
    st.session_state.is_generating = False
if "stop_generation" not in st.session_state:
    st.session_state.stop_generation = False

# --- Model and Resources Caching ---
@st.cache_resource
def load_resources():
    llm_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
    model = AutoModelForCausalLM.from_pretrained(llm_model_name, quantization_config=bnb_config, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
    file_path = "msft-20240630.htm"
    
    # Load tables from HTML
    try:
        tables = pd.read_html(file_path)
        tables = [t.dropna(how='all').dropna(axis=1, how='all').fillna(0) for t in tables if not t.empty]
    except Exception as e:
        st.warning(f"Failed to read tables from HTML file: {e}")
        tables = []
    
    loader = UnstructuredHTMLLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)
    vectorstore = Qdrant.from_documents(docs, embedding_model, location=":memory:", collection_name="sec_filing")
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    return model, tokenizer, retriever, tables

model, tokenizer, retriever, financial_tables = load_resources()

# --- AI Agent & Helper Functions ---
def get_llm_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=1024, pad_token_id=tokenizer.eos_token_id)
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if prompt in response_text:
        return response_text.split(prompt)[-1].strip()
    return response_text.strip()

def stream_llm_response(prompt):
    st.session_state.stop_generation = False
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, pad_token_id=tokenizer.eos_token_id)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for new_text in streamer:
        if st.session_state.stop_generation:
            break
        yield new_text

def classify_intent(user_prompt: str) -> str:
    """A deterministic, rule-based classifier for user intent with LLM fallback for ambiguous cases."""
    user_prompt = user_prompt.lower()
    chart_keywords = ["chart", "plot", "graph", "visualize", "draw", "show me a chart"]
    data_terms = ["trend", "compare", "over time", "bar", "line", "pie"]
    
    # Explicit chart intent
    if any(kw in user_prompt for kw in chart_keywords):
        return "chart"
    # Implicit chart intent (e.g., "show trend" or "compare X and Y")
    if any(term in user_prompt for term in data_terms) and any(word in user_prompt for word in ["show", "display", "visual"]):
        return "chart"
    # Fallback to LLM for ambiguous cases
    router_prompt = f'You are a router. Based on the user question, is the intent "text" or "chart"? Respond with a single word.\nQuestion: "{user_prompt}"\nResponse:'
    intent = get_llm_response(router_prompt).lower().strip()
    return intent if intent in ["text", "chart"] else "text"

# --- UI Layout ---
st.title("🪐 Kepler Financial Analyst")

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        if "chart" in message:
            st.plotly_chart(message["chart"], use_container_width=True)
        else:
            st.markdown(message["content"])

# --- Main Logic Execution ---
if st.session_state.is_generating:
    last_user_prompt = st.session_state.messages[-1]["content"]
    
    with st.chat_message("assistant"):
        with st.status("Analyzing request...", expanded=True) as status:
            status.update(label="Retrieving context from document...")
            retrieved_docs = retriever.invoke(last_user_prompt)
            retrieved_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
            
            status.update(label="Classifying user intent...")
            intent = classify_intent(last_user_prompt)
            
            if intent == "chart":
                status.update(label="Chart requested. Classifying chart type...")
                chart_type_prompt = f'You are a chart type classifier. Is the chart "time-series" or "comparison"? Respond with a single word.\nQuestion: "{last_user_prompt}"\nResponse:'
                chart_type = get_llm_response(chart_type_prompt).lower().strip()
                
                status.update(label="Extracting data for chart...")
                if "comparison" in chart_type:
                    extractor_prompt = f"""
                    Extract data for a comparison chart. Respond ONLY in a <chart_data> tag with CSV data ('Category,Value').
                    Do NOT include any other tags, JSON, or text outside the <chart_data> tag.
                    Source Text: {retrieved_text}
                    Question: "{last_user_prompt}"
                    Response:
                    """
                else:
                    extractor_prompt = f"""
                    Extract data for a time-series chart. Respond ONLY in a <chart_data> tag with CSV data ('Year,Value').
                    Do NOT include any other tags, JSON, or text outside the <chart_data> tag.
                    Source Text: {retrieved_text}
                    Question: "{last_user_prompt}"
                    Response:
                    """
                
                response_str = get_llm_response(extractor_prompt)
                
                status.update(label="Building visualization with Plotly...")
                try:
                    # Robust regex to handle whitespace and malformed tags
                    match = re.search(r'<chart_data>\s*(.*?)\s*</chart_data>', response_str, re.DOTALL)
                    if not match:
                        raise ValueError("No chart data found in the AI response.")
                    
                    csv_data = match.group(1).strip()
                    if not csv_data:
                        raise ValueError("Chart data is empty.")
                    
                    # Check for unexpected tags
                    if '<bar_chart>' in response_str:
                        st.warning("LLM included unexpected <bar_chart> tag. Ignoring and using <chart_data>.")
                        response_str = re.sub(r'<bar_chart>.*?</bar_chart>', '', response_str, flags=re.DOTALL)
                    
                    # Try table-based extraction as fallback
                    target_df = None
                    x_axis_label = 'year' if 'time-series' in chart_type else 'category'
                    y_axis_label = 'value'
                    for table in financial_tables:
                        table.columns = [str(col).lower().strip() for col in table.columns]
                        if x_axis_label in table.columns and y_axis_label in table.columns:
                            target_df = table[[x_axis_label, y_axis_label]].copy()
                            target_df[y_axis_label] = pd.to_numeric(target_df[y_axis_label], errors='coerce')
                            target_df.dropna(inplace=True)
                            break
                    
                    if target_df is None or target_df.empty:
                        # Fallback to LLM-extracted CSV
                        df = pd.read_csv(io.StringIO(csv_data))
                    else:
                        df = target_df
                    
                    x_col, y_col = df.columns[0], df.columns[1]
                    fig = px.bar(df, x=x_col, y=y_col, title=last_user_prompt, text_auto=True)
                    fig.update_xaxes(type='category')
                    fig.update_layout(title_x=0.5, xaxis_title=x_col.replace("_", " ").title(), yaxis_title=y_col.replace("_", " ").title())
                    
                    st.plotly_chart(fig, use_container_width=True)
                    st.session_state.messages.append({"role": "assistant", "chart": fig})
                except Exception as e:
                    error_message = f"I tried to create a chart, but failed. Error: {e}\n\nHere is the raw data I received:\n```\n{response_str}\n```"
                    st.error(error_message)
                    # Fallback to text response
                    status.update(label="Falling back to text response...")
                    answer_prompt = f"You are a financial analyst AI. Answer the question based on the 'Source Text', followed by a 'Source Citation'.\nSource Text:\n{retrieved_text}\nQuestion: \"{last_user_prompt}\"\nAnswer:"
                    response_generator = stream_llm_response(answer_prompt)
                    full_response = st.write_stream(response_generator)
                    st.session_state.messages.append({"role": "assistant", "content": error_message + "\n\n" + full_response})
            else:
                status.update(label="Generating text-based answer...")
                answer_prompt = f"You are a financial analyst AI. Answer the question based on the 'Source Text', followed by a 'Source Citation'.\nSource Text:\n{retrieved_text}\nQuestion: \"{last_user_prompt}\"\nAnswer:"
                response_generator = stream_llm_response(answer_prompt)
                full_response = st.write_stream(response_generator)
                st.session_state.messages.append({"role": "assistant", "content": full_response})
            
            status.update(label="Done!", state="complete", expanded=False)
    st.session_state.is_generating = False
    st.rerun()

# --- Dynamic Input Area ---
st.markdown("---")
if st.session_state.is_generating:
    if st.button("■ Stop Generation", use_container_width=True, type="primary"):
        st.session_state.is_generating = False
        st.session_state.stop_generation = True
        st.rerun()
else:
    if prompt := st.chat_input("Ask a question, e.g., 'Chart the net income...'"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.session_state.is_generating = True
        st.rerun()

Writing app.py


In [6]:
# --- 4. Extract Information and Build Knowledge Graph (Corrected) ---
import re # Import the regular expressions library

print("Extracting entities and relationships to build the knowledge graph...")

# We are using the same prompt template as before
graph_prompt_template = """
You are a network graph maker. Your task is to extract entities and their relationships from a given text.
You must extract the full entity name. If a relationship is not explicitly mentioned, do not create one.
Format your output as a list of tuples, where each tuple represents a relationship: ('entity1', 'relationship', 'entity2').
Do not add any explanation or text before or after the list.

Example:
Text: Microsoft, a technology company, announced a partnership with OpenAI to develop new AI products.
Output: [('Microsoft', 'is a', 'technology company'), ('Microsoft', 'partnered with', 'OpenAI')]

Text: {chunk}
Output:
"""

graph_prompt = PromptTemplate.from_template(graph_prompt_template)

# Create the LLM chain for graph extraction
graph_extraction_chain = LLMChain(llm=llm_pipeline, prompt=graph_prompt)

# Initialize the knowledge graph
G = nx.DiGraph()

# Process a subset of chunks to build the graph
chunks_to_process = docs[:30]

for i, chunk in enumerate(chunks_to_process):
    print(f"Processing chunk {i+1}/{len(chunks_to_process)}...")
    
    response = graph_extraction_chain.run(chunk.page_content)
    
    # --- NEW ROBUST PARSING LOGIC ---
    # We use regex to find all tuples in the format ('entity1', 'relation', 'entity2')
    # This is much more resilient to errors than eval()
    try:
        # The pattern looks for a parenthesis, a single-quoted string, a comma, 
        # another single-quoted string, a comma, a final single-quoted string, and a closing parenthesis.
        pattern = r"\('([^']*)',\s*'([^']*)',\s*'([^']*)'\)"
        triplets = re.findall(pattern, response)
        
        if triplets:
            for subject, predicate, obj in triplets:
                # Clean up the extracted strings
                subject = subject.strip()
                predicate = predicate.strip()
                obj = obj.strip()
                if subject and predicate and obj: # Ensure no empty strings
                    G.add_edge(subject, obj, label=predicate)
    except Exception as e:
        print(f"Error processing chunk {i+1}: {e}")
        continue

print(f"Knowledge graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Extracting entities and relationships to build the knowledge graph...
Processing chunk 1/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 2/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 3/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 4/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 5/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 6/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 7/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 8/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 9/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 10/30...


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 11/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 12/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 13/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 14/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 15/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 16/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 17/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 18/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 19/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 20/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 21/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 22/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 23/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 24/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 25/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 26/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 27/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 28/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 29/30...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processing chunk 30/30...
Knowledge graph created with 208 nodes and 161 edges.


In [11]:
!pkill -f ngrok
# This command finds the process using port 8501 and terminates it.
!kill $(lsof -t -i:8501)


kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


In [8]:
# --- Final Step: Launch the App and Get the Link ---
import os
import subprocess
from pyngrok import ngrok
from kaggle_secrets import UserSecretsClient



# --- Set up ngrok ---
# This ensures the token is ready. Since you've already configured it, this will just confirm.

try:
    user_secrets = UserSecretsClient()
    ngrok_token = user_secrets.get_secret("NGROK_AUTHTOKEN")
    ngrok.set_auth_token(ngrok_token)
    print("Ngrok authtoken configured successfully.")
except Exception as e:
    print(f"Could not configure ngrok authtoken: {e}")

# --- Launch the Streamlit App in the background ---
print("Launching Streamlit app in the background...")
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# --- Get the Public URL from ngrok and Print It ---
try:
    public_url = ngrok.connect(8501)
    print("---" * 20)
    print(f"✅ Your Streamlit app is LIVE at: {public_url}")
    print("---" * 20)
except Exception as e:
    print(f"Could not connect ngrok. Error: {e}")
    # If it fails, kill the streamlit process
    process.kill()

Ngrok authtoken configured successfully.                                                            
Launching Streamlit app in the background...
------------------------------------------------------------
✅ Your Streamlit app is LIVE at: NgrokTunnel: "https://b0ae027d2b1d.ngrok-free.app" -> "http://localhost:8501"
------------------------------------------------------------


In [9]:
Visualize the change in net income over the last three years as a bar chart.

"Plot the common stock repurchases for 2023 and 2024."

"Can you create a chart comparing cash dividends and stock repurchases?"

SyntaxError: invalid syntax (1546121222.py, line 1)

In [None]:
What was the net income for the most recent fiscal year?
How did the amount spent on common stock repurchases change between 2023 and 2024?
What was the declared cash dividend per share in 2022?
Is there a mention of seasonality affecting the company's revenue? ## Business Segments & Strategy 

#These questions explore the company's operations and future plans. 
What are the company's main reportable segments?
What does the report say about the company's investments in Artificial Intelligence?
Were there any significant acquisitions or partnerships mentioned in the document? 
How did the company change its estimate for the useful lives of server equipment?

## Risk Factors ⚠️ These questions probe the "Risk Factors" section, which is crucial for understanding potential challenges
What are the top 3 business risks identified by the company? 
Does the report mention any risks related to cybersecurity or data breaches? 
What are the potential impacts of global competition on the company's business? 
Are there any legal proceedings mentioned that could materially harm the company?