# Financial AI - Refactored Notebook
This notebook contains the refactored code for the Financial AI project.

In [4]:

# Import necessary libraries
import os
from transformers import pipeline
import spacy
import gradio as gr

# Initialize spaCy once and load the model
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')


  warn(








<spacy.pipeline.sentencizer.Sentencizer at 0x222f492bf50>

In [5]:

# Function to initialize all models at once
def initialize_models():
    try:
        asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
        summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
        fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
        return asr, summarizer, fin_model
    except Exception as e:
        print(f"Error initializing models: {e}")
        return None, None, None


In [6]:

# Function for splitting text into sentences
def split_in_sentences(text):
    """
    Splits the input text into sentences using spaCy.

    Args:
        text (str): The input text to be split.

    Returns:
        list: A list of sentences.
    """
    doc = nlp(text)
    return [str(sent).strip() for sent in doc.sents]


In [7]:

# Function to transcribe audio using ASR
def transcribe_audio(audio, pipelines):
    """
    Transcribes audio to text using the ASR pipeline.

    Args:
        audio: Audio input to be transcribed.
        pipelines (dict): Dictionary containing the initialized models.

    Returns:
        str: The transcribed text.
    """
    try:
        return pipelines['asr'](audio)["text"]
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return ""


In [8]:

# Function for summarizing text
def summarize_text(text, pipelines):
    """
    Summarizes the input text using the summarizer pipeline.

    Args:
        text (str): The text to be summarized.
        pipelines (dict): Dictionary containing the initialized models.

    Returns:
        str: The summarized text.
    """
    try:
        resp = pipelines['summarizer'](text)
        return resp[0]['summary_text']
    except Exception as e:
        print(f"Error summarizing text: {e}")
        return ""


In [9]:

# Function for sentiment analysis using FinBERT
def text_to_sentiment(text, pipelines):
    """
    Analyzes the sentiment of the input text using the FinBERT model.

    Args:
        text (str): The text for sentiment analysis.
        pipelines (dict): Dictionary containing the initialized models.

    Returns:
        str: The sentiment label.
    """
    try:
        return pipelines['sentiment'](text)[0]["label"]
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return ""


In [10]:

# Initialize pipelines
pipelines = {
    'asr': None,
    'summarizer': None,
    'sentiment': None
}

# Load the models into the pipeline dictionary
pipelines['asr'], pipelines['summarizer'], pipelines['sentiment'] = initialize_models()


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return self.fget.__get__(instance, owner)()


In [3]:
import gradio as gr

def transcribe_audio_with_pipeline(audio):
    return transcribe_audio(audio, pipelines)

def summarize_text_with_pipeline(text):
    return summarize_text(text, pipelines)

def text_to_sentiment_with_pipeline(text):
    return text_to_sentiment(text, pipelines)

def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## Financial Analyst AI")
        gr.Markdown("This project applies AI trained to analyze earning calls and other financial documents.")
        with gr.Row():
            with gr.Column():
                audio_file = gr.Audio(source="microphone", type="filepath")
                text_box = gr.Textbox(label="Transcribed Text")
                b1 = gr.Button("Recognize Speech")
                b1.click(fn=transcribe_audio_with_pipeline, inputs=audio_file, outputs=text_box)
                
                summary_box = gr.Textbox(label="Summary")
                b2 = gr.Button("Summarize Text")
                b2.click(fn=summarize_text_with_pipeline, inputs=text_box, outputs=summary_box)
                
                sentiment_label = gr.Label(label="Financial Tone")
                b3 = gr.Button("Classify Financial Tone")
                b3.click(fn=text_to_sentiment_with_pipeline, inputs=summary_box, outputs=sentiment_label)

    demo.launch()

# Launch the interface
create_interface()


Running on local URL:  http://127.0.0.1:7861
IMPORTANT: You are using gradio version 3.41.0, however version 5.0.1 is available, please upgrade.
--------

To create a public link, set `share=True` in `launch()`.


Your max_length is set to 62, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Traceback (most recent call last):
  File "c:\Users\Vaishnav\AppData\Local\Programs\Python\Python311\Lib\site-packages\gradio\routes.py", line 488, in run_predict
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Vaishnav\AppData\Local\Programs\Python\Python311\Lib\site-packages\gradio\blocks.py", line 1435, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Vaishnav\AppData\Local\Programs\Python\Python311\Lib\site-packages\gradio\blocks.py", line 1107, in call_function
    prediction = await anyio.to_thread.run_sync(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Vaishnav\AppData\Local\Programs\Python\