## FinVox Insights
#### FinVox Insights is a proof-of-concept (POC) voice-based financial data visualization tool. The tool integrates **GPT-3.5** for URL generation; the generated URLs are passed to the **AlphaVantage** API to fetch the financial data. The visualization techniques include interactive scatter plots and 3-D plots with options to pan, zoom, and lasso-select certain data points. The tool is complete with a frontend developed using **Gradio.io**.


## Installing necessary libraries

In [1]:
!pip install openai
!pip install plotly
!pip install ffmpeg-python
!pip install mplfinance
!pip install gradio

Collecting openai
  Downloading openai-1.34.0-py3-none-any.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

## Importing necessary libraries

In [2]:
import gradio as gr
from transformers import pipeline
import numpy as np
from openai import OpenAI
from google.colab import userdata
import openai
import os
import json
import requests
import csv
import matplotlib.pyplot as plt
import mplfinance as mpf
import matplotlib.dates as mdates
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
import logging

In [3]:
# Logging setup: every record at INFO or above goes both to app.log and to
# the console stream. force=True makes basicConfig replace any handlers that
# are already attached to the root logger (e.g. from a previous run of this
# cell), so messages are not emitted twice.
_log_handlers = [logging.FileHandler("app.log"), logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
    force=True,
)

# Module-level logger shared by the functions defined below.
logger = logging.getLogger(__name__)
logger.info("Logging is configured and the script has started.")


2024-06-16 01:22:18,277 - __main__ - INFO - Logging is configured and the script has started.


## Defining helper functions

In [4]:
def set_openAIKey():
    """Make the OpenAI API key from Colab secrets available to the OpenAI client.

    Reads the ``OPENAI_API_KEY`` secret through ``userdata.get`` and exports
    it as the ``OPENAI_API_KEY`` environment variable. The ``OpenAI()`` client
    created in ``get_URL`` is constructed without arguments, so the
    environment is where it looks for the key.

    Returns:
        None. If the secret is empty, the environment is left untouched and a
        warning is logged, so a key that is already exported keeps working.
    """
    openai_api_key = userdata.get('OPENAI_API_KEY')
    if not openai_api_key:
        logger.warning("OPENAI_API_KEY secret is empty; environment left unchanged.")
        return
    # Previously the key was only stored in a local variable and discarded.
    os.environ['OPENAI_API_KEY'] = openai_api_key

def normalize_keys(d):
    """Return a copy of ``d`` whose keys are lowercased with underscores removed.

    Values are carried over unchanged. When two keys normalize to the same
    string, the entry that appears later in ``d`` wins.
    """
    return {name.lower().replace('_', ''): item for name, item in d.items()}

def extract_string(column_name):
    """Strip a leading ``"<prefix>. "`` from a column name.

    The name is split on ``'. '``; when the separator is present the piece
    right after the first separator is returned (e.g. ``"1. open"`` gives
    ``"open"``), otherwise the name is returned unchanged.
    """
    pieces = column_name.split('. ')
    return pieces[1] if len(pieces) > 1 else column_name

## Prompt Engineering : Prompting the model to obtain relevant data for AlphaVantage API

In [6]:
def get_URL(user_input):
    """Ask the OpenAI chat model which AlphaVantage calls satisfy a request.

    The user's request is embedded in a prompt that asks for company names,
    the AlphaVantage function name and one API URL per company. The model is
    asked to answer with a JSON object.

    Args:
        user_input: Free-text description of the data the user wants.

    Returns:
        The parsed JSON object as a dict whose keys have been passed through
        ``normalize_keys``, or ``None`` when the request fails or the reply
        is missing, is not valid JSON, or is not a JSON object.
    """
    prompt = f'''
    Fetch me the AlphaVantage API URLs, Company names, and function name for the following task-
    {user_input} for all companies mentioned in the input, separated by commas, in the format:
    Company Names: [Company 1, Company 2, ...]
    Function Name:
    Api Urls: [Url for Company 1, Url for Company 2, ...]

    Additionally, specify if you want to:
    - Overlap/Juxtapose the data (True/False):

    For example:
    If the task is to fetch balance sheet for Google (GOOG) and Microsoft (MSFT) and overlap the data:
    Company Names: [Google, Microsoft]
    Overlap/Juxtapose: True
    Function Name: Balance Sheet
    Api Urls: [https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol=GOOG&apikey=YOUR_API_KEY, https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol=MSFT&apikey=YOUR_API_KEY]

    Strictly follow the format for your response.
    '''

    client = OpenAI()

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        logger.info(f"Response from OpenAI: {response}")

        if not (response.choices and response.choices[0].message.content):
            raise ValueError("Invalid response received from OpenAI API.")

        # strip() with no argument removes surrounding whitespace; the
        # previous strip("") had an empty character set and removed nothing.
        json_string = response.choices[0].message.content.strip()
        parsed = json.loads(json_string)
        if not isinstance(parsed, dict):
            raise ValueError("Invalid response received from OpenAI API.")

        # Normalize keys so callers can look them up case-insensitively.
        return normalize_keys(parsed)
    except Exception as e:
        # Notebook boundary: log with traceback and signal failure via None.
        logger.exception(f"An error occurred during data retrieval: {str(e)}")
        return None


def get_data(response_url, function_name, company_name):
    """Download AlphaVantage data for one company and cache time series as CSV.

    The ``YOUR_API_KEY`` placeholder in ``response_url`` is replaced with the
    ``AVANTAGE_API_KEY`` Colab secret before the request is sent. Only
    responses for functions whose name contains ``'Time Series'`` are
    processed: the series is converted to a float DataFrame indexed by
    ``Date`` and written to ``<company_name>.csv`` in the working directory.
    Other functions are fetched but nothing is saved.

    Args:
        response_url: AlphaVantage URL containing the ``YOUR_API_KEY`` placeholder.
        function_name: Human-readable function name (e.g. ``'Time Series Daily'``).
        company_name: Used as the CSV file stem.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        ValueError: If a time-series response contains no series block, which
            is what AlphaVantage error and rate-limit payloads look like.
    """
    api_key = userdata.get('AVANTAGE_API_KEY')

    # Log the URL while it still holds the placeholder so the real key is
    # never written to app.log.
    logger.info(f"Fetching data from URL: {response_url}")
    api_url = response_url.replace("YOUR_API_KEY", api_key)

    # A timeout keeps a stalled connection from hanging the UI callback.
    response = requests.get(api_url, timeout=30)
    response.raise_for_status()
    data = response.json()

    logger.info(f"Function name: {function_name}")

    # Only time series data is persisted for visualisation.
    if 'Time Series' not in function_name:
        return

    # The series is the dict-valued entry next to "Meta Data"; looking it up
    # by shape instead of by position avoids an IndexError on error payloads.
    series = next(
        (value for key, value in data.items()
         if key != 'Meta Data' and isinstance(value, dict)),
        None,
    )
    if series is None:
        raise ValueError(
            f"No time series found in AlphaVantage response for {company_name}; "
            f"response keys: {list(data)}"
        )

    # One row per date; the date strings become the index, named 'Date'.
    df = pd.DataFrame.from_dict(series, orient='index').rename_axis('Date')

    # Column names arrive as e.g. "1. open"; keep only the text part.
    df.columns = [extract_string(col) for col in df.columns]

    # Values arrive as strings; convert them to floats.
    df = df.astype(float)

    csv_path = f'{company_name}.csv'
    df.to_csv(csv_path)
    logger.info(f"CSV file saved successfully at {csv_path}")


## Defining functions for plotting

ChatCompletion(id='chatcmpl-9aZ4a2F98OA6uFCdFu8rGjr5fZWFn', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n    "company": "Nvidia",\n    "earnings": {\n        "2021": {\n            "Q1": "$3.08 billion",\n            "Q2": "$3.87 billion",\n            "Q3": "$4.73 billion",\n            "Q4": "$5.66 billion"\n        }\n    }\n}', role='assistant', function_call=None, tool_calls=None))], created=1718501656, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=70, prompt_tokens=27, total_tokens=97))


In [7]:
def plot_overlapping_time_series(company_list):
    """Plot the closing values of several companies on one Matplotlib axis.

    For every name in ``company_list`` the file ``<name>.csv`` is read from
    the working directory; its ``Date`` and ``close`` columns are plotted as
    one labelled line.

    Args:
        company_list: Company names whose CSV files already exist on disk.

    Returns:
        The Matplotlib figure containing all lines.
    """
    fig, ax = plt.subplots()
    for company in company_list:
        # Parse dates and sort chronologically. With plain strings the axis
        # is categorical: rows are drawn in file order and companies with
        # different date sets do not line up.
        df = pd.read_csv(f'{company}.csv', parse_dates=['Date']).sort_values('Date')
        ax.plot(df['Date'], df['close'], label=company)

    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.set_title('Time Series')
    # Real dates get sensibly spaced ticks automatically, so no manual
    # thinning is needed; rotate labels on this axis rather than through
    # global pyplot state.
    ax.tick_params(axis='x', labelrotation=90)
    ax.legend()

    return fig


def plot_time_series_single(time_series_df):
    """Build a Plotly candlestick figure from one company's price data.

    Args:
        time_series_df: Table providing ``Date``, ``open``, ``high``, ``low``
            and ``close`` columns.

    Returns:
        A Plotly figure titled 'Candlestick Chart' with a visible range slider.
    """
    candles = go.Candlestick(
        x=time_series_df['Date'],
        open=time_series_df['open'],
        high=time_series_df['high'],
        low=time_series_df['low'],
        close=time_series_df['close'],
    )
    fig = go.Figure(data=[candles])

    # Titles plus a range slider under the x-axis.
    layout_options = dict(
        title='Candlestick Chart',
        xaxis_title='Date',
        yaxis_title='Price',
        xaxis_rangeslider_visible=True,
    )
    fig.update_layout(**layout_options)

    return fig


def plot_graph(function_name, company_names):
    """Pick and build the chart for the requested data.

    Only functions whose name contains 'Time Series' are charted: exactly one
    company yields a candlestick chart built from ``<company>.csv``; any other
    number of companies yields the overlapping line chart. Every other
    function name yields ``None``.
    """
    # Guard clause: nothing is drawn for non time-series functions.
    if 'Time Series' not in function_name:
        return None

    if len(company_names) != 1:
        return plot_overlapping_time_series(company_names)

    only_company = company_names[0]
    stock_data = pd.read_csv(f'{only_company}.csv')
    return plot_time_series_single(stock_data)

## Visualization of the data on Gradio

In [15]:
def visualize(text):
    """Turn a transcribed request into a chart.

    Asks ``get_URL`` which AlphaVantage calls are needed, downloads the data
    for every company with ``get_data`` and builds the figure with
    ``plot_graph``.

    Args:
        text: The user's request in plain text.

    Returns:
        The figure produced by ``plot_graph``, or ``None`` when the model
        response is unavailable or lacks one of the expected keys
        ('company names', 'api urls', 'function name').
    """
    set_openAIKey()
    response_dict = get_URL(text)

    # get_URL signals failure with None; stop here instead of raising a
    # TypeError from subscripting it.
    if not response_dict:
        logger.error("No usable response from OpenAI; nothing to visualize.")
        return None

    try:
        company_names = response_dict['company names']
        api_urls = response_dict['api urls']
        function_name = response_dict['function name']
    except KeyError as missing:
        logger.error(f"OpenAI response is missing expected key: {missing}")
        return None

    # Pair each URL with its company; zip stops at the shorter list, so a
    # length mismatch cannot raise IndexError.
    for api_url, company_name in zip(api_urls, company_names):
        get_data(api_url, function_name, company_name)

    return plot_graph(function_name, company_names)


# Speech-to-text pipeline backed by the "openai/whisper-base.en" model.
# Created once at module level and reused by transcribe() on every request.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(audio):
    """Transcribe a recording and build the matching visualization.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the audio
            input component, or ``None`` when nothing was recorded.

    Returns:
        ``(text, figure)``: the transcribed text and whatever ``visualize``
        returns for it. ``("", None)`` when there is no audio to process.
    """
    # Button pressed without a recording: nothing to transcribe.
    if audio is None:
        return "", None

    sr, y = audio
    if y.size == 0:
        return "", None

    # Down-mix multi-channel audio to mono.
    # NOTE(review): assumes a (samples, channels) layout; confirm against
    # the Gradio version in use.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalize; skip for all-zero (silent) input, where dividing by
    # the zero peak would fill the signal with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    output = transcriber({"sampling_rate": sr, "raw": y})
    text = output["text"]
    logger.info(f"Transcribed text: {text}")
    visualization_data = visualize(text)  # Call the visualization function
    return text, visualization_data

# Gradio interface: a two-column page. The left column records audio and
# holds the trigger button; the right column shows the transcription and plot.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Transcription and Visualization")

    with gr.Row():
        # Left column: microphone input and the action button.
        with gr.Column():
            audio_input = gr.Audio(sources=["microphone"], label="Record your audio")
            transcribe_button = gr.Button("Transcribe and Visualize")

        # Right column: outputs filled in by transcribe().
        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription")
            visualization_output = gr.Plot()

    # On click, pass the recording to transcribe(); its two return values map
    # to the textbox and the plot, in that order.
    transcribe_button.click(
        transcribe,
        inputs=audio_input,
        outputs=[transcription_output, visualization_output]
    )


# Start the app.
demo.launch()


2024-06-16 01:35:12,083 - httpx - INFO - HTTP Request: GET http://127.0.0.1:7861/startup-events "HTTP/1.1 200 OK"
2024-06-16 01:35:12,100 - httpx - INFO - HTTP Request: HEAD http://127.0.0.1:7861/ "HTTP/1.1 200 OK"
2024-06-16 01:35:12,202 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()


2024-06-16 01:35:12,476 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"


Running on public URL: https://fa71aed0248cc01158.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


2024-06-16 01:35:13,119 - httpx - INFO - HTTP Request: HEAD https://fa71aed0248cc01158.gradio.live "HTTP/1.1 200 OK"


