# Import Required Libraries

# In [3]:
import os
import json
import re
import sys
import io
import contextlib
import warnings
from typing import Optional, List, Any, Tuple
from PIL import Image
import pandas as pd
import base64
from io import BytesIO
import matplotlib.pyplot as plt  # Import Matplotlib for visualizations
import seaborn as sns  # Import Seaborn for better plotting
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")


# Define the Regular Expression for Extracting Python Code

# In [4]:
# Regular expression that extracts the first fenced ```python code block from the
# language model's response. It tolerates trailing spaces/tabs after the language
# tag and Windows (CRLF) line endings, either of which would otherwise cause a
# perfectly valid code block to go unmatched.

pattern = re.compile(r"```python[ \t]*\r?\n(.*?)\r?\n```", re.DOTALL)


# Define the Function to Execute Python Code

# In [5]:
# This function will execute Python code provided by the LLM. It will also handle capturing
#  and displaying Matplotlib plots generated during the execution.

def code_interpret(code: str) -> str:
    """Execute LLM-generated Python code and show its results in Streamlit.

    Standard output and standard error produced by the code are captured and
    echoed to the process's own stdout/stderr. Every Matplotlib figure left
    open by the code is rendered with ``st.image`` and then closed.

    Args:
        code: Python source code to execute.

    Returns:
        The text the executed code wrote to standard output (possibly "").

    NOTE(security): ``exec`` runs arbitrary model-generated code inside this
    process with full privileges. Prefer a sandboxed interpreter for untrusted
    input.
    """
    with st.spinner('Executing code...'):  # Show loading spinner during execution
        stdout_capture = io.StringIO()
        stderr_capture = io.StringIO()

        # Run the code in ONE namespace dict. With separate globals/locals
        # (what a bare exec() inside a function does) the code behaves like a
        # class body, so functions it defines cannot see its own top-level
        # imports or variables. Copying module globals keeps already-imported
        # names (pd, plt, sns, ...) available without letting the code mutate
        # this module.
        namespace = dict(globals())

        with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
            try:
                exec(code, namespace)
            except Exception as e:
                # Include the exception type; str(e) alone is often empty or ambiguous.
                stderr_capture.write(f"{type(e).__name__}: {e}")

        captured_errors = stderr_capture.getvalue()
        if captured_errors:
            print("[Code Interpreter Warnings/Errors]", file=sys.stderr)
            print(captured_errors, file=sys.stderr)

        captured_output = stdout_capture.getvalue()
        if captured_output:
            print("[Code Interpreter Output]", file=sys.stdout)
            print(captured_output, file=sys.stdout)

        # Render every open figure, not just the current one, and close each so
        # figures do not accumulate across Streamlit reruns.
        for fig_num in plt.get_fignums():
            fig = plt.figure(fig_num)  # Returns the existing figure with this number
            try:
                img_buf = BytesIO()
                fig.savefig(img_buf, format='png')
                img_buf.seek(0)
                st.image(Image.open(img_buf))
            finally:
                plt.close(fig)

        return captured_output





# Function to Match Python Code Blocks in LLM Response

# In [6]:
# This function extracts Python code from the model's response if it contains any.

def match_code_blocks(llm_response: str) -> str:
    """Return the first fenced Python code block in ``llm_response``.

    Args:
        llm_response: Raw text returned by the language model.

    Returns:
        The contents of the first block matched by the module-level
        ``pattern``, or an empty string when nothing matches.
    """
    found = pattern.search(llm_response)
    return found.group(1) if found else ""


# Function to Interact with Hugging Face LLM

# In [9]:
# This function sends the user's query to the LLM and retrieves the response. 
# It also executes any Python code generated by the model.

@st.cache_resource
def _load_model_and_tokenizer(model_name: str):
    """Load a causal LM and its tokenizer once and reuse them across Streamlit reruns."""
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def chat_with_llm(user_message: str, dataset_path: str) -> Tuple[Optional[str], str]:
    """Send the user's query to a local Hugging Face model and run any code it returns.

    Args:
        user_message: The user's question about the dataset.
        dataset_path: Path of the dataset the generated code should read.

    Returns:
        A ``(code_results, response_message)`` tuple. ``code_results`` is the
        value returned by ``code_interpret`` for the first Python code block in
        the response, or ``None`` when the response contains no code block.
        ``response_message`` is the model's generated text.
    """
    # NOTE(review): the "-Turbo" suffix looks like a hosted-API model id rather
    # than a Hugging Face Hub repository name - confirm this id can actually be
    # loaded with from_pretrained.
    model_name = "meta-llama/Meta-Llama-3.3-70B-Instruct-Turbo"  # You can change this model to another open-source one
    # Loading weights is expensive; the cached loader avoids repeating it on
    # every query.
    model, tokenizer = _load_model_and_tokenizer(model_name)

    # Construct system prompt with dataset path
    system_prompt = f"""You're a Python data scientist and data visualization expert. You are given a dataset at path '{dataset_path}' and also the user's query.
    You need to analyze the dataset and answer the user's query with a response and you run Python code to solve them.
    IMPORTANT: Always use the dataset path variable '{dataset_path}' in your code when reading the CSV file."""

    # Encode the prompt and user message
    inputs = tokenizer.encode(system_prompt + "\n" + user_message, return_tensors="pt")

    # Generate response using the model
    with torch.no_grad():
        outputs = model.generate(inputs, max_length=1024, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)

    # For decoder-only models the generated sequence starts with the prompt
    # tokens; drop them so the returned message is only the model's answer.
    prompt_length = inputs.shape[-1]
    response_message = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    python_code = match_code_blocks(response_message)

    if not python_code:
        st.warning("Failed to match any Python code in model's response")
        return None, response_message

    code_results = code_interpret(python_code)
    return code_results, response_message


# Function to Handle File Upload

# In [10]:
# This function handles the uploading of the dataset. 
# It stores the dataset locally and returns the dataset's file path.

def upload_dataset(uploaded_file) -> str:
    """Save an uploaded file into the current working directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes.

    Returns:
        The relative path (``./<file name>``) the data was written to.

    Raises:
        ValueError: If the upload's name contains no usable file name.
        Exception: Any error raised while writing the file is reported with
            ``st.error`` and re-raised unchanged.
    """
    # The name comes from the client, so strip any directory components
    # (including Windows-style separators) to keep the write inside the
    # working directory.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    dataset_path = f"./{safe_name}"

    try:
        with open(dataset_path, 'wb') as f:
            f.write(uploaded_file.getbuffer())
        return dataset_path
    except Exception as error:
        st.error(f"Error during file upload: {error}")
        raise  # Bare raise preserves the original traceback


# Main Streamlit Application

# In [12]:
# This is the main function of your Streamlit app, 
# where you handle file uploads, query input, and visualizations.

import streamlit as st
import pandas as pd
import base64
from io import BytesIO
from PIL import Image
from e2b_code_interpreter import Sandbox  # Assuming this is a custom library for sandboxed code execution

def upload_dataset(uploaded_file):
    """Persist an uploaded CSV to disk and return the path it was saved to.

    Args:
        uploaded_file: File-like object containing CSV data.

    Returns:
        The path of the saved CSV file.
    """
    # The caller may already have read the upload (e.g. to preview it), which
    # leaves the stream at EOF; rewind so read_csv sees the whole file.
    if hasattr(uploaded_file, "seek"):
        uploaded_file.seek(0)
    df = pd.read_csv(uploaded_file)
    # Save the file temporarily for later use (this might depend on your platform)
    dataset_path = "path/to/uploaded/dataset.csv"
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(dataset_path), exist_ok=True)
    df.to_csv(dataset_path, index=False)
    return dataset_path

def chat_with_llm(query, dataset_path):
    """Stand-in for the real LLM call; returns fixed placeholder values.

    Both arguments are accepted for interface compatibility but are not used.

    Returns:
        A ``(code_results, llm_response)`` tuple of placeholder strings.
    """
    # Placeholder: a real implementation would ask the language model to
    # generate analysis code for the dataset and return its output and reply.
    return "Analysis Result Placeholder", "AI Response Placeholder"

def _render_result(result) -> None:
    """Display a single analysis result with the Streamlit widget matching its type."""
    if hasattr(result, 'png') and result.png:  # Check if PNG data is available
        # Decode the base64-encoded PNG data and display the image
        png_data = base64.b64decode(result.png)
        image = Image.open(BytesIO(png_data))
        st.image(image, caption="Generated Visualization", use_container_width=False)
    elif hasattr(result, 'figure'):  # For matplotlib figures
        st.pyplot(result.figure)
    elif hasattr(result, 'show'):  # For plotly figures
        st.plotly_chart(result)
    elif isinstance(result, (pd.DataFrame, pd.Series)):
        st.dataframe(result)
    else:
        st.write(result)


def main():
    """Main Streamlit application.

    Renders the page, collects API keys and the model choice in the sidebar,
    lets the user upload a CSV and enter a question, and on "Analyze" passes
    the query and dataset path to ``chat_with_llm`` and displays what it returns.
    """
    # Step 1: Display title and introduction
    st.title("📊 AI Data Visualization Agent")
    st.write("Upload your dataset and ask questions about it!")

    # Step 2: Initialize session state variables for API keys and model
    if 'together_api_key' not in st.session_state:
        st.session_state.together_api_key = ''
    if 'e2b_api_key' not in st.session_state:
        st.session_state.e2b_api_key = ''
    if 'model_name' not in st.session_state:
        st.session_state.model_name = ''

    # Step 3: Sidebar for API Key input and Model Configuration
    with st.sidebar:
        st.header("API Keys and Model Configuration")
        st.session_state.together_api_key = st.sidebar.text_input("Together AI API Key", type="password")
        st.sidebar.info("💡 Everyone gets a free $1 credit by Together AI - AI Acceleration Cloud platform")
        st.sidebar.markdown("[Get Together AI API Key](https://api.together.ai/signin)")

        st.session_state.e2b_api_key = st.sidebar.text_input("Enter E2B API Key", type="password")
        st.sidebar.markdown("[Get E2B API Key](https://e2b.dev/docs/legacy/getting-started/api-key)")

        # Model selection dropdown: display label -> model identifier
        model_options = {
            "Meta-Llama 3.1 405B": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
            "DeepSeek V3": "deepseek-ai/DeepSeek-V3",
            "Qwen 2.5 7B": "Qwen/Qwen2.5-7B-Instruct-Turbo",
            "Meta-Llama 3.3 70B": "meta-llama/Llama-3.3-70B-Instruct-Turbo"
        }
        selected_label = st.selectbox(
            "Select Model",
            options=list(model_options.keys()),
            index=0  # Default to first option
        )
        # Session state stores the model identifier, not the display label.
        st.session_state.model_name = model_options[selected_label]

    # Step 4: Upload CSV file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    # Display dataset with toggle for preview or full dataset
    df = pd.read_csv(uploaded_file)
    st.write("Dataset:")
    show_full = st.checkbox("Show full dataset")
    if show_full:
        st.dataframe(df)
    else:
        st.write("Preview (first 5 rows):")
        st.dataframe(df.head())

    # Step 5: User input for query
    query = st.text_area("What would you like to know about your data?",
                         "Can you compare the average cost for two people between different categories?")

    # Step 6: Analyze button and results display
    if not st.button("Analyze"):
        return

    if not st.session_state.together_api_key or not st.session_state.e2b_api_key:
        st.error("Please enter both API keys in the sidebar.")
        return

    with Sandbox(api_key=st.session_state.e2b_api_key):
        # read_csv above consumed the upload stream; rewind it so
        # upload_dataset sees the full file instead of an empty one.
        uploaded_file.seek(0)
        dataset_path = upload_dataset(uploaded_file)

        # Pass dataset_path to chat_with_llm for code generation
        code_results, llm_response = chat_with_llm(query, dataset_path)

        # Display AI's text response
        st.write("AI Response:")
        st.write(llm_response)

        # Handle and display results
        if code_results:
            # A plain string is a single result; iterating it would render
            # the text one character at a time.
            if isinstance(code_results, str):
                code_results = [code_results]
            for result in code_results:
                _render_result(result)

# Run the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


# Captured notebook output (Streamlit log lines from executing the cell above):
#
# 2025-01-23 00:55:40.396
#   command:
#
#     streamlit run c:\Users\Chinelo\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
# 2025-01-23 00:55:40.401 Session state does not function when running a script without `streamlit run`
