In [None]:
!pip install pandas streamlit together

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting together
  Downloading together-1.3.13-py3-none-any.whl.metadata (11 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m582.4 kB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting pillow<12,>=7.1.0 (from streamlit)
  Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading together-1.3.13-py3-none-any.whl (70 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m5.0 MB/s[0m e

In [1]:
# Step 2: Import libraries
import os
import re
import warnings
from typing import Optional, List, Any, Tuple

import pandas as pd
import streamlit as st
from together import Together

# Ignore UserWarning-category warnings that originate from pydantic modules.
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")

In [6]:
# Step 3: Define helper functions
# Matches the first fenced ```python block in a Markdown-style reply.
# Tolerates trailing spaces/tabs after the language tag, CRLF line endings,
# and a differently-cased tag such as "Python".
pattern = re.compile(r"```python[ \t]*\r?\n(.*?)\r?\n```", re.DOTALL | re.IGNORECASE)

# Function to extract Python code from the model's response
def match_code_blocks(llm_response: Optional[str]) -> str:
    """Return the body of the first ```python fenced block in ``llm_response``.

    Args:
        llm_response: Full text returned by the model. ``None`` or an empty
            string is accepted and treated as "no code".

    Returns:
        The code between the opening and closing fence (without the fence
        lines themselves), or ``""`` when no Python block is present.
    """
    # Guard first: re.search() raises TypeError on None.
    if not llm_response:
        return ""
    match = pattern.search(llm_response)
    return match.group(1) if match else ""

# Function to interact with the LLM model
def chat_with_llm(user_message: str, dataset_path: str) -> Tuple[Optional[str], str]:
    """Send the user's question about a dataset to the LLM and extract any Python it returns.

    Args:
        user_message: The user's natural-language query; sent as the "user" message.
        dataset_path: Path of the CSV file. It is interpolated into the system
            prompt so the model is told which file to read in its code.

    Returns:
        A ``(python_code, response_text)`` tuple. ``python_code`` is whatever
        ``match_code_blocks`` extracted from the reply, or ``None`` when it
        extracted nothing. ``response_text`` is the content of the first
        choice's message (``""`` if that content is missing).

    Raises:
        RuntimeError: If the ``TOGETHER_API_KEY`` environment variable is unset
            or empty.
    """
    system_prompt = f"""You're a Python data scientist and data visualization expert. You are given a dataset at path '{dataset_path}' and also the user's query.
You need to analyze the dataset and answer the user's query with a response and you run Python code to solve them.
IMPORTANT: Always use the dataset path variable '{dataset_path}' in your code when reading the CSV file."""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    # Read the credential from the environment so it is never committed with
    # the notebook; fail early with an actionable message when it is missing.
    api_key = os.environ.get("TOGETHER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "TOGETHER_API_KEY environment variable is not set; "
            "export your Together API key before calling chat_with_llm()."
        )

    client = Together(api_key=api_key)
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",  # Replace with your chosen model
        messages=messages,
    )

    # Normalise a missing (None) content to "" so extraction and the declared
    # ``str`` return type both hold.
    response_text = response.choices[0].message.content or ""
    python_code = match_code_blocks(response_text)

    # An empty extraction is reported as None, matching the original contract.
    return (python_code or None), response_text


In [7]:
# Step 4: Point at the dataset and read it
# Example CSV; this same path string is later passed to chat_with_llm as the dataset path.
uploaded_file = 'sample_data/california_housing_test.csv'  # Replace this with the correct path

df = pd.read_csv(uploaded_file)
df.head()  # Display the first 5 rows


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [8]:
# Step 5: Query input
# Natural-language question that is sent to the LLM as the user message.
query = "Show some possible insights from dataset to get some trends?"  # Replace with your actual query


In [9]:
# Step 6: Send the query and dataset path to the LLM, then show what came back
dataset_path = uploaded_file
code, llm_response = chat_with_llm(user_message=query, dataset_path=dataset_path)

# Full model reply first, heading and body on separate lines.
print("AI Response:", llm_response, sep="\n")

# Display the generated code only when some was extracted from the reply.
if code:
    print("Generated Python Code:", code, sep="\n")


AI Response:
To gain insights and identify trends in the California housing dataset, we can perform exploratory data analysis and visualization. Here's a Python code snippet that uses pandas, matplotlib, and seaborn libraries to analyze the dataset:

```python
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the dataset
data = pd.read_csv('sample_data/california_housing_test.csv')

# Display the first few rows of the dataset
print(data.head())

# Get the summary statistics of the dataset
print(data.describe())

# Check for missing values
print(data.isnull().sum())

# Visualize the distribution of the target variable (median_house_value)
plt.figure(figsize=(10, 6))
sns.histplot(data['median_house_value'], bins=50)
plt.title('Distribution of Median House Value')
plt.xlabel('Median House Value')
plt.ylabel('Frequency')
plt.show()

# Visualize the relationship between median_house_value and other features
plt.figure(figsize=(12,