In [1]:
import os
import sys
import pandas as pd
import requests
from pandasai import SmartDataframe, SmartDatalake
from pandasai.llm.local_llm import LocalLLM 


In [2]:
# if sys.stdout.encoding != 'utf-8':
#     sys.stdout.reconfigure(encoding='utf-8')

# Endpoint and model used for every LLM request made by this notebook.
API_BASE = "https://api.sambanova.ai/v1"
MODEL = "Meta-Llama-3.1-70B-Instruct"
# Credentials must not live in the notebook source: the key is read from the
# SAMBANOVA_API_KEY environment variable. When the variable is unset an empty
# string is used, so requests are sent without a valid bearer token.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated.
API_KEY = os.environ.get("SAMBANOVA_API_KEY", "")

In [3]:
def load_file(file_path):
    """Load a CSV or Excel file into a DataFrame.

    The format is chosen from the file extension, compared case-insensitively
    so that names such as ``DATA.CSV`` or ``Report.XLSX`` are accepted.

    Args:
        file_path: Path to a ``.csv``, ``.xlsx`` or ``.xls`` file.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the extension is not
        supported or reading fails (a message is printed in both cases).
    """
    try:
        # Normalise once so the extension checks below ignore case.
        extension_key = file_path.lower()
        if extension_key.endswith('.csv'):
            return pd.read_csv(file_path, encoding='utf-8')
        if extension_key.endswith(('.xlsx', '.xls')):
            return pd.read_excel(file_path)
        print("Unsupported file format. Please use CSV or Excel files.")
        return None
    except Exception as e:
        # Deliberately broad: any read/parse failure is reported to the
        # interactive user and turned into None instead of aborting the session.
        print(f"Error loading file: {str(e)}")
        return None

In [4]:
def load_multiple_files():
    """Prompt for file paths until the user types 'done' and collect what loads.

    Every entered path is handed to ``load_file``; paths for which it returns
    ``None`` are skipped without stopping the loop.

    Returns:
        list: The successfully loaded frames, in the order they were entered
        (empty when nothing could be loaded).
    """
    loaded_frames = []
    entered_path = input("Enter File Path (or 'done' when finished): ").strip()
    # The sentinel comparison ignores case, so 'DONE' or 'Done' also ends input.
    while entered_path.lower() != 'done':
        frame = load_file(entered_path)
        if frame is not None:
            loaded_frames.append(frame)
            print(f"Successfully loaded: {entered_path}")
        entered_path = input("Enter File Path (or 'done' when finished): ").strip()

    return loaded_frames

In [5]:
def _split_table_row(line):
    """Split one '|'-separated line into stripped cells, keeping empty cells in place."""
    text = line.strip()
    # Remove only the optional outer border pipes; inner empty cells must
    # survive so every value stays under its own column.
    if text.startswith('|'):
        text = text[1:]
    if text.endswith('|'):
        text = text[:-1]
    return [cell.strip() for cell in text.split('|')]


def _is_separator_row(line):
    """Return True for a rule line such as '|---|:--:|' that carries no data."""
    stripped = line.strip()
    return '-' in stripped and set(stripped) <= set('|-: \t')


def process_with_llama(original_data, query, query_result, timeout=60):
    """
    Process data with LLaMA 70B model and return modified data

    The original data, the user query and the initial query result are sent to
    the chat-completions endpoint; the '|'-separated table found in the reply
    is parsed back into a DataFrame.

    Args:
        original_data: DataFrame whose full text rendering is included in the
            prompt and whose column dtypes are re-applied to the result.
        query: The user's natural-language request.
        query_result: Result of the initial query; rendered with
            ``to_string()`` when it is a DataFrame, ``str()`` otherwise.
        timeout: Seconds to wait for the HTTP request before giving up
            (passed to ``requests.post``).

    Returns:
        A DataFrame built from the table in the model reply, or ``None`` when
        the request fails, the status is not 200, the reply contains no line
        with '|', or the table cannot be converted (a message is printed for
        the failure cases).
    """
    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    
    prompt = f"""Based on the following data and query, provide a MODIFIED version of the original data that addresses the query. Return ONLY the modified data in a table format.

Original Data:
{original_data.to_string()}

User Query:
{query}

Initial Query Result:
{query_result.to_string() if isinstance(query_result, pd.DataFrame) else str(query_result)}

IMPORTANT INSTRUCTIONS:
1. Return the complete modified dataset as a table
2. Include ALL relevant columns from the original data
3. Make necessary modifications based on the query
4. Use the same column names as the original data
5. Maintain proper data types for each column
6. Use '|' as column separator
7. Include a header row

Return ONLY the modified table with no additional text."""

    payload = {
        "messages": [
            {
                "role": "system",
                "content": "You are a data transformation assistant. Return only the modified data table with the same structure as the input data."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "model": MODEL,
        "max_tokens": 4000,
        "temperature": 0.3
    }

    try:
        # A timeout keeps the interactive loop from hanging forever on a
        # stalled connection; the resulting exception is reported below.
        response = requests.post(
            f"{API_BASE}/chat/completions",
            headers=request_headers,
            json=payload,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            print(f"Response: {response.text}")
            return None
        
        result = response.json()['choices'][0]['message']['content']
        # Only lines containing the column separator can belong to the table.
        table_lines = [line.strip() for line in result.strip().split('\n') if '|' in line]
        if not table_lines:
            return None
            
        # Convert the table string back to DataFrame
        try:
            columns = _split_table_row(table_lines[0])
            
            data = []
            for line in table_lines[1:]:
                # The model may or may not emit a '---|---' rule under the
                # header, so rule lines are skipped wherever they appear
                # instead of blindly dropping the second line.
                if _is_separator_row(line):
                    continue
                row = _split_table_row(line)
                if any(row):  # Ignore rows in which every cell is empty
                    data.append(row)
            
            modified_df = pd.DataFrame(data, columns=columns)
            
            # Convert data types to match original DataFrame
            for col in modified_df.columns:
                if col in original_data.columns:
                    try:
                        modified_df[col] = modified_df[col].astype(original_data[col].dtype)
                    except Exception:
                        # Best effort: leave the column as parsed text when
                        # the values cannot be cast to the original dtype.
                        pass
            
            return modified_df
            
        except Exception as e:
            print(f"Error converting table to DataFrame: {str(e)}")
            return None
            
    except Exception as e:
        print(f"Error in LLaMA processing: {str(e)}")
        return None

In [6]:
def _build_datalake(dataframe, llm, prompt_template):
    """Create the SmartDatalake that answers queries about ``dataframe``."""
    return SmartDatalake(
        dataframe, 
        config={
            "llm": llm,
            "prompt_template": prompt_template
        }
    )


def main():
    """Run the interactive session: load files, then answer queries until 'exit'.

    All loaded files are concatenated row-wise into one DataFrame. For each
    query the initial answer comes from the SmartDatalake; the data, query and
    answer are then passed to ``process_with_llama``. When that returns a
    DataFrame it replaces the working data and the SmartDatalake is rebuilt so
    subsequent queries see the updated data.

    Returns:
        None. Returns early when no file could be loaded.
    """
    llm = LocalLLM(api_base=API_BASE, model=MODEL, api_key=API_KEY)
    
    print("Please enter file paths one at a time. Type 'done' when finished.")
    dataframes = load_multiple_files()
    
    if not dataframes:
        print("No valid files were loaded. Exiting.")
        return

    combined_df = pd.concat(dataframes, axis=0, ignore_index=True)
    
    custom_prompt = """You are a helpful assistant capable of processing and returning data in tabular form. Whenever a user asks for information, process the query and return the result as a well-structured table. Please ensure that the table includes the relevant columns, data, and is easy to read.

            Structure: Format the result as a table with clear headers.
            Data Representation: Ensure the data is returned in a structured, tabular form, with appropriate rows and columns.
            Context Understanding: If the query is about filtering, sorting, or aggregating data, return the modified table as per the user's request.
            Handle Missing Data: If applicable, indicate missing data as NaN or similar.
            Respond with the Table Only: Focus on returning the table, without unnecessary explanation.
    Query: {question}
    Please provide a clear and relevant response."""
    
    smart_df = _build_datalake(combined_df, llm, custom_prompt)

    while True:
        query = input("Enter Query (or 'exit' to quit): ").strip()
        if query.lower() == 'exit':
            break

        try:
            query_result = smart_df.chat(query)
            print("\nInitial Query Result:", query_result)
            
            modified_df = process_with_llama(combined_df, query, query_result)
            
            if modified_df is not None:
                print("\nModified Data from LLaMA:")
                print(modified_df.to_string())
                # Update the combined_df with modified data
                combined_df = modified_df
                # Rebinding the local name does not change the frame the
                # existing datalake was built from, so rebuild it; otherwise
                # later queries would still run against the stale data.
                # NOTE(review): a fresh datalake presumably starts without the
                # previous conversation context; confirm this is acceptable.
                smart_df = _build_datalake(combined_df, llm, custom_prompt)
                print("\nData has been updated with the modifications.")
            else:
                print("\nCould not get modified data from LLaMA.")
                
        except Exception as e:
            # Round-trip through UTF-8 with replacement so characters that
            # cannot be encoded do not break printing of the message.
            error_msg = str(e).encode('utf-8', errors='replace').decode('utf-8')
            print(f"Error: {error_msg}")

In [8]:
# Run the interactive workflow: load files, then answer queries until the user types 'exit'.
main()

Please enter file paths one at a time. Type 'done' when finished.
Successfully loaded: SuperApp-sampledata(in).csv

Initial Query Result:              Application                                       Supports  \
0               crm plus  sales operations, marketing, customer service   
1              hr portal                                human resources   
2            finance pro                  finance, accounting, treasury   
3   supply chain manager             operations, logistics, procurement   
4    marketing analytics                                      marketing   
..                   ...                                            ...   
61          voice system                             it, communications   
62      password manager                                   security, it   
63    quality management                              manufacturing, qa   
64        reporting tool                                all departments   
65      ticketing system             