In [12]:
import os
import argparse
from crewai import Agent, Task, Crew, Process
import pandas as pd
import json
import sys
import re
from litellm import completion
from llama_cpp import Llama
import litellm

# Default GGUF weights: a blob inside the local Ollama model store.
_DEFAULT_MODEL_PATH = "/Users/idks/.ollama/models/blobs/sha256-4824460d29f2058aaf6e1118a63a7a197a09bed509f0e7d4e2efb1ee273b447d"


def get_llm(model_path=_DEFAULT_MODEL_PATH, n_ctx=4096, n_batch=512, n_gpu_layers=0):
    """Load a llama-cpp-python ``Llama`` model.

    Calling ``get_llm()`` with no arguments uses the same settings that were
    previously hard-coded, so existing callers are unaffected.

    Args:
        model_path: Path of the model file handed to ``Llama``.
        n_ctx: Context size passed to ``Llama``.
        n_batch: Batch size passed to ``Llama``.
        n_gpu_layers: Passed to ``Llama``; set higher if you have GPU support.

    Returns:
        The constructed ``Llama`` instance.
    """
    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_batch=n_batch,
        n_gpu_layers=n_gpu_layers,
    )

# Create a wrapper class for llama-cpp-python to work with CrewAI
class LlamaLocal:
    """Callable adapter around a llama-cpp-python style model object.

    Calling the adapter with a prompt invokes the wrapped model and returns
    only the generated text of the first choice.
    """

    # Sampling defaults used when the caller does not override them.
    DEFAULT_MAX_TOKENS = 16434
    DEFAULT_TEMPERATURE = 0.7
    DEFAULT_TOP_P = 0.95

    def __init__(self, llm):
        """Store the wrapped model; ``llm`` must be callable with a prompt."""
        self.llm = llm

    def __call__(self, prompt, **kwargs):
        """Generate text for ``prompt``.

        Args:
            prompt: Prompt string forwarded to the wrapped model.
            **kwargs: ``max_tokens``, ``temperature`` and ``top_p`` override
                the class defaults when given; any other keyword is ignored
                rather than forwarded to the model.

        Returns:
            The ``text`` field of the first entry in the response's
            ``choices`` list.
        """
        response = self.llm(
            prompt,
            max_tokens=kwargs.get("max_tokens", self.DEFAULT_MAX_TOKENS),
            temperature=kwargs.get("temperature", self.DEFAULT_TEMPERATURE),
            top_p=kwargs.get("top_p", self.DEFAULT_TOP_P),
            echo=False,
        )
        # Extract the generated text
        return response["choices"][0]["text"]

# Patch litellm.completion to handle LlamaLocal
# If this cell/module has already run, litellm.completion is our own patch.
# Recover the real entry point from it so re-running does not stack wrappers.
original_completion = getattr(
    litellm.completion, "_llama_local_original", litellm.completion
)


def patched_completion(model, messages, **kwargs):
    """Replacement for ``litellm.completion`` that serves ``LlamaLocal`` models.

    When ``model`` is a ``LlamaLocal`` instance, the chat ``messages`` are sent
    to the wrapped llama-cpp model and the result is returned as a
    ``litellm.ModelResponse``. Any other ``model`` is delegated unchanged to
    the original ``litellm.completion``.

    Args:
        model: A ``LlamaLocal`` instance, or whatever litellm normally accepts.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        **kwargs: For ``LlamaLocal`` only ``max_tokens``, ``temperature`` and
            ``top_p`` are used; otherwise everything is forwarded to litellm.

    Returns:
        A ``litellm.ModelResponse`` for ``LlamaLocal`` models, otherwise the
        return value of the original ``litellm.completion``.

    NOTE(review): the recorded run of this notebook still failed inside
    litellm with "LLM Provider NOT provided" for a ``LlamaLocal`` model, so
    some caller apparently does not reach this branch -- confirm how the
    agent framework hands the model to litellm.
    """
    if not isinstance(model, LlamaLocal):
        return original_completion(model, messages, **kwargs)

    # create_chat_completion applies the model's chat template and returns an
    # OpenAI-style chat payload (choices[i]["message"]["content"]), unlike the
    # plain completion call whose choices only carry a "text" field.
    response = model.llm.create_chat_completion(
        messages=messages,
        max_tokens=kwargs.get('max_tokens', 16434),
        temperature=kwargs.get('temperature', 0.7),
        top_p=kwargs.get('top_p', 0.95),
    )
    # Wrap the dict so callers can use attribute access
    # (response.choices[0].message.content) as with a real litellm result.
    return litellm.ModelResponse(**response)


# Remember the real function on the patch itself (read back above on re-run).
patched_completion._llama_local_original = original_completion
litellm.completion = patched_completion

# CSV data loader
class CSVDataLoader:
    """Reads CSV files into DataFrames and summarises their contents."""

    def load_csv(self, file_path):
        """Read ``file_path`` with pandas.

        Returns the DataFrame on success. On any failure an error message
        string is returned instead of raising, so callers must check the
        type of the result.
        """
        try:
            frame = pd.read_csv(file_path)
        except Exception as e:
            return f"Error loading CSV: {str(e)}"
        return frame

    def get_csv_info(self, df):
        """Build a summary dict for ``df``.

        Always contains ``columns``, ``shape`` and ``missing_values``.
        ``numerical_stats`` is added when numeric columns exist, and
        ``categorical_stats`` when object/category columns exist.
        """
        summary = {}
        summary["columns"] = list(df.columns)
        summary["shape"] = df.shape
        summary["missing_values"] = df.isnull().sum().to_dict()

        # describe() output for the numeric columns, if there are any
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        if numeric_cols:
            summary["numerical_stats"] = df[numeric_cols].describe().to_dict()

        # Top-10 value counts, limited to columns with fewer than 20 distinct
        # values (missing values count as a distinct value here)
        label_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
        if label_cols:
            summary["categorical_stats"] = {
                name: df[name].value_counts().head(10).to_dict()
                for name in label_cols
                if df[name].nunique(dropna=False) < 20
            }

        return summary

def custom_task_analysis(file_path, question):
    """
    Run a custom analysis task based on a specific question.

    The CSV is loaded and summarised first; only then is the local model
    loaded and a single "Data Analyst" agent asked to answer ``question``
    from the summary and the first ten rows.

    Args:
        file_path: Path of the CSV file to analyse.
        question: The question to answer about the data.

    Returns:
        The result of ``Crew.kickoff()``, or the error message string from
        ``CSVDataLoader.load_csv`` when the file could not be read.

    NOTE(review): in the recorded run of this notebook, kickoff failed with a
    litellm ``BadRequestError`` because the ``LlamaLocal`` object reached
    litellm as ``model``. Confirm which LLM types ``Agent(llm=...)`` accepts.
    """
    # Read the CSV before touching the model: a bad path then fails
    # immediately instead of after the (slow) model load.
    csv_loader = CSVDataLoader()
    print(f"Loading CSV file: {file_path}")
    df = csv_loader.load_csv(file_path)
    if isinstance(df, str):
        return df  # Return error message if loading failed

    # Get basic information about the CSV
    csv_info = csv_loader.get_csv_info(df)
    csv_info_str = json.dumps(csv_info, indent=2)
    csv_sample = df.head(10).to_string()

    print(csv_info_str)

    print("Loading model and initializing agent for custom analysis...")
    llm = LlamaLocal(get_llm())

    # Define a specific analyst agent for this task
    task_analyst = Agent(
        role="Data Analyst",
        goal=f"Answer the specific question about the CSV data: {question}",
        backstory="You are an expert data analyst who specializes in answering specific questions about datasets with precision and depth.",
        llm=llm,
        verbose=True
    )

    # Define the specific task
    analysis_task = Task(
        description=f"""
        Answer the following question about this CSV dataset:
        
        QUESTION: {question}
        
        CSV Info: {csv_info_str}
        
        Sample Data:
        {csv_sample}
        
        Please provide a thorough answer that:
        1. Directly addresses the specific question
        2. Uses data from the CSV to support your answer
        3. Includes relevant statistics or calculations if needed
        4. Explains your reasoning clearly
        5. Identifies any limitations in the data that affect your answer
        """,
        agent=task_analyst,
        expected_output="A detailed answer to the specific question about the CSV data, supported by evidence from the dataset."
    )

    # Create and run a simple crew with just this task
    print("Starting custom analysis...")
    crew = Crew(
        agents=[task_analyst],
        tasks=[analysis_task],
        verbose=True,
        process=Process.sequential
    )

    return crew.kickoff()

if __name__ == "__main__":
    # Banner: title line followed by its underline.
    print(
        "CSV Analysis Tool with Crew AI and Llama 3.3",
        "===========================================",
        sep="\n",
    )

    # Run one fixed question against the bundled report and show the answer.
    result = custom_task_analysis(
        file_path="reports.csv",
        question="What is the Channel Name with most Total Viewership Minutes",
    )
    print(result)

llama_model_load_from_file_impl: using device Metal (Apple M4 Pro) - 36843 MiB free
llama_model_loader: loaded meta data with 36 key-value pairs and 724 tensors from /Users/idks/.ollama/models/blobs/sha256-4824460d29f2058aaf6e1118a63a7a197a09bed509f0e7d4e2efb1ee273b447d (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.1 70B Instruct 2024 12
llama_model_loader: - kv   3:                            general.version str              = 2024-12
llama_model_loader: - kv   4:                           general.finetune str              = Instruct
llama_model_loader: - kv   5:                           general.basename str        

CSV Analysis Tool with Crew AI and Llama 3.3
Loading model and initializing agent for custom analysis...


load_tensors: layer  43 assigned to device CPU
load_tensors: layer  44 assigned to device CPU
load_tensors: layer  45 assigned to device CPU
load_tensors: layer  46 assigned to device CPU
load_tensors: layer  47 assigned to device CPU
load_tensors: layer  48 assigned to device CPU
load_tensors: layer  49 assigned to device CPU
load_tensors: layer  50 assigned to device CPU
load_tensors: layer  51 assigned to device CPU
load_tensors: layer  52 assigned to device CPU
load_tensors: layer  53 assigned to device CPU
load_tensors: layer  54 assigned to device CPU
load_tensors: layer  55 assigned to device CPU
load_tensors: layer  56 assigned to device CPU
load_tensors: layer  57 assigned to device CPU
load_tensors: layer  58 assigned to device CPU
load_tensors: layer  59 assigned to device CPU
load_tensors: layer  60 assigned to device CPU
load_tensors: layer  61 assigned to device CPU
load_tensors: layer  62 assigned to device CPU
load_tensors: layer  63 assigned to device CPU
load_tensors:


Provider List: https://docs.litellm.ai/docs/providers

Loading CSV file: reports.csv
{
  "columns": [
    "Date",
    "Channel Name",
    "Session Count",
    "Total Viewership Minutes",
    "Unique Viewers",
    "Avg Session Count",
    "Avg Session Duration Per Session",
    "Avg Session Duration Per Viewer"
  ],
  "shape": [
    99,
    8
  ],
  "missing_values": {
    "Date": 0,
    "Channel Name": 0,
    "Session Count": 0,
    "Total Viewership Minutes": 0,
    "Unique Viewers": 0,
    "Avg Session Count": 0,
    "Avg Session Duration Per Session": 0,
    "Avg Session Duration Per Viewer": 0
  },
  "numerical_stats": {
    "Session Count": {
      "count": 99.0,
      "mean": 2996.282828282828,
      "std": 6032.541729247323,
      "min": 1.0,
      "25%": 34.0,
      "50%": 274.0,
      "75%": 2710.5,
      "max": 37706.0
    },
    "Total Viewership Minutes": {
      "count": 99.0,
      "mean": 5833.946262626263,
      "std": 12673.649496043612,
      "min": 0.03,


BadRequestError: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=<__main__.LlamaLocal object at 0x11e654710>
 Pass model as E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/starcoder',..)` Learn more: https://docs.litellm.ai/docs/providers