<a href="https://colab.research.google.com/github/SAHIL9581/Agentic_model_main/blob/main/updated3_agentic_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install -q fastapi uvicorn pandas google-generativeai langchain-google-genai langchain-core langchain-experimental

In [7]:
import pandas as pd
import io
import os
from typing import List, Dict

from google.colab import files
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import tool

import warnings
warnings.filterwarnings("ignore")

In [8]:
# This global dictionary will hold the content of uploaded files.
UPLOADED_FILES_CONTENT = {}
# This global variable will hold the active DataFrame for our new tool.
ACTIVE_DF = None

# --- Tool 1: For the original agentic search ---
class FileInspectionArgs(BaseModel):
    """Input schema for the file_inspector tool."""
    filename: str = Field(description="The name of the file to inspect from the list of uploaded files.")

@tool(args_schema=FileInspectionArgs)
def file_inspector(filename: str) -> str:
    """  <--- THIS DOCSTRING WAS MISSING
    Reads and returns the first 5 lines of a specified uploaded file (CSV or TXT).
    This helps in understanding the file's structure and content, especially for identifying columns.
    """
    if filename not in UPLOADED_FILES_CONTENT: return f"Error: File '{filename}' not found."
    content_bytes = UPLOADED_FILES_CONTENT[filename]
    try: content_str = content_bytes.decode('utf-8')
    except UnicodeDecodeError: content_str = content_bytes.decode('latin1')
    try:
        df = pd.read_csv(io.StringIO(content_str), on_bad_lines='skip', sep=None, engine='python', nrows=5)
        return f"Successfully read the first 5 rows of '{filename}':\n\n{df.to_string()}"
    except Exception as e:
        first_lines = "\n".join(content_str.splitlines()[:5])
        return f"Could not parse '{filename}' as a CSV, but here are the first 5 lines:\n\n{first_lines}"

# --- Tool 2: The NEW Code Interpreter for deep analysis ---
class CodeInterpreterArgs(BaseModel):
    """Input schema for the python_data_analyst tool."""
    pandas_command: str = Field(description="A single-line Python command using the 'df' variable to query the loaded pandas DataFrame.")

@tool(args_schema=CodeInterpreterArgs)
def python_data_analyst(pandas_command: str) -> str:
    """
    Executes a pandas command on the loaded DataFrame 'df' and returns the result.
    Use this for any questions that require filtering, calculating, or specific data lookups.
    Example: To find unique operators, the command would be "df['Operator'].unique()"
    """
    global ACTIVE_DF
    if ACTIVE_DF is None:
        return "Error: No DataFrame is currently loaded. Please load a file first."

    df = ACTIVE_DF # Use the globally loaded DataFrame

    try:
        print(f"⚙️ Executing command: {pandas_command}")
        local_scope = {}
        exec(f"result = {pandas_command}", {'df': df}, local_scope)
        result = local_scope.get('result', "Command executed, but no result was returned.")
        return f"Command execution successful. Result:\n{str(result)}"
    except Exception as e:
        return f"Error executing command: {e}. Please check your pandas command syntax."

In [9]:
# --- Authentication ---
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = input("Enter your Google AI API key: ")  # Use input() for local testing

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-latest",
    temperature=0, # Low temperature for more predictable, factual analysis
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

UPLOADED_FILES_CONTENT = {}  # Global variable to store uploaded file contents

In [10]:
def upload_files():
    """Uploads one or more files."""
    global UPLOADED_FILES_CONTENT
    print("Please upload your CSV and/or TXT files:")
    uploaded = files.upload()

    if not uploaded:
        print("No files were uploaded.")
        return

    UPLOADED_FILES_CONTENT = uploaded
    filenames = list(uploaded.keys())
    print(f"Successfully uploaded {len(filenames)} files: {', '.join(filenames)}")
    return filenames

def analyze_folder():
    """Handles uploading a folder of files, using an LLM agent to identify relevant
    files, and then allowing the user to select one for detailed analysis."""
    global UPLOADED_FILES_CONTENT
    filenames = upload_files()
    if not filenames:
        return

    print("\n🤖 Asking AI agent to identify files with map data (lat, long, depth)...")

    llm_with_tools = llm.bind_tools([file_inspector])

    prompt = f"""
    You are a data analyst agent. Your task is to identify which of the following files are most likely to contain geographical map data.
    You are specifically looking for columns that contain latitude, longitude, lat, long, or depth information.

    To do this, you MUST use the `file_inspector` tool to examine the first few lines of each file.
    After inspecting the files, state which file or files are the most relevant and explain your reasoning.

    Here are the available files: {filenames}
    """

    agent_response = llm_with_tools.invoke(prompt)

    print("\n🤖 Agent's Recommendation:\n")
    print(agent_response.content)

    print("\n📋 Previews of all uploaded files (first 5 lines):")
    all_dfs = {}
    for filename in filenames:
        print(f"\n--- {filename} ---")
        try:
            content_bytes = UPLOADED_FILES_CONTENT[filename]
            try:
                df = pd.read_csv(io.StringIO(content_bytes.decode('utf-8')), on_bad_lines='skip', sep=None, engine='python')
            except UnicodeDecodeError:
                df = pd.read_csv(io.StringIO(content_bytes.decode('latin1')), on_bad_lines='skip', sep=None, engine='python')

            all_dfs[filename] = df
            print(df.head())
        except Exception as e:
            all_dfs[filename] = None
            print(f"Could not display '{filename}' as a table. Error: {e}")
            print("Showing raw text instead:")
            print(content_bytes[:200].decode('utf-8', errors='ignore') + "...")

    while True:
        choice = input("\nEnter the name of the file you want to analyze in detail (or 'back'): ").strip()
        if choice.lower() == 'back':
            break
        if choice in all_dfs and all_dfs[choice] is not None:
            chat_with_dataframe(all_dfs[choice], choice)
            break
        elif choice in all_dfs and all_dfs[choice] is None:
            print(f"❌ Cannot analyze '{choice}' as it could not be parsed into a table.")
        else:
            print("❌ Invalid filename. Please enter one of the uploaded file names.")

def analyze_multiple_files():
    """Allows the user to upload multiple files and ask questions that can be
    answered by combining information from all uploaded files."""
    global UPLOADED_FILES_CONTENT
    filenames = upload_files()
    if not filenames:
        return

    all_dfs = {}
    for filename in filenames:
        try:
            content_bytes = UPLOADED_FILES_CONTENT[filename]
            try:
                df = pd.read_csv(io.StringIO(content_bytes.decode('utf-8')), on_bad_lines='skip', sep=None, engine='python')
            except UnicodeDecodeError:
                df = pd.read_csv(io.StringIO(content_bytes.decode('latin1')), on_bad_lines='skip', sep=None, engine='python')
            all_dfs[filename] = df
        except Exception as e:
            all_dfs[filename] = None
            print(f"Could not parse '{filename}'. Error: {e}")

    while True:
        question = input("\nEnter your question (or 'back'): ").strip()
        if question.lower() == 'back':
            break

        # Construct the prompt with information about all files
        prompt = "You are a data analyst.  You have access to the following dataframes:\n\n"
        for filename, df in all_dfs.items():
            if df is not None:
                prompt += f"--- {filename} ---\n"
                prompt += df.head(5).to_string() + "\n\n"
            else:
                prompt += f"--- {filename} --- (Could not be parsed as a dataframe)\n\n"

        prompt += f"Question: {question}\n"
        prompt += "Provide a concise and thorough answer based on the available data. If the question cannot be answered, explain why."

        print("\n🔍 Analyzing...")
        response = llm.invoke(prompt)
        print("\n🤖 Analysis Results:\n")
        print(response.content)

def chat_with_dataframe(df: pd.DataFrame, filename: str):
    """Handles the interactive Q&A for a single, chosen dataframe."""
    print("\n" + "="*50)
    print(f"🔬 Now analyzing '{filename}' in detail.")
    print(f"Shape of the data: {df.shape}")
    print("\nFirst 5 rows:")
    print(df.head())

    max_rows_to_send = min(100, len(df))
    data_sample = df.head(max_rows_to_send).to_string() # index=True can be helpful

    print("\nYou can now ask detailed questions about this specific dataset.")
    print("Type 'back' to return to the main menu.")

    while True:
        question = input(f"\n💬 Enter your question about '{filename}' (or 'back'): ")
        if question.lower() == 'back':
            break

        prompt = f"""
        You are a helpful data analyst. Be concise but thorough.

        You are analyzing the file named '{filename}'.
        Here is a sample of the data (first {max_rows_to_send} rows):

        {data_sample}

        Question: {question}

        Provide your analysis based on this data. If the question can't be answered
        from the data, explain why and suggest what additional data would be needed.
        """
        print("\n🔍 Analyzing...")
        response = llm.invoke(prompt)
        print("\n🤖 Analysis Results:\n")
        print(response.content)

def python_data_analyst(pandas_command: str):
    """Executes a pandas command."""
    global ACTIVE_DF
    if ACTIVE_DF is None:
        return "Error: No DataFrame is currently loaded. Please load a file first."

    df = ACTIVE_DF # Use the globally loaded DataFrame

    try:
        print(f"⚙️ Executing command: {pandas_command}")
        local_scope = {}
        exec(f"result = {pandas_command}", {'df': df}, local_scope)
        result = local_scope.get('result', "Command executed, but no result was returned.")
        return f"Command execution successful. Result:\n{str(result)}"
    except Exception as e:
        return f"Error executing command: {e}. Please check your pandas command syntax."

In [11]:
def main():
    """The main entry point of the application."""
    while True:
        print("\n===== Gemini AI Assistant =====")
        print("1. Analyze a Folder of Data Files (CSV/TXT)")
        print("2. Analyze Multiple Files and Combine Data")
        print("3. General Chat with Gemini")
        print("4. Execute Pandas Command")
        print("5. Exit")

        choice = input("Select an option (1-5): ").strip()

        if choice == "1":
            analyze_folder()
        elif choice == "2":
            analyze_multiple_files()
        elif choice == "3":
            question = input("\n💬 Enter your question: ").strip()
            if not question: continue
            print("\n🤖 Thinking...")
            response = llm.invoke(question)
            print("\nResponse:\n")
            print(response.content)
        elif choice == "4":
            command = input("\nEnter a pandas command (e.g., df['column'].value_counts()): ").strip()
            result = python_data_analyst(command)
            print(result)
        elif choice == "5":
            print("\nGoodbye!")
            break
        else:
            print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()


===== Gemini AI Assistant =====
1. Analyze a Folder of Data Files (CSV/TXT)
2. Analyze Multiple Files and Combine Data
3. General Chat with Gemini
4. Execute Pandas Command
5. Exit
Select an option (1-5): 2
Please upload your CSV and/or TXT files:


Saving Well data.CSV to Well data.CSV
Saving Formation tops.TXT to Formation tops.TXT
Successfully uploaded 2 files: Well data.CSV, Formation tops.TXT

Enter your question (or 'back'): what is this data about

🔍 Analyzing...

🤖 Analysis Results:

The data describes oil and gas wells.  The `well data.CSV` file contains information about individual wells, including their location, operator, completion date, production data (oil, gas, water), and status.  The `Formation tops.TXT` file provides formation top depths (measured depth, MD) for specific wells, identified by their UWI (API number).  The data appears to be from Texas, based on the presence of Texas-specific fields in `well data.CSV`.

Enter your question (or 'back'): now give me some important datas to be known

🔍 Analyzing...

🤖 Analysis Results:

Based on the provided data, here are some important data points:

* **Well Status and Activity:** The datasets show various well statuses (e.g., ABD-GW, TA-OIL, SWDOP, D&A) and activit

Saving Well data.CSV to Well data (1).CSV
Saving Formation tops.TXT to Formation tops (1).TXT
Successfully uploaded 2 files: Well data (1).CSV, Formation tops (1).TXT

🤖 Asking AI agent to identify files with map data (lat, long, depth)...

🤖 Agent's Recommendation:



📋 Previews of all uploaded files (first 5 lines):

--- Well data (1).CSV ---
    WSN    UWI (APINum) Well Number Well Name    Well Label Sym Code  \
0  2492  42001004370000         6-B       NaN  4.200100e+13  PLUGGAS   
1  2493  42001004380000         5-B       NaN  4.200100e+13   TA-OIL   
2  2706  42001300900000       SWD-1       NaN  4.200130e+13    SWDOP   
3  2712  42001300950000           1       NaN  4.200130e+13      DRY   
4  2741  42001301360000           1       NaN  4.200130e+13      DRY   

               Operator           Hist Oper          Lease Name Lease Nbr  \
0  YOUNG MARSHALL R DRL  ROESER & PENDLETON         L C BILLETT       6-B   
1    ROESER & PENDLETON  ROESER & PENDLETON         BILLETT L R   

Saving Well data.CSV to Well data (2).CSV
Successfully uploaded 1 files: Well data (2).CSV

🤖 Asking AI agent to identify files with map data (lat, long, depth)...

🤖 Agent's Recommendation:



📋 Previews of all uploaded files (first 5 lines):

--- Well data (2).CSV ---
    WSN    UWI (APINum) Well Number Well Name    Well Label Sym Code  \
0  2492  42001004370000         6-B       NaN  4.200100e+13  PLUGGAS   
1  2493  42001004380000         5-B       NaN  4.200100e+13   TA-OIL   
2  2706  42001300900000       SWD-1       NaN  4.200130e+13    SWDOP   
3  2712  42001300950000           1       NaN  4.200130e+13      DRY   
4  2741  42001301360000           1       NaN  4.200130e+13      DRY   

               Operator           Hist Oper          Lease Name Lease Nbr  \
0  YOUNG MARSHALL R DRL  ROESER & PENDLETON         L C BILLETT       6-B   
1    ROESER & PENDLETON  ROESER & PENDLETON         BILLETT L R       5-B   
2     ENR OPERATING LLC  FAULCONER VERNON E        WATHEN BEN H  