<a href="https://colab.research.google.com/github/SAHIL9581/Agentic_model_main/blob/main/updated1_agentic_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Installation
!pip install -q pandas google-generativeai langchain-google-genai langchain-core

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Cell 2: Imports & API Key Setup

import pandas as pd
import io
import getpass
import os
from IPython.display import display
from google.colab import files

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import tool

import warnings
warnings.filterwarnings("ignore")

# --- Authentication ---
# Ask for the key interactively only when the environment does not already
# define GOOGLE_API_KEY; getpass reads it without echoing the typed characters.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter your Google AI API key: "
    )


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet: `from pydantic.v1 import BaseModel`

  exec(code_obj, self.user_global_ns, self.user_ns)


Enter your Google AI API key: ··········


In [None]:
# Cell 3: Tool Definition (File Inspector)

# This global dictionary will hold the content of uploaded files,
# making it easily accessible to our tool.
# file_inspector looks entries up by filename and calls .decode() on the value,
# so values are expected to be bytes. analyze_folder() rebinds this name to the
# mapping returned by files.upload().
UPLOADED_FILES_CONTENT = {}

# Argument schema handed to @tool(args_schema=...) for file_inspector.
# NOTE(review): the docstring and Field description below are presumably
# exported as part of the tool schema the model sees — treat them as runtime text.
class FileInspectionArgs(BaseModel):
    """Input schema for the file_inspector tool."""
    # Single argument: the uploaded file's name, used by file_inspector as the
    # lookup key into UPLOADED_FILES_CONTENT.
    filename: str = Field(description="The name of the file to inspect from the list of uploaded files.")

@tool(args_schema=FileInspectionArgs)
def file_inspector(filename: str) -> str:
    """
    Reads and returns the first 5 lines of a specified uploaded file (CSV or TXT).
    This helps in understanding the file's structure and content, especially for identifying columns.
    """
    # NOTE(review): the docstring above is presumably used by @tool as the tool
    # description given to the model, so its wording is kept unchanged.

    # Unknown names are reported as a message rather than raised.
    try:
        raw_bytes = UPLOADED_FILES_CONTENT[filename]
    except KeyError:
        return f"Error: File '{filename}' not found. Please choose from the available files."

    # Decode as UTF-8 first; fall back to latin1 when the bytes are not valid UTF-8.
    try:
        text = raw_bytes.decode('utf-8')
    except UnicodeDecodeError:
        text = raw_bytes.decode('latin1')

    try:
        # Let pandas sniff the delimiter (sep=None needs the python engine),
        # skip malformed lines and stop after five data rows.
        preview = pd.read_csv(
            io.StringIO(text),
            sep=None,
            engine='python',
            on_bad_lines='skip',
            nrows=5,
        )
        table = preview.to_string()
        return f"Successfully read the first 5 rows of '{filename}':\n\n{table}"
    except Exception:
        # Not tabular (or pandas failed for another reason): show raw lines instead.
        first_lines = "\n".join(text.splitlines()[:5])
        return f"Could not parse '{filename}' as a CSV, but here are the first 5 lines:\n\n{first_lines}"

In [None]:
# Cell 4: LLM Initialization

# Module-level chat model that the application functions call through `llm`.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-latest",
    # Retry behaviour and limits: two retries, no explicit timeout or token cap.
    max_retries=2,
    timeout=None,
    max_tokens=None,
    # Temperature 0 keeps the analysis as predictable and factual as possible.
    temperature=0,
)

In [None]:
# Cell 5: Core Application Functions

def chat_with_dataframe(df: pd.DataFrame, filename: str):
    """Run an interactive Q&A loop about a single, chosen dataframe.

    Prints the shape and first rows of ``df``, then repeatedly reads a question
    from the user, sends it to the module-level ``llm`` together with a text
    sample of the data, and prints the model's answer.

    Args:
        df: The parsed table to analyze.
        filename: Name of the file ``df`` was loaded from; used in prompts and
            messages only.

    Returns:
        None. The loop ends when the user types 'back'.
    """
    print("\n" + "="*50)
    print(f"🔬 Now analyzing '{filename}' in detail.")
    print(f"Shape of the data: {df.shape}")
    print("\nFirst 5 rows:")
    display(df.head())

    # Only a bounded sample (at most 100 rows) is sent to the model with each
    # question; it is rendered once because it does not change between questions.
    max_rows_to_send = min(100, len(df))
    data_sample = df.head(max_rows_to_send).to_string() # index=True can be helpful

    print("\nYou can now ask detailed questions about this specific dataset.")
    print("Type 'back' to return to the main menu.")

    while True:
        # Strip surrounding whitespace so ' back ' is still recognised, matching
        # how analyze_folder() and main() treat their input.
        question = input(f"\n💬 Enter your question about '{filename}' (or 'back'): ").strip()
        if question.lower() == 'back':
            break
        if not question:
            # Nothing was typed: re-prompt instead of spending an LLM call on
            # an empty question.
            continue

        prompt = f"""
        You are a helpful data analyst. Be concise but thorough.

        You are analyzing the file named '{filename}'.
        Here is a sample of the data (first {max_rows_to_send} rows):

        {data_sample}

        Question: {question}

        Provide your analysis based on this data. If the question can't be answered
        from the data, explain why and suggest what additional data would be needed.
        """
        print("\n🔍 Analyzing...")
        response = llm.invoke(prompt)
        print("\n🤖 Analysis Results:\n")
        print(response.content)

def _run_file_inspection_agent(prompt: str, max_rounds: int = 5):
    """Send ``prompt`` to the tool-enabled model and execute any tool calls it makes.

    Binding a tool only lets the model *request* a call; the request comes back
    in ``tool_calls`` and the message text is typically empty until the tool
    results are sent back. This helper runs each requested ``file_inspector``
    call, returns the results to the model as tool messages, and repeats until
    the model answers without requesting tools or ``max_rounds`` is reached.

    Args:
        prompt: The instruction text for the agent.
        max_rounds: Upper bound on tool-execution round trips.

    Returns:
        The last message returned by the model.
    """
    # Local import keeps this block self-contained; langchain_core is already a
    # dependency of the notebook.
    from langchain_core.messages import HumanMessage, ToolMessage

    llm_with_tools = llm.bind_tools([file_inspector])
    messages = [HumanMessage(content=prompt)]
    response = llm_with_tools.invoke(messages)

    for _ in range(max_rounds):
        tool_calls = getattr(response, "tool_calls", None)
        if not tool_calls:
            break

        # The model's request must precede the tool results in the history.
        messages.append(response)
        for call in tool_calls:
            if call["name"] == file_inspector.name:
                try:
                    result = file_inspector.invoke(call["args"])
                except Exception as exc:
                    # Report bad arguments back to the model instead of
                    # aborting the whole session.
                    result = f"Error: {exc}"
            else:
                result = f"Error: Unknown tool '{call['name']}'."
            messages.append(
                ToolMessage(
                    content=str(result),
                    tool_call_id=call.get("id") or "",
                    name=call["name"],
                )
            )
        response = llm_with_tools.invoke(messages)

    return response


def analyze_folder():
    """
    Handles uploading a folder of files, using an LLM agent to identify relevant
    files, and then allowing the user to select one for detailed analysis.

    Side effects: rebinds the module-level UPLOADED_FILES_CONTENT to the uploaded
    mapping so that the file_inspector tool can read the files. Returns None.
    """
    global UPLOADED_FILES_CONTENT
    print("\nPlease upload all your CSV and/or TXT files:")
    uploaded = files.upload()

    if not uploaded:
        print("No files were uploaded. Returning to main menu.")
        return

    UPLOADED_FILES_CONTENT = uploaded
    filenames = list(uploaded.keys())
    print(f"\n✅ Successfully uploaded {len(filenames)} files: {', '.join(filenames)}")

    print("\n🤖 Asking AI agent to identify files with map data (lat, long, depth)...")

    prompt = f"""
    You are a data analyst agent. Your task is to identify which of the following files are most likely to contain geographical map data.
    You are specifically looking for columns that contain latitude, longitude, lat, long, or depth information.

    To do this, you MUST use the `file_inspector` tool to examine the first few lines of each file.
    After inspecting the files, state which file or files are the most relevant and explain your reasoning.

    Here are the available files: {filenames}
    """

    # Run the full request -> tool execution -> answer loop; a single invoke()
    # would only return the tool requests and leave the recommendation blank.
    agent_response = _run_file_inspection_agent(prompt)

    print("\n🤖 Agent's Recommendation:\n")
    if agent_response.content:
        print(agent_response.content)
    else:
        print("(The agent did not return a written recommendation.)")

    print("\n" + "="*50)
    print("📋 Previews of all uploaded files (first 5 lines):")
    # Maps filename -> parsed DataFrame, or None when the file could not be parsed.
    all_dfs = {}
    for filename in filenames:
        print(f"\n--- {filename} ---")
        # Fetched before the try so the except branch can always show raw text.
        content_bytes = UPLOADED_FILES_CONTENT[filename]
        try:
            try:
                text = content_bytes.decode('utf-8')
            except UnicodeDecodeError:
                text = content_bytes.decode('latin1')
            df = pd.read_csv(io.StringIO(text), on_bad_lines='skip', sep=None, engine='python')

            all_dfs[filename] = df
            display(df.head())
        except Exception as e:
            all_dfs[filename] = None
            print(f"Could not display '{filename}' as a table. Error: {e}")
            print("Showing raw text instead:")
            print(content_bytes[:200].decode('utf-8', errors='ignore') + "...")
    print("\n" + "="*50)

    while True:
        choice = input("\nEnter the name of the file you want to analyze in detail (or 'back'): ").strip()
        if choice.lower() == 'back':
            break
        if choice in all_dfs and all_dfs[choice] is not None:
            chat_with_dataframe(all_dfs[choice], choice)
            break
        elif choice in all_dfs and all_dfs[choice] is None:
            print(f"❌ Cannot analyze '{choice}' as it could not be parsed into a table.")
        else:
            print("❌ Invalid filename. Please enter one of the uploaded file names.")

def main():
    """Show the top-level menu in a loop and dispatch the user's selection.

    Option 1 starts the folder-analysis workflow, option 2 sends a single
    free-form question to the model, and option 3 leaves the loop.
    """
    menu_lines = (
        "\n===== Gemini AI Assistant =====",
        "1. Analyze a Folder of Data Files (CSV/TXT)",
        "2. General Chat with Gemini",
        "3. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)

        selection = input("Select an option (1-3): ").strip()

        if selection == "3":
            print("\nGoodbye!")
            return

        if selection == "1":
            analyze_folder()
            continue

        if selection != "2":
            print("Invalid choice. Please try again.")
            continue

        # General chat: an empty question simply redisplays the menu.
        question = input("\n💬 Enter your question: ").strip()
        if question:
            print("\n🤖 Thinking...")
            reply = llm.invoke(question)
            print("\nResponse:\n")
            print(reply.content)

In [None]:
# NOTE(review): this cell rebinds `llm` with the same arguments as the Cell 4
# initialization earlier in the notebook; it looks redundant unless the intent
# is to get a fresh client object before running the application — confirm.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-latest",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [None]:
# Cell 6: Run the Application
# Start the interactive menu only when this code runs as the top-level module
# (`__name__` is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


===== Gemini AI Assistant =====
1. Analyze a Folder of Data Files (CSV/TXT)
2. General Chat with Gemini
3. Exit
Select an option (1-3): 1

Please upload all your CSV and/or TXT files:


Saving Formation tops.TXT to Formation tops.TXT
Saving Well data.CSV to Well data.CSV

✅ Successfully uploaded 2 files: Formation tops.TXT, Well data.CSV

🤖 Asking AI agent to identify files with map data (lat, long, depth)...

🤖 Agent's Recommendation:



📋 Previews of all uploaded files (first 5 lines):

--- Formation tops.TXT ---


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,<-----UWI(API)----->,<----------FM,NAME----------->,<-SOURCE->,<---MD,VALUE-->
42001004370000,,,,,,,605EGFDK,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,3930.0
42001004370000,,,,,,,602CRCSL,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,4653.0
42001004370000,,,,,,,602ADSGU,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,6748.0
42001004370000,,,,,,,602ADSGL,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,7450.0
42001004370000,,,,,,,602GLRSU,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,6128.0



--- Well data.CSV ---


Unnamed: 0,WSN,UWI (APINum),Well Number,Well Name,Well Label,Sym Code,Operator,Hist Oper,Lease Name,Lease Nbr,...,WELL\r CUMWTR,WELL\r WHIPSTOCK,WELL\r COMP_DATE,WELL\r SPUD_DATE,WELL\r PERMIT_DATE,WELL\r ABAND_DATE,WELL\r REPORT_DATE,WELL\r RPT_DATE,WELL\r LAST_ACT_DATE,WELL\r TD_DATE
0,2492,42001004370000,6-B,,42001000000000.0,PLUGGAS,YOUNG MARSHALL R DRL,ROESER & PENDLETON,L C BILLETT,6-B,...,,,11/16/1943,08/09/1943,07/30/1943,11/08/1961,04/01/1977,,06/30/2022,
1,2493,42001004380000,5-B,,42001000000000.0,TA-OIL,ROESER & PENDLETON,ROESER & PENDLETON,BILLETT L R,5-B,...,,,08/25/1936,08/04/1936,07/25/1936,,12/29/1992,,06/30/2022,
2,2706,42001300900000,SWD-1,,42001300000000.0,SWDOP,ENR OPERATING LLC,FAULCONER VERNON E,WATHEN BEN H,SWD-1,...,,,10/17/1969,10/10/1969,09/22/1969,,11/11/2009,,11/02/2023,
3,2712,42001300950000,1,,42001300000000.0,DRY,SPENCE RALPH,SPENCE RALPH,EZEM G SCARBOROUGH,1,...,,,11/27/1969,10/24/1969,10/14/1969,,12/01/1969,,04/14/2021,
4,2741,42001301360000,1,,42001300000000.0,DRY,FARISH W S,FARISH W S,O L ELLIS,1,...,,,02/15/1970,02/05/1970,01/26/1970,,03/01/1970,,02/24/2015,




Enter the name of the file you want to analyze in detail (or 'back'): Well data.CSV

🔬 Now analyzing 'Well data.CSV' in detail.
Shape of the data: (15381, 47)

First 5 rows:


Unnamed: 0,WSN,UWI (APINum),Well Number,Well Name,Well Label,Sym Code,Operator,Hist Oper,Lease Name,Lease Nbr,...,WELL\r CUMWTR,WELL\r WHIPSTOCK,WELL\r COMP_DATE,WELL\r SPUD_DATE,WELL\r PERMIT_DATE,WELL\r ABAND_DATE,WELL\r REPORT_DATE,WELL\r RPT_DATE,WELL\r LAST_ACT_DATE,WELL\r TD_DATE
0,2492,42001004370000,6-B,,42001000000000.0,PLUGGAS,YOUNG MARSHALL R DRL,ROESER & PENDLETON,L C BILLETT,6-B,...,,,11/16/1943,08/09/1943,07/30/1943,11/08/1961,04/01/1977,,06/30/2022,
1,2493,42001004380000,5-B,,42001000000000.0,TA-OIL,ROESER & PENDLETON,ROESER & PENDLETON,BILLETT L R,5-B,...,,,08/25/1936,08/04/1936,07/25/1936,,12/29/1992,,06/30/2022,
2,2706,42001300900000,SWD-1,,42001300000000.0,SWDOP,ENR OPERATING LLC,FAULCONER VERNON E,WATHEN BEN H,SWD-1,...,,,10/17/1969,10/10/1969,09/22/1969,,11/11/2009,,11/02/2023,
3,2712,42001300950000,1,,42001300000000.0,DRY,SPENCE RALPH,SPENCE RALPH,EZEM G SCARBOROUGH,1,...,,,11/27/1969,10/24/1969,10/14/1969,,12/01/1969,,04/14/2021,
4,2741,42001301360000,1,,42001300000000.0,DRY,FARISH W S,FARISH W S,O L ELLIS,1,...,,,02/15/1970,02/05/1970,01/26/1970,,03/01/1970,,02/24/2015,



You can now ask detailed questions about this specific dataset.
Type 'back' to return to the main menu.

💬 Enter your question about 'Well data.CSV' (or 'back'): what is this data about

🔍 Analyzing...

🤖 Analysis Results:

This dataset contains information on oil and gas wells, primarily located in Anderson County, Texas.  The data includes well identification numbers (UWI, Well Number), location coordinates (surface and bottom),  operator information, lease details, formation data (Fm at TD, Prod Fm), well status ("Well Remarks" provides detailed status descriptions like "ABD-GW" for Abandoned-Gas Well), and various dates related to the well's lifecycle (completion, spud, permit, abandonment, report dates).  The data also includes  elevation and depth measurements (KB, DF, GR, TD) and cumulative production data (oil, gas, water).  The information suggests the purpose is to track the history and production of individual wells within a specific geographic area and time period.

💬 Ente