## Requirements 

In [None]:
%pip install langchain_openai
%pip install langchain_core
%pip install langchain_community
%pip install langchain_experimental

In [None]:
from langchain_openai.chat_models import ChatOpenAI
import os

openai_key = os.environ.get("OPENAI_API")
gpt4o = ChatOpenAI(temperature=0.0, model="gpt-4o-mini", openai_api_key=os.getenv("OPENAI_API_KEY"))

In [69]:
import pandas as pd

def extract_metadata(df):
    metadata = {}

    # Number of columns
    metadata['Number of Columns'] = df.shape[1]

    # Column names
    metadata['Schema'] = df.columns.tolist()

    # Data types of each column
    metadata['Data Types'] = str(df.dtypes)

    # Store multiple rows in the sample (default: first 5 rows)
    metadata['Sample'] = df.head(100).to_dict(orient="records")

    return metadata


In [None]:
import pandas as pd
import pandas as pd

df = pd.read_csv("try1.csv", encoding="latin-1")  # or encoding="ISO-8859-1"
metadata = extract_metadata(df)

In [74]:
print(metadata)

{'Number of Columns': 13, 'Schema': ['ID', 'Agency', 'Record Group', 'GAL LOOKUP - Display name', 'GAL LOOKUP - Email Address', 'Alias', 'Name', 'Role', 'Beginning Date', 'Ending Date', 'Currently in the Role?', 'In the role as Acting?', 'Notes'], 'Data Types': 'ID                             int64\nAgency                        object\nRecord Group                   int64\nGAL LOOKUP - Display name     object\nGAL LOOKUP - Email Address    object\nAlias                         object\nName                          object\nRole                          object\nBeginning Date                object\nEnding Date                   object\nCurrently in the Role?        object\nIn the role as Acting?        object\nNotes                         object\ndtype: object', 'Sample': [{'ID': 1, 'Agency': 'AMS', 'Record Group': 136, 'GAL LOOKUP - Display name': 'Adams, Keith', 'GAL LOOKUP - Email Address': 'Keith.Adams@usda.gov ', 'Alias': 'Keith.Adams@ams.usda.gov', 'Name': 'Keith Adams', 'Role': 

In [76]:
question = "What is the Starting and Ending date for Erin Morris"

prompt_template = '''
Assistant is an AI model that takes in a sample from a dataset, understands the data, and provides the answer to the question:  

**Question:** {question}  

### Instructions:  
- Analyze the given sample data to extract relevant insights.  
- Provide all the information which is being asked.  
- If the question asks for multiple pieces of information, include all relevant details.
- Alias is also Considered as the Email Address
- Appearing Same Email Address Twice doesnt considered as More than One Email

SCHEMA:
--------
{schema}

DATA TYPES: 
--------
{data_types}

SAMPLE: 
--------
{sample}
'''.format(question=question, schema=metadata["Schema"], data_types=metadata["Data Types"], sample=metadata["Sample"])

response = gpt4o.invoke(prompt_template)
response.content


'For Erin Morris, the relevant details are as follows:\n\n- **Starting Date (Beginning Date):** 2/20/2020\n- **Ending Date:** (Currently in the role, so no ending date is provided)\n\nAdditionally, Erin Morris has held another role with the following details:\n- **Starting Date (Beginning Date):** 8/14/2011\n- **Ending Date:** 6/30/2013\n\nIn summary:\n- **Current Role:**\n  - **Starting Date:** 2/20/2020\n  - **Ending Date:** Currently in the role (no ending date)\n  \n- **Previous Role:**\n  - **Starting Date:** 8/14/2011\n  - **Ending Date:** 6/30/2013'

In [77]:
print("Question: ", question)
print("Answer: ", response.content)

Question:  What is the Starting and Ending date for Erin Morris
Answer:  For Erin Morris, the relevant details are as follows:

- **Starting Date (Beginning Date):** 2/20/2020
- **Ending Date:** (Currently in the role, so no ending date is provided)

Additionally, Erin Morris has held another role with the following details:
- **Starting Date (Beginning Date):** 8/14/2011
- **Ending Date:** 6/30/2013

In summary:
- **Current Role:**
  - **Starting Date:** 2/20/2020
  - **Ending Date:** Currently in the role (no ending date)
  
- **Previous Role:**
  - **Starting Date:** 8/14/2011
  - **Ending Date:** 6/30/2013


In [75]:
metadata["Sample"]

[{'ID': 1,
  'Agency': 'AMS',
  'Record Group': 136,
  'GAL LOOKUP - Display name': 'Adams, Keith',
  'GAL LOOKUP - Email Address': 'Keith.Adams@usda.gov ',
  'Alias': 'Keith.Adams@ams.usda.gov',
  'Name': 'Keith Adams',
  'Role': 'Mission Area Senior Program Manager',
  'Beginning Date': '3/16/2021',
  'Ending Date': '1/20/2025',
  'Currently in the Role?': 'Yes',
  'In the role as Acting?': 'No',
  'Notes': nan},
 {'ID': 2,
  'Agency': 'AMS',
  'Record Group': 136,
  'GAL LOOKUP - Display name': 'Adams, Keith',
  'GAL LOOKUP - Email Address': 'Keith.Adams@usda.gov\n',
  'Alias': 'Keith.Adams@ams.usda.gov',
  'Name': 'Keith Adams',
  'Role': 'Deputy Administrator, Management and Analysis Program',
  'Beginning Date': '1/25/2021',
  'Ending Date': '6/28/2024',
  'Currently in the Role?': 'Yes',
  'In the role as Acting?': 'No',
  'Notes': nan},
 {'ID': 3,
  'Agency': 'AMS',
  'Record Group': 136,
  'GAL LOOKUP - Display name': 'Allen, William',
  'GAL LOOKUP - Email Address': 'William.