In [5]:
import pandas as pd
import ast  # To convert string lists to actual lists

# Load the three source tables: access catalogue, departments, employees.
access_df = pd.read_csv("Accessibility.csv")
dept_df = pd.read_csv("Department.csv")
emp_df = pd.read_csv("Employees_table.csv")


def _parse_access_ids(raw):
    """Return the access IDs held in one Access_ID cell as a list.

    Handles a string containing a Python list/tuple literal (parsed with
    ast.literal_eval, which evaluates literals only), a value that is already
    a list or tuple, a missing value (None/NaN -> empty list), and a bare
    scalar (wrapped in a one-element list).
    """
    if isinstance(raw, (list, tuple)):
        return list(raw)
    if isinstance(raw, str):
        text = raw.strip()
        if not text:
            return []
        parsed = ast.literal_eval(text)
        return list(parsed) if isinstance(parsed, (list, tuple)) else [parsed]
    if pd.isna(raw):
        return []
    return [raw]


# Convert Access_ID to real lists; only string cells go through literal_eval,
# so missing or already-parsed cells no longer raise.
emp_df["Access_ID"] = emp_df["Access_ID"].apply(_parse_access_ids)

# Attach department details to every employee; the left join keeps employees
# whose Dept_ID has no match in dept_df.
merged_df = emp_df.merge(dept_df, on="Dept_ID", how="left")

# Lookup table: Access_ID -> Access name.
access_map = dict(zip(access_df["Access_ID"], access_df["Access"]))

# Translate each ID list into a list of names. An ID that is absent from the
# lookup becomes a visible "Unknown(<id>)" label rather than None, so the list
# stays position-aligned with Access_ID and can always be joined as text.
merged_df["Access_Name"] = merged_df["Access_ID"].apply(
    lambda id_list: [access_map.get(i, f"Unknown({i})") for i in id_list]
)

# Final dataframe with all relevant information. The explicit copy makes it
# independent of merged_df and avoids SettingWithCopyWarning on later edits.
final_df = merged_df[["Emp_name", "Dept_ID", "Dept_Name", "Access_ID", "Access_Name"]].copy()

# Display the first few rows
final_df.head()


Unnamed: 0,Emp_name,Dept_ID,Dept_Name,Access_ID,Access_Name
0,Ajay Chhajlani,1,Accounts and Finance,"[1, 2, 3, 4, 9, 16]","[Teams, Outlook_Internal, Outlook_External, Ou..."
1,Amar Gupta,1,Accounts and Finance,"[1, 2, 3, 4, 9, 16]","[Teams, Outlook_Internal, Outlook_External, Ou..."
2,Amit Bansode,1,Accounts and Finance,"[1, 2, 3, 4, 9, 16]","[Teams, Outlook_Internal, Outlook_External, Ou..."
3,Aniket Khode,1,Accounts and Finance,"[1, 2, 3, 4, 9, 16]","[Teams, Outlook_Internal, Outlook_External, Ou..."
4,Kajal Prajapati,1,Accounts and Finance,"[1, 2, 3, 4, 9, 16]","[Teams, Outlook_Internal, Outlook_External, Ou..."


In [13]:
def _join_present(values):
    """Comma-join values as text, skipping missing entries (None/NaN)."""
    return ", ".join(str(value) for value in values if pd.notna(value))


# One dict per question/answer pair; six pairs are generated per employee.
qa_data = []

for _, row in final_df.iterrows():
    emp = row['Emp_name']
    dept_id = row['Dept_ID']
    dept_name = row['Dept_Name']
    access_ids = ", ".join(map(str, row['Access_ID']))
    # Access_Name holds None for any ID without a match in the access lookup;
    # a plain ", ".join(...) would raise TypeError on it, so skip those entries.
    access_names = _join_present(row['Access_Name'])

    qa_data.extend([
        {"question": f"What department does {emp} belong to?", "answer": dept_name},
        {"question": f"What is the department ID of {emp}?", "answer": str(dept_id)},
        {"question": f"What accesses does {emp} have?", "answer": access_names},
        {"question": f"What are the access IDs assigned to {emp}?", "answer": access_ids},
        {"question": f"List the access names for {emp}", "answer": access_names},
        {"question": f"List the access IDs for {emp}", "answer": access_ids},
    ])

# Save as CSV (the row index is not written).
qa_df = pd.DataFrame(qa_data)
qa_df.to_csv("qa_dataset.csv", index=False)


In [17]:
from transformers import pipeline

# Question-answering pipeline backed by the
# distilbert-base-uncased-distilled-squad checkpoint.
qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


def _employee_facts(row):
    """Render one final_df row as the fact line used in the QA context."""
    # Skip missing access names (None for an unmapped Access_ID) so the join
    # cannot raise TypeError.
    access_names = ", ".join(str(name) for name in row['Access_Name'] if pd.notna(name))
    return f"{row['Emp_name']} works in {row['Dept_Name']}. They have access to {access_names}.\n"


# Use formatted text: one line per employee, assembled with a single join
# instead of growing the string with += on every iteration.
context = "".join(_employee_facts(row) for _, row in final_df.iterrows())

# Ask a question
result = qa_model(question="What department does Amar Gupta belong to?", context=context)
print(result['answer'])

Accounts and Finance
