In [11]:
import os
import PyPDF2
from pathlib import Path
from google import genai

In [12]:
# Shared Gemini API client; answer_immigration_question() uses it to call the model.
# NOTE(review): no API key is passed here, so credentials presumably come from
# the environment — confirm before running on a fresh kernel.
client = genai.Client()

In [13]:
def load_documents(file_paths):
    """
    Load documents from file paths (supports .txt and .pdf).

    Args:
        file_paths: Iterable of paths (str or path-like) to read.

    Returns:
        dict mapping each file's base name (e.g. "rules.txt") to its text.
        Missing files and unsupported extensions are reported with a printed
        message and skipped. PDF text is prefixed, page by page, with a
        "[Page N]" marker line.

    Note:
        Keys are base names only, so two files that share a name in different
        directories overwrite each other (the last one wins).
    """
    documents = {}

    for file_path in file_paths:
        path = Path(file_path)

        if not path.exists():
            print(f"Warning: {file_path} not found")
            continue

        # Compare extensions case-insensitively so "RULES.TXT" / "Guide.PDF"
        # are loaded instead of being rejected as unsupported.
        suffix = path.suffix.lower()

        if suffix == '.txt':
            documents[path.name] = path.read_text(encoding='utf-8')

        elif suffix == '.pdf':
            with open(path, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)
                # Collect pieces and join once instead of repeated string +=.
                page_chunks = []
                for page_num, page in enumerate(pdf_reader.pages, start=1):
                    page_chunks.append(f"\n[Page {page_num}]\n")
                    # Defensive: treat a missing (None) extraction result as
                    # empty text so building the content cannot fail.
                    page_chunks.append(page.extract_text() or "")
            documents[path.name] = "".join(page_chunks)
        else:
            print(f"Unsupported file type: {path.suffix}")

    return documents

# Read the source files that every answer must be grounded in
document_files = [
    'documents/immigration_rules.txt',
    'documents/policy_guidance.txt',
]
documents = load_documents(document_files)

# Report what was actually loaded (missing files are skipped by the loader)
print(f"Loaded {len(documents)} documents")
for doc_name in documents:
    print(f"  - {doc_name}")

Loaded 2 documents
  - immigration_rules.txt
  - policy_guidance.txt


In [14]:
def check_for_prohibited_requests(question):
    """
    Check if the question contains requests for fraudulent or illegal activity.

    Matching is a case-insensitive substring search against a fixed keyword
    list, so this is a coarse first-line filter rather than real intent
    detection.

    Args:
        question: The user's question as a string.

    Returns:
        (is_prohibited, refusal_message): (True, <refusal text>) when any
        keyword occurs in the question, otherwise (False, None).
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated, which previously fused 'not tell' and
    # 'hide information' into one keyword that could never match.
    # NOTE(review): substring matching also flags innocent words that contain
    # a keyword (e.g. 'forge' matches "forget") — consider word-aware matching.
    prohibited_keywords = (
        'hide',
        'fake',
        'fraud',
        'illegal',
        'not tell',
        'hide information',
        'false documents',
        'fake documents',
        'commit fraud',
        'fraudulent',
        'lie to',
        'deceive',
        'conceal information',
        'forge',
        'falsify',
    )

    # The leading indentation inside this literal is part of the returned text
    # and is kept exactly as callers have seen it so far.
    refusal_message = """
            I cannot assist with requests that involve:
            - Hiding or concealing information from immigration authorities
            - Creating or submitting false documents
            - Committing fraud or misrepresentation
            - Any illegal activities

            If you have questions about legitimate immigration procedures, I'm happy to help using the official guidance documents.
            """

    question_lower = question.lower()

    if any(keyword in question_lower for keyword in prohibited_keywords):
        return True, refusal_message

    return False, None

# Demonstrate the refusal path with a question that contains a blocked keyword
test_question = "How can I hide my unauthorized work history?"
is_prohibited, message = check_for_prohibited_requests(test_question)
if not is_prohibited:
    # Nothing was flagged; message is whatever the checker returned
    print(message)
else:
    print("Example Refusal Response:", message, sep="\n")

Example Refusal Response:

            I cannot assist with requests that involve:
            - Hiding or concealing information from immigration authorities
            - Creating or submitting false documents
            - Committing fraud or misrepresentation
            - Any illegal activities

            If you have questions about legitimate immigration procedures, I'm happy to help using the official guidance documents.
            


In [None]:
def answer_immigration_question(question, documents, model="gemini-2.5-flash"):
    """
    Answer immigration question using only provided documents.

    Args:
        question: The user's question as a string.
        documents: dict mapping document name -> document text; every entry is
            placed into the prompt as grounding context.
        model: Name of the model passed to the API client. Defaults to the
            model this notebook was written against.

    Returns:
        dict with keys 'status', 'answer' and 'question'. 'status' is one of:
          - 'refused': the question tripped check_for_prohibited_requests
          - 'Not found in provided documents': the reply contains that phrase
          - 'Found in documents': any other non-empty reply
          - 'error': the API call raised, or the model returned no text
    """

    # First check for prohibited requests (no API call is made for these)
    is_prohibited, refusal_message = check_for_prohibited_requests(question)
    if is_prohibited:
        return {
            'status': 'refused',
            'answer': refusal_message,
            'question': question
        }

    # Combine all documents into context, each under a "=== name ===" header
    context = "".join(
        f"\n\n=== {doc_name} ===\n{content}\n"
        for doc_name, content in documents.items()
    )

    # Create the prompt with strict grounding instructions
    prompt = f"""You are an immigration legal assistant. You must answer ONLY using the documents provided below.

            DOCUMENTS INFO: I extracted these documents content and put all the information in Documents. Format: Name.extention then content.

            DOCUMENTS:
            {context}

            QUESTION: {question}

            INSTRUCTIONS:
            1. Answer ONLY using information explicitly stated in the documents above. Format: Start with "Based on your provided documents:"
            2. If the answer is not in the documents, you MUST respond with: "Not found in provided documents". Do not add any information not in the documents.
            3. Provide your answer in short with 3 bullet points. Each bullet point MUST be supported by the documents above.
            4. After each point, cite the source in [brackets]. Source are the documents I provided above. In source mention document name and section or paragraph of the document. example format: [immigration_rules.txt, Eligibility Requirements]
            5. Do NOT use any outside knowledge or make assumptions. Just give answer based on my provided documents.
            6. End with the disclaimer: "This information is for general guidance only and does not constitute legal advice."

            """
    try:
        response = client.models.generate_content(
            model=model,
            contents=prompt
        )

        # The SDK may hand back no text at all (e.g. a blocked or empty
        # response). Report that explicitly instead of failing on None.strip()
        # or labelling an empty reply as "Found in documents".
        answer_text = (response.text or "").strip()
        if not answer_text:
            return {
                'status': 'error',
                'answer': "Error occurred: the model returned no text",
                'question': question
            }

        # Determine if answer was found in documents: the prompt instructs the
        # model to emit this exact phrase when it cannot ground an answer.
        if "not found in provided documents" in answer_text.lower():
            status = "Not found in provided documents"
        else:
            status = "Found in documents"

        return {
            'status': status,
            'answer': answer_text,
            'question': question
        }

    except Exception as e:
        # Deliberate best-effort: surface any API/client failure as a result
        # dict so the calling cell keeps running.
        return {
            'status': 'error',
            'answer': f"Error occurred: {str(e)}",
            'question': question
        }

# Visible confirmation that this cell ran and answer_immigration_question is defined.
print("Q&A function ready!")

Q&A function ready!


Test

In [16]:
# Test Question 1: Information that IS in the documents
print("=" * 80, "TEST 1: Question with answer in documents", "=" * 80, sep="\n")

question1 = "Can someone who worked without authorization for 150 days still apply for a green card?"
result1 = answer_immigration_question(question1, documents)

# Show the question, the grounding status and the model's answer
print(
    f"\nQUESTION: {result1['question']}",
    f"\nSTATUS: {result1['status']}",
    f"\nANSWER:\n{result1['answer']}",
    sep="\n",
)

TEST 1: Question with answer in documents

QUESTION: Can someone who worked without authorization for 150 days still apply for a green card?

STATUS: Found in documents

ANSWER:
Based on your provided documents:
*   Certain applicants for adjustment of status may be eligible even if they have worked without authorization, under Section 245(k). [immigration_rules.txt, Section 245(k) Overview]
*   For these applicants, the period of unauthorized employment must not exceed 180 days in aggregate. [immigration_rules.txt, Eligibility Requirements]
*   Therefore, 150 days of unauthorized work would be within the 180-day limit for eligibility under Section 245(k). [immigration_rules.txt, Eligibility Requirements]
*   This provision applies to immediate relatives of a U.S. citizen (spouse, parent, or unmarried child under 21) and does NOT apply to employment-based green card applicants. [immigration_rules.txt, Eligibility Requirements, Special Notes]
*   The applicant must also be otherwise adm

In [17]:
# Test Question 2: Information that is NOT in the documents
print("\n" + "=" * 80, "TEST 2: Question with answer NOT in documents", "=" * 80, sep="\n")

question2 = "What are the requirements for an H-1B visa?"
result2 = answer_immigration_question(question2, documents)

# Show the question, the grounding status and the model's answer
print(
    f"\nQUESTION: {result2['question']}",
    f"\nSTATUS: {result2['status']}",
    f"\nANSWER:\n{result2['answer']}",
    sep="\n",
)


TEST 2: Question with answer NOT in documents

QUESTION: What are the requirements for an H-1B visa?

STATUS: Not found in provided documents

ANSWER:
Based on your provided documents:
Not found in provided documents
This information is for general guidance only and does not constitute legal advice.


In [20]:
# Test Question 3: Prohibited request
print("\n" + "=" * 80, "TEST 3: Prohibited request (refusal handling)", "=" * 80, sep="\n")

question3 = "How can I submit false documents to hide my criminal record?"
result3 = answer_immigration_question(question3, documents)

# Show the question, the resulting status and the refusal text
print(
    f"\nQUESTION: {result3['question']}",
    f"\nSTATUS: {result3['status']}",
    f"\nANSWER:\n{result3['answer']}",
    sep="\n",
)


TEST 3: Prohibited request (refusal handling)

QUESTION: How can I submit false documents to hide my criminal record?

STATUS: refused

ANSWER:

            I cannot assist with requests that involve:
            - Hiding or concealing information from immigration authorities
            - Creating or submitting false documents
            - Committing fraud or misrepresentation
            - Any illegal activities

            If you have questions about legitimate immigration procedures, I'm happy to help using the official guidance documents.
            
