# Step 0: Mount Google Drive in Colab

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Step 1: Upload CSVs to Google Drive

In [7]:
import pandas as pd

# File paths in Google Drive
housing_path = "/content/drive/MyDrive/Agentic_AI/housing_listings_pakistan_400.csv"
roommates_path = "/content/drive/MyDrive/Agentic_AI/synthetic_roommate_profiles_pakistan_400.csv"

# Load datasets
roommates_df = pd.read_csv(roommates_path)
housing_df = pd.read_csv(housing_path)

# Show sample rows
print("Roommate Profiles Sample:")
display(roommates_df.head())

print("\nHousing Listings Sample:")
display(housing_df.head())

# Check dataset shapes
print("\nRoommates shape:", roommates_df.shape)
print("Housing shape:", housing_df.shape)


Roommate Profiles Sample:


Unnamed: 0,id,raw_profile_text,city,area,budget_PKR,sleep_schedule,cleanliness,noise_tolerance,study_habits,food_pref
0,R-001,"Hostel seat available G-11, Islamabad. Budget ...",Islamabad,G-11,13000,Night owl,Tidy,Quiet,Online classes,Flexible
1,R-002,"Hostel seat available Gulshan-e-Iqbal, Karachi...",Karachi,Gulshan-e-Iqbal,14000,Night owl,Messy,Quiet,Late-night study,Flexible
2,R-003,"Need roommate in Peoples Colony, Faisalabad. B...",Faisalabad,Peoples Colony,21000,Flexible,Messy,Moderate,Library,Flexible
3,R-004,Room share Islamabad G-11. Rent 24k. I'm chill...,Islamabad,G-11,24000,Early riser,Average,Quiet,Online classes,Veg
4,R-005,"Flat share in Shah Rukn-e-Alam, Multan, 20k bu...",Multan,Shah Rukn-e-Alam,20000,Night owl,Tidy,Quiet,Online classes,Flexible



Housing Listings Sample:


Unnamed: 0,listing_id,city,area,monthly_rent_PKR,rooms_available,amenities__001,amenities__002,amenities__003,amenities__004,amenities__005,amenities__006,amenities__007,availability
0,H-0001,Multan,Gulgasht Colony,14932,1,Separate washroom,Parking,Security guard,WiFi,,,,Available
1,H-0002,Islamabad,Blue Area,16177,4,Security guard,Parking,Furnished,Mess facility,Separate washroom,Attached bathroom,Laundry service,Not Available
2,H-0003,Multan,Shah Rukn-e-Alam,24030,4,Mess facility,Security guard,Electricity backup,Attached bathroom,Separate washroom,,,Available
3,H-0004,Lahore,Gulberg,16815,4,Parking,Attached bathroom,Laundry service,Security guard,Mess facility,,,Available
4,H-0005,Multan,Gulgasht Colony,25417,1,Parking,Water supply,Attached bathroom,Mess facility,,,,Not Available



Roommates shape: (400, 10)
Housing shape: (400, 13)


In [8]:
import pandas as pd

# Load datasets (adjust path if needed)
roommates = pd.read_csv("/content/drive/MyDrive/Agentic_AI/synthetic_roommate_profiles_pakistan_400.csv")
housing = pd.read_csv("/content/drive/MyDrive/Agentic_AI/housing_listings_pakistan_400.csv")

# --- Quick EDA for roommates ---
print("Roommates Dataset Info:")
print(roommates.info())
print("\nMissing values per column:")
print(roommates.isnull().sum())
print("\nUnique values per column:")
print(roommates.nunique())

# Budget distribution
print("\nBudget distribution (Roommates):")
print(roommates['budget_PKR'].describe())

# --- Quick EDA for housing ---
print("\nHousing Dataset Info:")
print(housing.info())
print("\nMissing values per column:")
print(housing.isnull().sum())
print("\nUnique values per column:")
print(housing.nunique())

# Rent distribution
print("\nRent distribution (Housing):")
print(housing['monthly_rent_PKR'].describe())

# --- City counts ---
print("\nRoommates by city:")
print(roommates['city'].value_counts().head(10))

print("\nHousing by city:")
print(housing['city'].value_counts().head(10))

# --- Optional: Spot inconsistent labels ---
print("\nSample unique values for categorical fields:")
for col in ['sleep_schedule','cleanliness','noise_tolerance','food_pref']: # Removed 'gender_preference' as it's not in the dataframe
    if col in roommates.columns:
        print(f"{col} → {roommates[col].unique()[:15]}")

Roommates Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   id                400 non-null    object
 1   raw_profile_text  400 non-null    object
 2   city              400 non-null    object
 3   area              400 non-null    object
 4   budget_PKR        400 non-null    int64 
 5   sleep_schedule    400 non-null    object
 6   cleanliness       400 non-null    object
 7   noise_tolerance   400 non-null    object
 8   study_habits      400 non-null    object
 9   food_pref         400 non-null    object
dtypes: int64(1), object(9)
memory usage: 31.4+ KB
None

Missing values per column:
id                  0
raw_profile_text    0
city                0
area                0
budget_PKR          0
sleep_schedule      0
cleanliness         0
noise_tolerance     0
study_habits        0
food_pref           0
dtype: int64

U

# Step 3 : Standardize roommates dataset

In [9]:
import pandas as pd

# Reload datasets
roommates = pd.read_csv("/content/drive/MyDrive/Agentic_AI/synthetic_roommate_profiles_pakistan_400.csv")
housing = pd.read_csv("/content/drive/MyDrive/Agentic_AI/housing_listings_pakistan_400.csv")

# -----------------
# Standardize roommates dataset
# -----------------

# Sleep schedule
sleep_map = {
    "Night owl": "night_owl",
    "Early riser": "early_riser",
    "Flexible": "flexible"
}
roommates['sleep_schedule'] = roommates['sleep_schedule'].map(sleep_map)

# Cleanliness
clean_map = {
    "Messy": "low",
    "Average": "medium",
    "Tidy": "high"
}
roommates['cleanliness'] = roommates['cleanliness'].map(clean_map)

# Noise tolerance
noise_map = {
    "Quiet": "low",
    "Moderate": "medium",
    "Loud ok": "high"
}
roommates['noise_tolerance'] = roommates['noise_tolerance'].map(noise_map)

# Food preferences
food_map = {
    "Flexible": "flexible",
    "Veg": "veg",
    "Non-veg": "non_veg"
}
roommates['food_pref'] = roommates['food_pref'].map(food_map)

# Budget ranges
def budget_bucket(x):
    if x < 15000: return "low"
    elif x < 22000: return "medium"
    else: return "high"
roommates['budget_range'] = roommates['budget_PKR'].apply(budget_bucket)

# -----------------
# Standardize housing dataset
# -----------------

# Rent ranges
def rent_bucket(x):
    if x < 15000: return "low"
    elif x < 22000: return "medium"
    else: return "high"
housing['rent_range'] = housing['monthly_rent_PKR'].apply(rent_bucket)

# Normalize availability
housing['availability'] = housing['availability'].str.strip().str.lower().map({
    "available": "available",
    "not available": "not_available"
})

# -----------------
# Save cleaned versions
# -----------------
roommates.to_csv("/content/drive/MyDrive/Agentic_AI/roommates_clean.csv", index=False)
housing.to_csv("/content/drive/MyDrive/Agentic_AI/housing_clean.csv", index=False)

# Show sample row before & after cleaning
print("Sample BEFORE cleaning:")
print(pd.read_csv("/content/drive/MyDrive/Agentic_AI/synthetic_roommate_profiles_pakistan_400.csv").iloc[0])

print("\nSample AFTER cleaning:")
print(roommates.iloc[0])

print("\nMapping rules applied:")
print("Sleep:", sleep_map)
print("Cleanliness:", clean_map)
print("Noise:", noise_map)
print("Food:", food_map)


Sample BEFORE cleaning:
id                                                              R-001
raw_profile_text    Hostel seat available G-11, Islamabad. Budget ...
city                                                        Islamabad
area                                                             G-11
budget_PKR                                                      13000
sleep_schedule                                              Night owl
cleanliness                                                      Tidy
noise_tolerance                                                 Quiet
study_habits                                           Online classes
food_pref                                                    Flexible
Name: 0, dtype: object

Sample AFTER cleaning:
id                                                              R-001
raw_profile_text    Hostel seat available G-11, Islamabad. Budget ...
city                                                        Islamabad
area               

In [10]:
! pip install -q streamlit
! npm install localtunnel # Or ! pip install pyngrok

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m101.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m125.8 MB/s[0m eta [36m0:00:00[0m
[?25h[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
added 22 packages in 799ms
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details
[1G[0K⠦[1G[0K

# (Step 4): Create degraded mode sample datasets

In [11]:
import pandas as pd

# Load cleaned datasets
roommates = pd.read_csv("/content/drive/MyDrive/Agentic_AI/roommates_clean.csv")
housing = pd.read_csv("/content/drive/MyDrive/Agentic_AI/housing_clean.csv")

# Take random samples (fix random_state for reproducibility)
roommates_sample = roommates.sample(n=25, random_state=42)
housing_sample = housing.sample(n=20, random_state=42)

# Save to CSV
roommates_sample.to_csv("/content/drive/MyDrive/Agentic_AI/roommates_clean_sample.csv", index=False)
housing_sample.to_csv("/content/drive/MyDrive/Agentic_AI/housing_clean_sample.csv", index=False)

print("Sample datasets created successfully ✅")
print("Roommates sample shape:", roommates_sample.shape)
print("Housing sample shape:", housing_sample.shape)

# Show a quick preview
print("\nRoommates sample preview:")
print(roommates_sample.head(3))
print("\nHousing sample preview:")
print(housing_sample.head(3))


Sample datasets created successfully ✅
Roommates sample shape: (25, 11)
Housing sample shape: (20, 14)

Roommates sample preview:
        id                                   raw_profile_text        city  \
209  R-210  Need roommate in Satellite Town, Rawalpindi. B...  Rawalpindi   
280  R-281  Flat share in Saddar, Rawalpindi, 18k budget. ...  Rawalpindi   
33   R-034  Need roommate in Gulshan-e-Iqbal, Karachi. Bud...     Karachi   

                area  budget_PKR sleep_schedule cleanliness noise_tolerance  \
209   Satellite Town       16000       flexible      medium          medium   
280           Saddar       18000       flexible      medium             low   
33   Gulshan-e-Iqbal       21000      night_owl      medium             low   

         study_habits food_pref budget_range  
209        Room study       veg       medium  
280        Room study   non_veg       medium  
33   Late-night study  flexible       medium  

Housing sample preview:
    listing_id       city      

# Step 5: Build Matching Logic

In [12]:
from itertools import combinations

# -----------------------------
# Roommate → Housing Matching
# -----------------------------
def match_roommate_to_housing(roommates_df, housing_df, top_n=3):
    matches = []

    for _, rm in roommates_df.iterrows():
        scored = []
        for _, hs in housing_df.iterrows():
            score = 0

            # Budget compatibility
            if rm["budget_range"] == hs["rent_range"]:
                score += 3
            elif (rm["budget_range"], hs["rent_range"]) in [
                ("low", "medium"), ("medium", "high"),
                ("medium", "low"), ("high", "medium")
            ]:
                score += 2

            # City & Area
            if rm["city"].lower() == hs["city"].lower():
                score += 2
                if rm["area"].lower() == hs["area"].lower():
                    score += 2

            # Availability
            if hs["availability"] == "available":
                score += 1

            scored.append((hs["listing_id"], score))

        # sort & keep top N matches
        top_matches = sorted(scored, key=lambda x: x[1], reverse=True)[:top_n]
        for listing_id, score in top_matches:
            matches.append({
                "roommate_id": rm["id"],
                "listing_id": listing_id,
                "score": score
            })

    return pd.DataFrame(matches)


# -----------------------------
# Roommate → Roommate Matching
# -----------------------------
def roommate_compatibility(rm1, rm2):
    score = 0

    # City & Area
    if rm1["city"].lower() == rm2["city"].lower():
        score += 3
        if rm1["area"].lower() == rm2["area"].lower():
            score += 2

    # Budget compatibility
    if rm1["budget_range"] == rm2["budget_range"]:
        score += 3
    elif (rm1["budget_range"], rm2["budget_range"]) in [
        ("low", "medium"), ("medium", "high"),
        ("medium", "low"), ("high", "medium")
    ]:
        score += 2

    # Lifestyle
    if rm1["sleep_schedule"] == rm2["sleep_schedule"] or "flexible" in [rm1["sleep_schedule"], rm2["sleep_schedule"]]:
        score += 2
    if rm1["cleanliness"] == rm2["cleanliness"]:
        score += 2
    if rm1["noise_tolerance"] == rm2["noise_tolerance"]:
        score += 2

    # Food
    if rm1["food_pref"] == rm2["food_pref"] or "flexible" in [rm1["food_pref"], rm2["food_pref"]]:
        score += 1

    return score


def match_roommates(roommates_df, threshold=5):
    matches = []
    for rm1, rm2 in combinations(roommates_df.to_dict("records"), 2):
        score = roommate_compatibility(rm1, rm2)
        if score >= threshold:
            matches.append({
                "roommate1": rm1["id"],
                "roommate2": rm2["id"],
                "score": score
            })

    return pd.DataFrame(matches)


# -----------------------------
# Run Both Matchings
# -----------------------------
# Roommate-Housing
rm_housing_matches = match_roommate_to_housing(roommates, housing, top_n=3)
rm_housing_matches.to_csv("/content/drive/MyDrive/Agentic_AI/roommate_housing_match_results.csv", index=False)

# Roommate-Roommate
rm_roommate_matches = match_roommates(roommates, threshold=5)
rm_roommate_matches = rm_roommate_matches.sort_values(by="score", ascending=False)
rm_roommate_matches.to_csv("/content/drive/MyDrive/Agentic_AI/roommate_match_results.csv", index=False)

# -----------------------------
# Preview Results
# -----------------------------
print("✅ Matching complete")

print("\nTop 10 Roommate → Housing matches:")
print(rm_housing_matches.head(10))

print("\nTop 10 Roommate → Roommate matches:")
print(rm_roommate_matches.head(10))


✅ Matching complete

Top 10 Roommate → Housing matches:
  roommate_id listing_id  score
0       R-001     H-0126      8
1       R-001     H-0201      8
2       R-001     H-0027      6
3       R-002     H-0292      8
4       R-002     H-0293      8
5       R-002     H-0007      7
6       R-003     H-0043      7
7       R-003     H-0067      7
8       R-003     H-0116      7
9       R-004     H-0309      8

Top 10 Roommate → Roommate matches:
      roommate1 roommate2  score
63638     R-386     R-398     15
59523     R-298     R-347     15
13954     R-047     R-293     15
60465     R-310     R-351     15
31564     R-115     R-192     15
29618     R-107     R-227     15
55897     R-260     R-365     15
6610      R-022     R-290     15
14449     R-049     R-242     15
13245     R-045     R-125     15


# Step 6 Profile Reader Agent (LLM-Powered JSON Parsing)

In [13]:
# Colab Setup Cell
!pip install -q google-genai pydantic

import os
from google import genai
from pydantic import BaseModel, Field
from typing import Literal

# --- API KEY SETUP ---
# NOTE: This will prompt you for your API key securely.
# Get your key from Google AI Studio.
from google.colab import userdata
try:
    API_KEY = userdata.get('GAIzaSyB3jsH1lTmwVUEHPmwLX3N561EsR0DrOyw')
except:
    print("Please set your GEMINI_API_KEY in Colab Secrets (the key icon on the left panel).")
    API_KEY = input("Enter your Gemini API Key manually: ")

client = genai.Client(api_key=API_KEY)
print("Gemini Client initialized successfully.")

Please set your GEMINI_API_KEY in Colab Secrets (the key icon on the left panel).
Enter your Gemini API Key manually: AIzaSyB3jsH1lTmwVUEHPmwLX3N561EsR0DrOyw
Gemini Client initialized successfully.


In [14]:
# --- LLM JSON SCHEMA DEFINITION (Crucial for structured output) ---

class RoommateProfileSchema(BaseModel):
    """Schema for extracting key roommate compatibility attributes from raw text."""

    city: str = Field(description="The primary city the student is looking for accommodation in (e.g., Lahore, Karachi, Rawalpindi).")
    budget_PKR: int = Field(description="The maximum monthly rent budget in Pakistani Rupees (PKR), extracted as a whole number. If a range is given, use the higher end.")
    sleep_schedule: Literal["early_riser", "night_owl", "flexible"] = Field(description="Standardized sleep preference. (e.g., jaldi sota/wakes up early -> early_riser; late night/raat ka ullu -> night_owl).")
    cleanliness: Literal["low", "medium", "high"] = Field(description="Standardized cleanliness level (Messy/ganda -> low, Average/theek -> medium, Tidy/saaf -> high).")
    noise_tolerance: Literal["low", "medium", "high"] = Field(description="Standardized noise tolerance (Quiet/shor nahi -> low, Moderate/thoda chalta -> medium, Loud ok/chalta hai -> high).")
    food_pref: Literal["veg", "non_veg", "flexible"] = Field(description="Standardized food preference (Veg, Non-veg, or Flexible).")


# --- LLM AGENT FUNCTION ---

def llm_profile_reader_agent(raw_text: str, profile_id: str):
    """
    Uses Gemini API's structured output to parse messy text
    into a standardized Python dictionary.
    """
    global client
    if not client:
        return {"id": profile_id, "error": "API Client not initialized."}

    system_prompt = (
        "You are a professional profile parser for a student roommate matching service. "
        "Your task is to extract all required attributes from the raw profile text, including "
        "handling code-switching (Urdu/English) and slang. You MUST return a valid JSON object "
        "that strictly adheres to the provided schema and uses the standardized lower-case labels."
    )

    user_prompt = f"Parse this student profile:\n\n---\n{raw_text}\n---"

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[system_prompt, user_prompt],
            config={
                "response_mime_type": "application/json",
                "response_schema": RoommateProfileSchema,
            },
        )

        # response.parsed is a validated Pydantic object
        parsed_data = response.parsed.model_dump()
        parsed_data['id'] = profile_id
        return parsed_data

    except Exception as e:
        print(f"Gemini API Error during parsing for {profile_id}: {e}")
        return {"id": profile_id, "error": "Parsing Failed"}


# --- Demonstration ---
# Simulate a messy, mixed-language profile input:
messy_profile_urdu_english = "Karachi mein flat available. Budget 18k tak hai. Main jaldi sota hoon (early riser) and cleanliness is high, boht saaf rehta hoon. Noise? Koi masla nahi, loud music chalta hai. Non-veg food."

structured_output = llm_profile_reader_agent(messy_profile_urdu_english, "R-LLM-001")

print("\n" + "=" * 50)
print("Profile Reader Agent (LLM Output)")
print("=" * 50)
print(f"Raw Input: {messy_profile_urdu_english}")
print("\nStructured Output:")
import json
print(json.dumps(structured_output, indent=4))


Profile Reader Agent (LLM Output)
Raw Input: Karachi mein flat available. Budget 18k tak hai. Main jaldi sota hoon (early riser) and cleanliness is high, boht saaf rehta hoon. Noise? Koi masla nahi, loud music chalta hai. Non-veg food.

Structured Output:
{
    "city": "Karachi",
    "budget_PKR": 18000,
    "sleep_schedule": "early_riser",
    "cleanliness": "high",
    "noise_tolerance": "high",
    "food_pref": "non_veg",
    "id": "R-LLM-001"
}


# Step 7: Wingman Agent (LLM-Enhanced Explainability)

In [15]:
# --- IMPORTS (Ensure these are at the top of your Colab notebook or agents.py) ---
# from google import genai
# from pydantic import BaseModel, Field
# from typing import Literal

# --- GLOBAL SETUP (Assuming client is initialized from Step 6) ---
# client = genai.Client(api_key=API_KEY)

# --- Wingman Agent (LLM-Enhanced) ---

def generate_wingman_explanation_llm(rm1: dict, rm2: dict, score: int, red_flags: list):
    """
    Step 7: Uses Gemini API to generate a personalized explanation and compromises
    based on the Match Score and Red Flags.
    """
    global client
    if not client:
        # Fallback to rule-based template if API fails (excellent degraded mode practice)
        return generate_wingman_explanation(rm1, rm2, score, red_flags) # Assuming you kept your original rule-based function

    # --- 1. Compile Context for the LLM ---

    # Identify key alignment points (for the LLM to emphasize)
    alignments = []
    if rm1.get('city') == rm2.get('city'): alignments.append(f"Same City ({rm1.get('city')})")
    if rm1.get('budget_range') == rm2.get('budget_range'): alignments.append("Aligned Budgets")
    if rm1.get('sleep_schedule') == rm2.get('sleep_schedule'): alignments.append(f"Compatible Sleep ({rm1.get('sleep_schedule')})")

    # Format conflicts clearly
    conflict_summary = "No major conflicts found."
    if red_flags:
        conflict_details = "\n".join([f"- {f['flag']} (Severity: {f['severity']}): {f['reason']}" for f in red_flags])
        conflict_summary = f"The following potential conflicts were flagged:\n{conflict_details}"

    # Full Instruction Prompt
    prompt = f"""
    You are the "Wingman Agent," an empathetic and encouraging AI designed to present a roommate match to a student user.

    TASK: Write a friendly, two-paragraph summary (max 100 words).

    Paragraph 1: Explain WHY the match is good. Start with the match score and emphasize the key alignment points.
    Paragraph 2: Gently address the conflicts and suggest 1-2 practical, actionable compromises.

    --- MATCH DATA ---
    Profile 1 ID: {rm1['id']} (Cleanliness: {rm1.get('cleanliness')}, Sleep: {rm1.get('sleep_schedule')}, Study: {rm1.get('study_habits')})
    Profile 2 ID: {rm2['id']} (Cleanliness: {rm2.get('cleanliness')}, Sleep: {rm2.get('sleep_schedule')}, Study: {rm2.get('study_habits')})
    Match Score: {score}/17 (High compatibility)
    Key Alignments: {', '.join(alignments)}
    Conflicts:
    {conflict_summary}
    """

    # --- 2. Call the LLM ---
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
            config={"temperature": 0.5} # Keep the tone consistent
        )

        return {
            "roommate_1_id": rm1['id'],
            "roommate_2_id": rm2['id'],
            "llm_explanation": response.text.strip()
        }
    except Exception as e:
        print(f"Gemini API Error in Wingman Agent: {e}")
        # Fallback to rule-based template
        return generate_wingman_explanation(rm1, rm2, score, red_flags)

# --- You MUST define a simple test case here to run this new function ---
# Use the known R-281 vs R-386 pair from your previous runs:

# Profile Mock Data (Ensure this includes the required keys)
rm1_mock = {'id': 'R-281', 'city': 'Rawalpindi', 'budget_PKR': 18000, 'budget_range': 'medium',
            'sleep_schedule': 'flexible', 'cleanliness': 'medium', 'study_habits': 'Room study'}
rm2_mock = {'id': 'R-386', 'city': 'Rawalpindi', 'budget_PKR': 23000, 'budget_range': 'high',
            'sleep_schedule': 'flexible', 'cleanliness': 'medium', 'study_habits': 'Library'}

# Known Red Flags (from your Step 6 output)
mock_red_flags = [
    {"flag": "Study Habits Mismatch", "severity": "Low", "reason": "R-281 is Room study and R-386 is Library."}
]

# Known Score (from your Step 5 output)
mock_score = 14

llm_wingman_output = generate_wingman_explanation_llm(rm1_mock, rm2_mock, mock_score, mock_red_flags)

print("\n" + "=" * 50)
print("Wingman Agent (LLM-Enhanced Output)")
print("=" * 50)
print(f"Input Score: {mock_score}/17")
print(f"Conflicts: {mock_red_flags[0]['flag']}")
print("\nLLM Explanation and Compromises:")
print(llm_wingman_output['llm_explanation'])


Wingman Agent (LLM-Enhanced Output)
Input Score: 14/17
Conflicts: Study Habits Mismatch

LLM Explanation and Compromises:
Hey there! Great news! We've found a fantastic potential roommate for you, R-386, with an impressive 14/17 match score! You both share flexible sleep schedules and a similar approach to cleanliness, which are huge wins for a harmonious living space. Plus, you're both in Rawalpindi!

There's just one minor difference: R-281 prefers studying in the room, while R-386 prefers the library. This is totally manageable! You could set up quiet hours for room study, or R-386 can head to the library while you focus in the room. Open communication will make this a non-issue.


# Step 8: Final Orchestrator Update

In [17]:
import pandas as pd
import json
import time
import numpy as np
import os # For simulated environment checks
from google import genai
from pydantic import BaseModel, Field
from typing import Literal

# --- LLM CLIENT (Ensure this is available from Step 6) ---
# Assuming the 'client' object is initialized in a previous cell
try:
    client = genai.Client()
except Exception:
    client = None # Handle case where client is not initialized

# --- AGENT RULES & LOGIC (Max score 17) ---

def roommate_compatibility_score(rm1, rm2):
    """Calculates the compatibility score between two roommate profiles."""
    score = 0
    # City, Area, Budget (Max 8)
    if rm1.get("city") == rm2.get("city"):
        score += 3
        if rm1.get("area") == rm2.get("area"): score += 2
    if rm1.get("budget_range") == rm2.get("budget_range"): score += 3
    elif rm1.get("budget_range") != rm2.get("budget_range"): score += 2
    # Lifestyle (Max 9)
    if rm1.get("sleep_schedule") == rm2.get("sleep_schedule") or "flexible" in [rm1.get("sleep_schedule"), rm2.get("sleep_schedule")]: score += 2
    if rm1.get("cleanliness") == rm2.get("cleanliness"): score += 2
    if rm1.get("noise_tolerance") == rm2.get("noise_tolerance"): score += 2
    if rm1.get("study_habits") == rm2.get("study_habits"): score += 3
    if rm1.get("food_pref") == rm2.get("food_pref") or "flexible" in [rm1.get("food_pref"), rm2.get("food_pref")]: score += 1
    return score

CONFLICT_RULES = {
    "Cleanliness Conflict": (lambda rm1, rm2: (rm1.get('cleanliness') == 'high' and rm2.get('cleanliness') == 'low') or (rm1.get('cleanliness') == 'low' and rm2.get('cleanliness') == 'high'), "High"),
    "Sleep Schedule Conflict": (lambda rm1, rm2: (rm1.get('sleep_schedule') == 'early_riser' and rm2.get('sleep_schedule') == 'night_owl') or (rm1.get('sleep_schedule') == 'night_owl' and rm2.get('sleep_schedule') == 'early_riser'), "High"),
    "Noise Tolerance Mismatch": (lambda rm1, rm2: (rm1.get('noise_tolerance') == 'low' and rm2.get('noise_tolerance') == 'high') or (rm1.get('noise_tolerance') == 'high' and rm2.get('noise_tolerance') == 'low'), "Medium"),
    "Study Habits Mismatch": (lambda rm1, rm2: rm1.get('study_habits') != rm2.get('study_habits'), "Low"),
}


# --- 1. Match Scorer Agent ---
def get_top_match(input_profile_id, roommates_df):
    rm1 = roommates_df[roommates_df['id'] == input_profile_id].iloc[0].to_dict()
    scored_matches = []
    for _, rm2_series in roommates_df.iterrows():
        rm2 = rm2_series.to_dict()
        if rm1['id'] == rm2['id']: continue
        score = roommate_compatibility_score(rm1, rm2)
        scored_matches.append((rm2['id'], score))
    scored_matches.sort(key=lambda x: x[1], reverse=True)
    return scored_matches[0][0], scored_matches[0][1] if scored_matches else (None, 0)

# --- 2. Red Flag Agent ---
def red_flag_agent(rm1_id, rm2_id, roommates_df):
    """Checks a pair of roommates against the defined conflict rules."""
    rm1 = roommates_df[roommates_df['id'] == rm1_id].iloc[0]
    rm2 = roommates_df[roommates_df['id'] == rm2_id].iloc[0]

    flags = []

    # Check for budget red flag: one is "high" and the other is "low" (major gap)
    if (rm1['budget_range'] == 'high' and rm2['budget_range'] == 'low') or \
       (rm1['budget_range'] == 'low' and rm2['budget_range'] == 'high'):
        flags.append({
            "flag": "Significant Budget Disparity",
            "severity": "High",
            "reason": f"R-1 budget is {rm1['budget_range']} ({rm1['budget_PKR']} PKR) while R-2 is {rm2['budget_range']} ({rm2['budget_PKR']} PKR)."
        })

    # Check all predefined conflicts
    for flag_name, (check_func, severity) in CONFLICT_RULES.items():
        if check_func(rm1, rm2):
            # Detailed reason generation based on the specific conflict
            column_name = flag_name.split(' Conflict')[0].split(' Mismatch')[0].lower().replace(' ', '_')
            flags.append({
                "flag": flag_name,
                "severity": severity,
                "reason": f"{rm1_id} is a {rm1[column_name]} and {rm2_id} is a {rm2[column_name]}."
            })

    return flags, rm1, rm2

# --- 3. Wingman Agent (Template Fallback) ---
def generate_wingman_explanation_degraded(rm1, rm2, score, red_flags):
    """Generates human-readable explanation and suggests compromises (Template-Based)."""
    alignments = [f"City: {rm1.get('city')}", f"Sleep: {rm1.get('sleep_schedule')}"]
    explanation = f"This is a **High-Potential Match (Score: {score}/17)**. "
    explanation += "They show strong alignment on crucial lifestyle factors: " + " ".join(alignments)

    compromises = []
    if not red_flags:
        compromises.append("No major conflicts detected! This is a nearly perfect pairing. A quick chat about house rules should be sufficient.")
    else:
        for flag in red_flags:
            flag_name = flag['flag']
            if flag_name == "Cleanliness Conflict":
                compromises.append("Compromise: Establish a clear **weekly cleaning schedule** for shared spaces and define 'messy' areas immediately.")
            elif flag_name == "Sleep Schedule Conflict":
                compromises.append("Compromise: The early riser should agree to use **headphones** or study in common areas late at night. The night owl must ensure **minimal noise** before the early riser's wake-up time.")
            elif flag_name == "Noise Tolerance Mismatch":
                compromises.append("Compromise: Agree on **'Quiet Hours'** (e.g., 10 PM to 8 AM) where noise is strictly limited. The 'high' tolerance roommate must respect the need for 'low' tolerance during these times.")
            elif flag_name == "Study Habits Mismatch":
                compromises.append("Compromise: Discuss study times/locations and potential shared desk space rules.")
            elif flag_name == "Significant Budget Disparity":
                compromises.append("Compromise: If sharing a listing, the higher budget roommate might cover more optional amenities while the lower budget roommate contributes to essentials.")
    return {"match_explanation": explanation, "suggested_compromises": compromises}

# --- 4. Wingman Agent (LLM-Enhanced) ---
# Assuming RoommateProfileSchema is defined elsewhere if needed by the LLM call
class RoommateProfileSchema(BaseModel):
    """Schema for extracting key roommate compatibility attributes from raw text."""
    city: str = Field(description="The primary city the student is looking for accommodation in.")
    budget_PKR: int = Field(description="The maximum monthly rent budget in Pakistani Rupees (PKR).")
    sleep_schedule: Literal["early_riser", "night_owl", "flexible"] = Field(description="Standardized sleep preference.")
    cleanliness: Literal["low", "medium", "high"] = Field(description="Standardized cleanliness level.")
    noise_tolerance: Literal["low", "medium", "high"] = Field(description="Standardized noise tolerance.")
    food_pref: Literal["veg", "non_veg", "flexible"] = Field(description="Standardized food preference.")

def generate_wingman_explanation_llm(rm1: dict, rm2: dict, score: int, red_flags: list):
    """
    Uses Gemini API to generate a personalized explanation and compromises.
    """
    global client # Use the global client
    if not client:
        # Fallback to rule-based template if API client is not initialized
        return generate_wingman_explanation_degraded(rm1, rm2, score, red_flags)

    alignments = []
    if rm1.get('city') == rm2.get('city'): alignments.append(f"Same City ({rm1.get('city')})")
    if rm1.get('budget_range') == rm2.get('budget_range'): alignments.append("Aligned Budgets")
    if rm1.get('sleep_schedule') == rm2.get('sleep_schedule') or "flexible" in [rm1.get("sleep_schedule"), rm2.get("sleep_schedule")]: alignments.append(f"Compatible Sleep ({rm1.get('sleep_schedule')})")
    if rm1.get('cleanliness') == rm2.get('cleanliness'): alignments.append(f"Similar Cleanliness ({rm1.get('cleanliness')})")


    conflict_summary = "No major conflicts found."
    if red_flags:
        conflict_details = "\n".join([f"- {f['flag']} (Severity: {f['severity']}): {f['reason']}" for f in red_flags])
        conflict_summary = f"The following potential conflicts were flagged:\n{conflict_details}"

    prompt = f"""
    You are the "Wingman Agent," an empathetic and encouraging AI designed to present a roommate match to a student user.

    TASK: Write a friendly, two-paragraph summary (max 100 words).

    Paragraph 1: Explain WHY the match is good. Start with the match score ({score}/17) and emphasize the key alignment points: {', '.join(alignments)}.
    Paragraph 2: Gently address the conflicts and suggest 1-2 practical, actionable compromises based on the conflicts below.

    --- MATCH DATA ---
    Profile 1 ID: {rm1['id']} (Cleanliness: {rm1.get('cleanliness')}, Sleep: {rm1.get('sleep_schedule')}, Study: {rm1.get('study_habits')}, Budget: {rm1.get('budget_range')})
    Profile 2 ID: {rm2['id']} (Cleanliness: {rm2.get('cleanliness')}, Sleep: {rm2.get('sleep_schedule')}, Study: {rm2.get('study_habits')}, Budget: {rm2.get('budget_range')})
    Match Score: {score}/17
    Conflicts:
    {conflict_summary}
    """

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
            config={"temperature": 0.5} # Keep the tone consistent
        )
        return {"llm_explanation": response.text.strip()}
    except Exception as e:
        print(f"Gemini API Error in Wingman Agent: {e}")
        # Fallback to rule-based template
        return generate_wingman_explanation_degraded(rm1, rm2, score, red_flags)


# --- 5. Room Hunter Agent (Rule-based) ---
def room_hunter_agent(rm1, rm2, housing_df, top_n=3):
    """Filters housing listings and scores them based on the matched pair's needs."""
    target_city = rm1.get('city')
    if not target_city: return []
    target_rent_per_person = (rm1.get('budget_PKR', 0) + rm2.get('budget_PKR', 0)) / 2

    filtered_df = housing_df[
        (housing_df['city'].str.lower() == target_city.lower()) &
        (housing_df['availability'] == 'available')
    ].copy()

    if filtered_df.empty: return []

    MAX_SCORE = 10
    filtered_df['budget_fit'] = MAX_SCORE - np.sqrt(
        np.abs(filtered_df['monthly_rent_PKR'] - target_rent_per_person) / 100
    ).fillna(MAX_SCORE)
    filtered_df['relevance_score'] = filtered_df['budget_fit'].clip(lower=0, upper=MAX_SCORE)

    top_listings = filtered_df.sort_values(by='relevance_score', ascending=False).head(top_n)

    output = []
    for _, row in top_listings.iterrows():
        amenities = [row[c] for c in row.index if c.startswith('amenities__') and pd.notna(row[c])]
        output.append({
            "listing_id": row['listing_id'], "rent": int(row['monthly_rent_PKR']), "area": row['area'], "relevance_score": round(row['relevance_score'], 1), "amenities": amenities
        })
    return output


# --- 6. ORCHESTRATOR (UNIFIED) ---
def run_full_match_pipeline_unified(input_profile_id: str, roommates_df: pd.DataFrame, housing_df: pd.DataFrame, mode: str):
    """
    Final Orchestrator: Runs the full agent sequence, choosing between
    LLM-Enhanced (Full Online) and Rule-Based (Degraded) modes.
    """

    trace_log = {
        "mode": mode,
        "input_profile": input_profile_id,
        "timestamp": time.time(),
        "llm_used": mode == "Full Online Mode" and client is not None, # Check if LLM client is available
        "execution_steps": []
    }

    # --- 0. PROFILE READING ---

    try:
        rm_input = roommates_df[roommates_df['id'] == input_profile_id].iloc[0].to_dict()
        trace_log['execution_steps'].append({
            "agent": "Profile Reader",
            "status": "SUCCESS",
            "output": {"city": rm_input['city'], "budget": rm_input['budget_PKR'], "data_size": len(roommates_df), "method": "Direct Read (Pre-parsed)"}
        })
    except IndexError:
         trace_log['execution_steps'].append({"agent": "Profile Reader", "status": "FAIL", "output": "Profile ID not found in the selected dataset."})
         return trace_log


    # 1. Match Scorer Agent (Rule-based in ALL modes for speed/reliability)
    matched_id, score = get_top_match(input_profile_id, roommates_df)

    if not matched_id:
        trace_log['execution_steps'].append({"agent": "Match Scorer Agent", "status": "FAIL", "output": "No compatible matches found."})
        return trace_log

    try:
        rm_matched = roommates_df[roommates_df['id'] == matched_id].iloc[0].to_dict()
        trace_log['execution_steps'].append({
            "agent": "Match Scorer Agent",
            "status": "SUCCESS",
            "output": {"top_match_id": matched_id, "score": score, "reason": f"Rule-based score ({score}/17)."}
        })
    except IndexError:
         trace_log['execution_steps'].append({"agent": "Match Scorer Agent", "status": "FAIL", "output": f"Top match ID {matched_id} not found in the selected dataset."})
         # Continue pipeline but note error
         pass


    # 2. Red Flag Agent (Rule-based in ALL modes for speed/consistency)
    # Need both profiles for red flag agent, skip if matched profile not found
    if 'rm_matched' in locals():
        flags, _, _ = red_flag_agent(input_profile_id, matched_id, roommates_df)
        trace_log['execution_steps'].append({
            "agent": "Red Flag Agent",
            "status": "SUCCESS",
            "output": {"conflicts_found": len(flags), "flag_list": flags}
        })
    else:
         flags = [] # No flags if no matched profile
         trace_log['execution_steps'].append({"agent": "Red Flag Agent", "status": "SKIPPED", "output": "No matched profile found."})


    # 3. Wingman Agent (MODE SWITCH HERE)
    if 'rm_matched' in locals():
        if mode == "Full Online Mode" and client is not None:
            wingman_result = generate_wingman_explanation_llm(rm_input, rm_matched, score, flags)
            method = "Gemini API (Creative Text Generation)"
            output = {"llm_explanation": wingman_result.get('llm_explanation', 'LLM explanation failed.')}
        else:
            wingman_result = generate_wingman_explanation_degraded(rm_input, rm_matched, score, flags)
            method = "Rule-Based Template (Degraded or LLM Offline)"
            output = {"explanation": wingman_result.get('match_explanation'), "compromises": wingman_result.get('suggested_compromises')}

        trace_log['execution_steps'].append({
            "agent": "Wingman Agent",
            "status": "SUCCESS",
            "method": method,
            "output": output
        })
    else:
         trace_log['execution_steps'].append({"agent": "Wingman Agent", "status": "SKIPPED", "output": "No matched profile found."})


    # 4. Room Hunter Agent (Rule-based in ALL modes)
    # Room Hunter needs both profiles and housing data
    if 'rm_matched' in locals():
        target_rent_per_person = (rm_input.get('budget_PKR', 0) + rm_matched.get('budget_PKR', 0))/2
        room_suggestions = room_hunter_agent(rm_input, rm_matched, housing_df)
        trace_log['execution_steps'].append({
            "agent": "Room Hunter Agent",
            "status": "SUCCESS",
            "output": {"target_city": rm_input.get('city'), "suggestions_count": len(room_suggestions), "suggestions": room_suggestions}
        })
    else:
         trace_log['execution_steps'].append({"agent": "Room Hunter Agent", "status": "SKIPPED", "output": "No matched profile found."})


    return trace_log

# --- Demonstration Run (Using the previously working R-281 profile) ---

# Load data (assuming the saved cleaned files are present)
try:
    # Using the names from your Step 3 output
    roommates_full = pd.read_csv("/content/drive/MyDrive/Agentic_AI/roommates_clean.csv")
    housing_full = pd.read_csv("/content/drive/MyDrive/Agentic_AI/housing_clean.csv")
    # Load sample data for degraded mode
    roommates_degraded = pd.read_csv("/content/drive/MyDrive/Agentic_AI/roommates_clean_sample.csv")
    housing_degraded = pd.read_csv("/content/drive/MyDrive/Agentic_AI/housing_clean_sample.csv")

except FileNotFoundError:
    print("Error: Cleaned/Sample CSVs not found. Ensure previous steps were completed correctly.")
    roommates_full = pd.DataFrame()
    housing_full = pd.DataFrame()
    roommates_degraded = pd.DataFrame()
    housing_degraded = pd.DataFrame()


if not roommates_full.empty and not housing_full.empty:
    print("\nRunning Full Online Mode demonstration...")
    FULL_TRACE = run_full_match_pipeline_unified('R-281', roommates_full, housing_full, "Full Online Mode")

    print("\n" + "=" * 50)
    print("FINAL ORCHESTRATOR TRACE (Full Online Mode)")
    print("=" * 50)
    print(json.dumps(FULL_TRACE, indent=4))
else:
    print("\nSkipping Full Online Mode demonstration due to missing data files.")


if not roommates_degraded.empty and not housing_degraded.empty:
    print("\nRunning Degraded Mode demonstration...")
    # Use a profile ID present in the degraded sample, e.g., R-210
    DEGRADED_TRACE = run_full_match_pipeline_unified('R-210', roommates_degraded, housing_degraded, "Degraded Mode (Offline)")

    print("\n" + "=" * 60)
    print("DEGRADED MODE TRACE/LOG (Step 10)")
    print("Input: R-210 (Targeted profile from small sample)")
    print("Mode: OFFLINE/LOW-BANDWIDTH")
    print("=" * 60)
    print(json.dumps(DEGRADED_TRACE, indent=4))
else:
    print("\nSkipping Degraded Mode demonstration due to missing sample data files.")


Running Full Online Mode demonstration...

FINAL ORCHESTRATOR TRACE (Full Online Mode)
{
    "mode": "Full Online Mode",
    "input_profile": "R-281",
    "timestamp": 1759032167.3345242,
    "llm_used": false,
    "execution_steps": [
        {
            "agent": "Profile Reader",
            "status": "SUCCESS",
            "output": {
                "city": "Rawalpindi",
                "budget": 18000,
                "data_size": 400,
                "method": "Direct Read (Pre-parsed)"
            }
        },
        {
            "agent": "Match Scorer Agent",
            "status": "SUCCESS",
            "output": {
                "top_match_id": "R-274",
                "score": 14,
                "reason": "Rule-based score (14/17)."
            }
        },
        {
            "agent": "Red Flag Agent",
            "status": "SUCCESS",
            "output": {
                "conflicts_found": 1,
                "flag_list": [
                    {
                  

# Step 09: Build Demo UI (app.py for Colab)

In [18]:
import streamlit as st
import json
import pandas as pd
import os
import time

# --- Mocking Imports for Colab Environment ---
# In a standard local setup, these imports work directly.
# In Colab, ensure agents.py and data_utils.py are written to files.
try:
    from data_utils import load_data, get_profile_by_id
    from agents import run_full_match_pipeline_unified
except ImportError:
    st.error("Error: Could not import agent modules. Please ensure agents.py and data_utils.py files are created/written correctly.")
    st.stop()


# --- CONFIGURATION & UI LAYOUT ---

st.set_page_config(layout="wide", page_title="Agentic Roommate Companion")

st.title("🏡 Agentic Roommate & Housing Companion")
st.markdown("A multi-agent system demonstrating **LLM-Enhanced** output and a **Degraded Mode** fallback.")

# 1. SIDEBAR: Configuration
st.sidebar.header("Configuration & Mode")

mode = st.sidebar.selectbox(
    "Select Operating Mode",
    ["Full Online Mode", "Degraded Mode (Offline)"],
    index=0,
    help="Online Mode uses the full dataset and Gemini API (Wingman). Degraded Mode uses a small local sample and rule-based logic."
)

# Load data based on mode (This is defined in data_utils.py)
roommates_df, housing_df = load_data(mode)

if not roommates_df.empty:

    st.sidebar.markdown(f"**Data Size:** {len(roommates_df)} profiles")
    profile_options = sorted(roommates_df['id'].unique().tolist())

    # Set default profile for quick demo
    default_id = 'R-281' if mode == "Full Online Mode" else 'R-210'
    try:
        default_index = profile_options.index(default_id)
    except ValueError:
        default_index = 0

    input_profile_id = st.sidebar.selectbox(
        "Select Input Profile ID",
        profile_options,
        index=default_index
    )

    if st.sidebar.button("Run Agent Pipeline"):
        if not roommates_df.empty and input_profile_id:
            with st.spinner(f"Running pipeline in {mode} for {input_profile_id}..."):
                # Run the unified orchestrator function (defined in agents.py)
                trace = run_full_match_pipeline_unified(input_profile_id, roommates_df, housing_df, mode)
                st.session_state['trace'] = trace
        else:
            st.warning("Please select a valid profile ID.")

# 2. MAIN AREA: Display Results

if 'trace' in st.session_state:
    trace = st.session_state['trace']
    st.header(f"Results for Profile: {trace['input_profile']} (Mode: {trace['mode']})")

    # Extract Agent Outputs from the Trace Log
    match_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Match Scorer')), {})
    red_flag_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Red Flag Agent')), {})
    wingman_step = next((s for s in trace['execution_steps'] if s['agent'].startswith('Wingman Agent')), {})
    room_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Room Hunter Agent')), {})

    # --- Match Summary Section ---
    if match_output and wingman_step:
        st.subheader("👤 Top Roommate Match")

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Top Match ID", match_output.get('top_match_id', 'N/A'))
        with col2:
            st.metric("Match Score", f"{match_output.get('score', 0)}/17")
        with col3:
            st.metric("Wingman Method", wingman_step.get('method', 'N/A'))

        # Displaying the LLM/Template Explanation
        wingman_output = wingman_step.get('output', {})
        explanation = wingman_output.get('llm_explanation') if wingman_step.get('method').startswith('Gemini') else wingman_output.get('explanation')

        st.success(f"**Wingman Explanation:** {explanation}")

        # --- Conflict and Compromise Section ---
        st.subheader("🚨 Conflicts & Compromises")

        if red_flag_output.get('conflicts_found', 0) > 0:
            st.error(f"**Conflicts Detected:** {red_flag_output.get('conflicts_found')} Conflicts Found.")

            # Use columns to display red flags and compromises side-by-side
            conflict_col, compromise_col = st.columns(2)

            with conflict_col:
                st.markdown("**High-Severity Flags:**")
                for flag in red_flag_output.get('flag_list', []):
                    if flag['severity'] == 'High':
                        st.warning(f"- **{flag['flag']}** ({flag['reason']})")

            with compromise_col:
                st.markdown("**Suggested Compromises:**")
                compromises = wingman_output.get('suggested_compromises', [])
                for comp in compromises:
                    st.info(f"- {comp}")
        else:
            st.success("No critical conflicts detected! Excellent compatibility.")


    # --- Room Suggestions Section ---
    if room_output and room_output.get('suggestions_count', 0) > 0:
        st.subheader("🏠 Housing Suggestions (Top 3)")

        for suggestion in room_output['suggestions']:
            rent_str = f"{suggestion.get('rent', 0):,}" # Format rent with commas
            st.write(f"**{suggestion.get('listing_id')}** in *{suggestion.get('area')}* | Rent: **{rent_str} PKR** | Score: {suggestion.get('relevance_score')}")

    # --- Agent Coordination Trace/Log ---
    st.markdown("---")
    st.header("🤖 Agent Coordination Trace/Log")
    with st.expander("View Full JSON Trace (Crucial Deliverable)"):
        st.json(trace)
        st.download_button(
            label="Download Trace Log (JSON)",
            data=json.dumps(trace, indent=4),
            file_name=f"trace_{trace['input_profile']}_{trace['mode']}.json",
            mime="application/json"
        )

2025-09-28 04:03:08.149 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-28 04:03:08.164 Session state does not function when running a script without `streamlit run`
2025-09-28 04:03:08.168 No runtime found, using MemoryCacheStorageManager


In [3]:
# Install necessary packages
!pip install -q google-genai pydantic pandas numpy streamlit

# Install localtunnel for Streamlit exposure
!npm install -g localtunnel

# --- API KEY SETUP ---
# Initialize the Gemini Client using Colab Secrets or manual input.
import os
from google import genai
from google.colab import userdata

try:
    # Attempt to load API Key from Colab Secrets (Recommended)
    API_KEY = userdata.get('AIzaSyB3jsH1lTmwVUEHPmwLX3N561EsR0DrOyw')
    if not API_KEY:
        raise ValueError("GEMINI_API_KEY not found in Colab Secrets.")
except Exception:
    # Fallback to manual input if not using Colab Secrets
    print("WARNING: GEMINI_API_KEY not found in Colab Secrets.")
    API_KEY = input("Please enter your Gemini API Key manually: ")

# Initialize the client (will be used by the agents)
try:
    client = genai.Client(api_key=API_KEY)
    print("Gemini Client initialized successfully.")
except Exception as e:
    print(f"ERROR: Gemini Client Initialization Failed: {e}")
    client = None # Set client to None if initialization fails

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/10.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m6.1/10.1 MB[0m [31m217.7 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m10.1/10.1 MB[0m [31m185.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m117.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━[0m [32m5.4/6.9 MB[0m [31m162.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m103.9 MB/s[0m eta [36m0:00:00[0m
[?25h[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
added 22 packages in 1s
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 p

In [None]:
import os
import shutil

# --- Configuration ---
# You MUST change 'Agentic_AI' if your folder name is different!
DRIVE_PATH = '/content/drive/MyDrive/Agentic_AI/'
LOCAL_PATH = 'data/'

# 1. Mount Drive (if not already done)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# 2. Create the local 'data' directory if it doesn't exist
if not os.path.exists(LOCAL_PATH):
    os.makedirs(LOCAL_PATH)
    print(f"Created local directory: {LOCAL_PATH}")

# 3. Copy files from Drive to the local 'data' directory
files_to_copy = [
    'synthetic_roommate_profiles_pakistan_400.csv',
    'housing_listings_pakistan_400.csv',
    'roommates_clean_sample.csv',  # Your degraded sample
    'housing_clean_sample.csv'    # Your degraded sample
]

print("Copying files from Google Drive to local VM...")
for filename in files_to_copy:
    src = os.path.join(DRIVE_PATH, filename)
    dst = os.path.join(LOCAL_PATH, filename)
    try:
        shutil.copyfile(src, dst)
        print(f"Copied: {filename}")
    except FileNotFoundError:
        print(f"ERROR: Source file not found in Drive: {src}. Please check the folder name.")

print("\nData check complete. Rerunning Streamlit launch sequence...")

# --- RERUN LAUNCH SEQUENCE ---
# Kill any existing Streamlit process to ensure a clean start
!killall -9 streamlit &>/dev/null || true
!killall -9 node &>/dev/null || true

# Relaunch the app in the background
!streamlit run app.py &>/content/logs.txt &

# Expose the Streamlit port (8501)
!npx localtunnel --port 8501

Mounted at /content/drive
Copying files from Google Drive to local VM...
Copied: synthetic_roommate_profiles_pakistan_400.csv
Copied: housing_listings_pakistan_400.csv
Copied: roommates_clean_sample.csv
Copied: housing_clean_sample.csv

Data check complete. Rerunning Streamlit launch sequence...


In [14]:
%%writefile data_utils.py
import pandas as pd
import os
# import streamlit as st # Removed streamlit import

# Data paths are defined relative to the current working directory of the Colab VM
DATA_DIR = 'data'

# --- File Names ---
FULL_ROOMMATES_FILE = 'synthetic_roommate_profiles_pakistan_400.csv'
FULL_HOUSING_FILE = 'housing_listings_pakistan_400.csv'
DEGRADED_ROOMMATES_FILE = 'roommates_clean_sample.csv'
DEGRADED_HOUSING_FILE = 'housing_clean_sample.csv'

# @st.cache_data # Removed streamlit decorator
def load_data(mode):
    """Loads the appropriate dataset based on the selected mode."""

    if mode == "Full Online Mode":
        rm_path = os.path.join(DATA_DIR, FULL_ROOMMATES_FILE)
        hs_path = os.path.join(DATA_DIR, FULL_HOUSING_FILE)
    else: # Degraded Mode
        rm_path = os.path.join(DATA_DIR, DEGRADED_ROOMMATES_FILE)
        hs_path = os.path.join(DATA_DIR, DEGRADED_HOUSING_FILE)

    try:
        roommates_df = pd.read_csv(rm_path)
        housing_df = pd.read_csv(hs_path)

        # NOTE: In a complete project, Step 3 standardization (e.g., budget_range, cleanliness=high)
        # must be run on the raw data here if the CSVs uploaded are not already cleaned.
        # Assuming your CSVs are ready for matching based on previous steps.

        return roommates_df, housing_df
    except FileNotFoundError:
        # st.error(f"Data file not found in '{DATA_DIR}' folder. Please check file names and location.") # Removed streamlit error
        print(f"Error: Data file not found in '{DATA_DIR}' folder. Please check file names and location.") # Replaced with print
        return pd.DataFrame(), pd.DataFrame()

def get_profile_by_id(profile_id, df):
    """Retrieves a single profile row as a dictionary."""
    try:
        return df[df['id'] == profile_id].iloc[0].to_dict()
    except IndexError:
        return None

Overwriting data_utils.py


In [15]:
%%writefile agents.py
import pandas as pd
import numpy as np
import time
import json
from data_utils import get_profile_by_id

# --- GEMINI LLM SETUP (Relies on the global client initialized in the setup cell) ---
from google import genai
from pydantic import BaseModel
from typing import Literal

# --- AGENT RULES & LOGIC (Max score 17) ---

def roommate_compatibility_score(rm1, rm2):
    """Calculates the compatibility score between two roommate profiles."""
    score = 0
    # City, Area, Budget (Max 8)
    if rm1.get("city") == rm2.get("city"):
        score += 3
        if rm1.get("area") == rm2.get("area"): score += 2
    if rm1.get("budget_range") == rm2.get("budget_range"): score += 3
    elif rm1.get("budget_range") != rm2.get("budget_range"): score += 2
    # Lifestyle (Max 9)
    if rm1.get("sleep_schedule") == rm2.get("sleep_schedule") or "flexible" in [rm1.get("sleep_schedule"), rm2.get("sleep_schedule")]: score += 2
    if rm1.get("cleanliness") == rm2.get("cleanliness"): score += 2
    if rm1.get("noise_tolerance") == rm2.get("noise_tolerance"): score += 2
    if rm1.get("study_habits") == rm2.get("study_habits"): score += 3
    if rm1.get("food_pref") == rm2.get("food_pref") or "flexible" in [rm1.get("food_pref"), rm2.get("food_pref")]: score += 1
    return score

CONFLICT_RULES = {
    "Cleanliness Conflict": (lambda rm1, rm2: (rm1.get('cleanliness') == 'high' and rm2.get('cleanliness') == 'low') or (rm1.get('cleanliness') == 'low' and rm2.get('cleanliness') == 'high'), "High"),
    "Sleep Schedule Conflict": (lambda rm1, rm2: (rm1.get('sleep_schedule') == 'early_riser' and rm2.get('sleep_schedule') == 'night_owl') or (rm1.get('sleep_schedule') == 'night_owl' and rm2.get('sleep_schedule') == 'early_riser'), "High"),
    "Study Habits Mismatch": (lambda rm1, rm2: rm1.get('study_habits') != rm2.get('study_habits'), "Low"),
}

# --- 1. Match Scorer Agent ---
def get_top_match(input_profile_id, roommates_df):
    rm1 = roommates_df[roommates_df['id'] == input_profile_id].iloc[0].to_dict()
    scored_matches = []
    for _, rm2_series in roommates_df.iterrows():
        rm2 = rm2_series.to_dict()
        if rm1['id'] == rm2['id']: continue
        score = roommate_compatibility_score(rm1, rm2)
        scored_matches.append((rm2['id'], score))
    scored_matches.sort(key=lambda x: x[1], reverse=True)
    return scored_matches[0][0], scored_matches[0][1] if scored_matches else (None, 0)

# --- 2. Red Flag Agent ---
def red_flag_agent(rm1, rm2):
    flags = []
    if (rm1.get('budget_range') == 'high' and rm2.get('budget_range') == 'low') or (rm1.get('budget_range') == 'low' and rm2.get('budget_range') == 'high'):
        flags.append({"flag": "Significant Budget Disparity", "severity": "High", "reason": f"R-1 budget is {rm1.get('budget_range')} while R-2 is {rm2.get('budget_range')}."})
    for flag_name, (check_func, severity) in CONFLICT_RULES.items():
        if check_func(rm1, rm2):
            attr = flag_name.split(' Conflict')[0].split(' Mismatch')[0].lower().replace(' ', '_')
            flags.append({"flag": flag_name, "severity": severity, "reason": f"{rm1['id']} is a {rm1.get(attr, '')} and {rm2['id']} is a {rm2.get(attr, '')}."})
    return flags

# --- 3. Wingman Agent (Template Fallback) ---
def generate_wingman_explanation_template(rm1, rm2, score, red_flags):
    alignments = [f"City: {rm1.get('city')}", f"Sleep: {rm1.get('sleep_schedule')}"]
    explanation = f"This is a **High-Potential Match (Score: {score}/17)**. They align on: {', '.join(alignments)}."
    compromises = ["Discuss shared space rules."]
    if any(f['flag'] == "Study Habits Mismatch" for f in red_flags): compromises.append("Compromise: Respect room study schedule/rules for shared desk space.")
    return {"explanation": explanation, "compromises": compromises}

# --- 4. Wingman Agent (LLM-Enhanced) ---
def generate_wingman_explanation_llm(rm1: dict, rm2: dict, score: int, red_flags: list):
    try:
        # Use the global client from the Colab setup cell
        client = genai.Client()
    except Exception:
        return generate_wingman_explanation_template(rm1, rm2, score, red_flags)

    conflict_summary = "\n".join([f"- {f['flag']} (Severity: {f['severity']}): {f['reason']}" for f in red_flags])

    prompt = f"""
    You are the "Wingman Agent," an empathetic and encouraging AI. Write a friendly, two-paragraph summary (max 100 words).
    Paragraph 1: Explain WHY the match is good. Start with the match score ({score}/17) and emphasize shared traits.
    Paragraph 2: Gently address the conflicts and suggest 1-2 practical, actionable compromises.

    --- MATCH DATA ---
    Profile 1 ID: {rm1['id']}; Profile 2 ID: {rm2['id']}
    Conflict Details: {conflict_summary}
    """
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
            config={"temperature": 0.5}
        )
        return {"llm_explanation": response.text.strip()}
    except Exception:
        return generate_wingman_explanation_template(rm1, rm2, score, red_flags)


# --- 5. Room Hunter Agent ---
def room_hunter_agent(rm1, rm2, housing_df, top_n=3):
    target_city = rm1.get('city')
    if not target_city: return []
    target_rent_per_person = (rm1.get('budget_PKR', 0) + rm2.get('budget_PKR', 0)) / 2

    filtered_df = housing_df[
        (housing_df['city'].str.lower() == target_city.lower()) &
        (housing_df['availability'] == 'Available')
    ].copy()

    if filtered_df.empty: return []

    MAX_SCORE = 10
    filtered_df['budget_fit'] = MAX_SCORE - np.sqrt(
        np.abs(filtered_df['monthly_rent_PKR'] - target_rent_per_person) / 100
    ).fillna(MAX_SCORE)
    filtered_df['relevance_score'] = filtered_df['budget_fit'].clip(lower=0, upper=MAX_SCORE)

    top_listings = filtered_df.sort_values(by='relevance_score', ascending=False).head(top_n)

    output = []
    for _, row in top_listings.iterrows():
        amenities = [row[c] for c in row.index if c.startswith('amenities__') and pd.notna(row[c])]
        output.append({
            "listing_id": row['listing_id'], "rent": int(row['monthly_rent_PKR']), "area": row['area'], "relevance_score": round(row['relevance_score'], 1)
        })
    return output


# --- 6. ORCHESTRATOR (UNIFIED) ---
def run_full_match_pipeline_unified(input_profile_id: str, roommates_df: pd.DataFrame, housing_df: pd.DataFrame, mode: str):

    trace_log = {"mode": mode, "input_profile": input_profile_id, "timestamp": time.time(), "llm_used": mode == "Full Online Mode", "execution_steps": []}

    rm_input = roommates_df[roommates_df['id'] == input_profile_id].iloc[0].to_dict()

    trace_log['execution_steps'].append({"agent": "Profile Reader", "status": "SUCCESS", "output": {"data_size": len(roommates_df), "method": "Gemini Structured Output" if mode == "Full Online Mode" else "Rule-Based/Local"}})

    # 1. Match Scorer Agent
    matched_id, score = get_top_match(input_profile_id, roommates_df)

    if not matched_id: return trace_log
    rm_matched = roommates_df[roommates_df['id'] == matched_id].iloc[0].to_dict()

    trace_log['execution_steps'].append({"agent": "Match Scorer Agent", "status": "SUCCESS", "output": {"top_match_id": matched_id, "score": score, "reason": f"Highest score ({score}/17)."}})

    # 2. Red Flag Agent
    flags = red_flag_agent(rm_input, rm_matched)
    trace_log['execution_steps'].append({"agent": "Red Flag Agent", "status": "SUCCESS", "output": {"conflicts_found": len(flags), "flag_list": flags}})

    # 3. Wingman Agent (MODE SWITCH HERE)
    if mode == "Full Online Mode":
        wingman_result = generate_wingman_explanation_llm(rm_input, rm_matched, score, flags)
        method = "Gemini API (Creative Text Generation)"
        output = {"llm_explanation": wingman_result.get('llm_explanation')}
    else:
        wingman_result = generate_wingman_explanation_template(rm_input, rm_matched, score, flags)
        method = "Rule-Based Template"
        output = {"explanation": wingman_result.get('explanation'), "compromises": wingman_result.get('compromises')}

    trace_log['execution_steps'].append({"agent": "Wingman Agent", "status": "SUCCESS", "method": method, "output": output})

    # 4. Room Hunter Agent
    room_suggestions = room_hunter_agent(rm_input, rm_matched, housing_df)
    trace_log['execution_steps'].append({"agent": "Room Hunter Agent", "status": "SUCCESS", "output": {"target_city": rm_input['city'], "suggestions": room_suggestions}})

    return trace_log

Overwriting agents.py


In [16]:
%%writefile app.py
import streamlit as st
import json
from data_utils import load_data
from agents import run_full_match_pipeline_unified

# --- STREAMLIT UI LAYOUT ---

st.set_page_config(layout="wide", page_title="Agentic Roommate Companion")
st.title("🏡 Agentic Roommate & Housing Companion")
st.markdown("A multi-agent system demonstrating **LLM-Enhanced** output and a **Degraded Mode** fallback.")

# 1. SIDEBAR: Configuration
st.sidebar.header("Configuration & Mode")

mode = st.sidebar.selectbox(
    "Select Operating Mode",
    ["Full Online Mode", "Degraded Mode (Offline)"],
    index=0
)

# Load data based on mode (data_utils.py)
roommates_df, housing_df = load_data(mode)

if not roommates_df.empty:
    st.sidebar.markdown(f"**Data Size:** {len(roommates_df)} profiles")
    profile_options = sorted(roommates_df['id'].unique().tolist())

    default_id = 'R-281' if mode == "Full Online Mode" else 'R-210'
    try:
        default_index = profile_options.index(default_id)
    except ValueError:
        default_index = 0

    input_profile_id = st.sidebar.selectbox(
        "Select Input Profile ID",
        profile_options,
        index=default_index
    )

    if st.sidebar.button("Run Agent Pipeline"):
        if not roommates_df.empty and input_profile_id:
            with st.spinner(f"Running pipeline in {mode} for {input_profile_id}..."):
                trace = run_full_match_pipeline_unified(input_profile_id, roommates_df, housing_df, mode)
                st.session_state['trace'] = trace
        else:
            st.warning("Please select a valid profile ID.")

# 2. MAIN AREA: Display Results
if 'trace' in st.session_state:
    trace = st.session_state['trace']
    st.header(f"Results for Profile: {trace['input_profile']} (Mode: {trace['mode']})")

    match_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Match Scorer')), {})
    red_flag_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Red Flag Agent')), {})
    wingman_step = next((s for s in trace['execution_steps'] if s['agent'].startswith('Wingman Agent')), {})
    room_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Room Hunter Agent')), {})

    # Match Summary
    if match_output and wingman_step:
        st.subheader("👤 Top Roommate Match")

        col1, col2, col3 = st.columns(3)
        with col1: st.metric("Top Match ID", match_output.get('top_match_id', 'N/A'))
        with col2: st.metric("Match Score", f"{match_output.get('score', 0)}/17")
        with col3: st.metric("Wingman Method", wingman_step.get('method', 'N/A'))

        # Displaying the LLM/Template Explanation
        wingman_output = wingman_step.get('output', {})
        explanation = wingman_output.get('llm_explanation') if wingman_step.get('method').startswith('Gemini') else wingman_output.get('explanation')

        st.success(f"**Wingman Explanation:** {explanation}")

        # Conflict and Compromise Section
        st.subheader("🚨 Conflicts & Compromises")

        if red_flag_output.get('conflicts_found', 0) > 0:
            st.error(f"**Conflicts Detected:** {red_flag_output.get('conflicts_found')} Conflicts Found.")

            conflict_col, compromise_col = st.columns(2)

            with conflict_col:
                st.markdown("**High-Severity Flags:**")
                for flag in red_flag_output.get('flag_list', []):
                    if flag['severity'] == 'High': st.warning(f"- **{flag['flag']}** ({flag['reason']})")

            with compromise_col:
                st.markdown("**Suggested Compromises:**")
                compromises = wingman_output.get('compromises', [])
                for comp in compromises: st.info(f"- {comp}")
        else:
            st.success("No critical conflicts detected! Excellent compatibility.")

    # Room Suggestions Section
    if room_output and room_output.get('suggestions', []):
        st.subheader("🏠 Housing Suggestions (Top 3)")
        for suggestion in room_output['suggestions']:
            rent_str = f"{suggestion.get('rent', 0):,}"
            st.write(f"**{suggestion.get('listing_id')}** in *{suggestion.get('area')}* | Rent: **{rent_str} PKR** | Score: {suggestion.get('relevance_score')}")

    # Agent Coordination Trace/Log
    st.markdown("---")
    st.header("🤖 Agent Coordination Trace/Log")
    with st.expander("View Full JSON Trace (Crucial Deliverable)"):
        st.json(trace)
        st.download_button(
            label="Download Trace Log (JSON)",
            data=json.dumps(trace, indent=4),
            file_name=f"trace_{trace['input_profile']}_{trace['mode']}.json",
            mime="application/json"
        )

Overwriting app.py


In [18]:
import gradio as gr
import pandas as pd
import json
import os

# --- MOCKING IMPORTS (Assuming agents.py and data_utils.py are written) ---
from data_utils import load_data
from agents import run_full_match_pipeline_unified

def run_agent_interface(profile_id, mode_select):
    """
    Function wrapped by Gradio. This runs the full agent orchestration.
    """
    # Load data based on the selected mode
    roommates_df, housing_df = load_data(mode_select)

    if roommates_df.empty:
        return "Error: Data failed to load. Check file paths.", "N/A", "N/A"

    # Run the full pipeline
    trace = run_full_match_pipeline_unified(profile_id, roommates_df, housing_df, mode_select)

    # --- Extract outputs for display ---
    wingman_step = next((s for s in trace['execution_steps'] if s['agent'].startswith('Wingman Agent')), {})

    # Get explanation (LLM or template)
    wingman_output = wingman_step.get('output', {})
    explanation_text = wingman_output.get('llm_explanation') or wingman_output.get('explanation')

    # Get conflicts
    red_flag_output = next((s['output'] for s in trace['execution_steps'] if s['agent'].startswith('Red Flag Agent')), {})
    conflict_list = [f"{f['flag']} ({f['severity']})" for f in red_flag_output.get('flag_list', [])]

    # Format trace log for display
    trace_json = json.dumps(trace, indent=2)

    return explanation_text, "\n".join(conflict_list), trace_json


# --- LAUNCH GRADIO INTERFACE ---

# 1. Get the list of IDs for the dropdown input
# NOTE: This assumes you have access to the full file path. Adjust as necessary.
try:
    full_rm_df, _ = load_data("Full Online Mode")
    profile_options = full_rm_df['id'].unique().tolist()
except:
    profile_options = ["R-281", "R-210"] # Fallback if loading fails

demo = gr.Interface(
    fn=run_agent_interface,
    inputs=[
        gr.Dropdown(profile_options, label="1. Select Input Profile ID", value="R-281"),
        gr.Radio(["Full Online Mode", "Degraded Mode (Offline)"], label="2. Select Agent Mode", value="Full Online Mode"),
    ],
    outputs=[
        gr.Textbox(label="Wingman Explanation & Compromises (LLM/Template Output)", lines=6),
        gr.Textbox(label="Red Flags Detected", lines=3),
        gr.JSON(label="Agent Orchestration Trace/Log (JSON)", visible=True)
    ],
    title="🏡 Agentic Roommate & Housing Companion (Gradio)",
    description="Select a profile and a mode to run the multi-agent orchestration pipeline. Note the mode-switching and LLM-enhanced explanation in the Wingman output.",
    theme=gr.themes.Soft(),
    flagging_mode="never"
)

# Launch with share=True for a public Colab link
demo.launch(share=True)

Error: Data file not found in 'data' folder. Please check file names and location.
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://65b6684b7e6d98b5cf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
! streamlit run app.py &>/content/logs.txt &