In [1]:
import pandas as pd
import torch
import torch.nn as nn
import requests
import random
from scipy.spatial import distance
from geopy.distance import geodesic

# 1. LOAD AND FILTER DATASET
# 'Address.csv' must provide the columns 'officename', 'statename',
# 'latitude' and 'longitude' (adjust the 'statename' name below if your file
# calls it something else, e.g. 'State').
try:
    full_df = pd.read_csv('Address.csv')

    # Keep only Andhra Pradesh rows (case-insensitive substring match; rows
    # whose state is missing are excluded), then drop rows that lack either
    # coordinate.
    df = (
        full_df
        .loc[lambda frame: frame['statename'].str.contains("Andhra Pradesh", case=False, na=False)]
        .copy()
        .dropna(subset=['latitude', 'longitude'])
    )

    print(f"Dataset loaded: {len(df)} locations filtered for Andhra Pradesh.")
except FileNotFoundError:
    print("Error: 'Address.csv' not found. Ensure it is in the same directory.")
    # NOTE(review): inside a notebook kernel exit() may shut the kernel down
    # rather than just stopping this cell - confirm that is intended.
    exit()

# 2. RANDOM USER GENERATION
# Vocabulary of interests that random users draw from. The list order is also
# the column order of the one-hot interest vectors built in process_matching.
interests_pool = [
    "sports",
    "movies",
    "games",
    "gym",
    "music",
    "coding",
    "travel",
]

def generate_random_users(n=2):
    """Build ``n`` synthetic user profiles from randomly sampled dataset rows.

    Reads the module-level ``df`` and ``interests_pool``.

    Returns:
        A list of dicts, one per sampled row, with the keys 'name'
        (``User_1``, ``User_2``, ... in sample order), 'village' (the row's
        'officename'), 'coords' (a ``(latitude, longitude)`` tuple of floats)
        and 'interests' (1 to 4 distinct entries of ``interests_pool``).
    """
    profiles = []
    picked_rows = df.sample(n=n)
    for position, (_, record) in enumerate(picked_rows.iterrows(), start=1):
        # Decide how many interests this user has, then draw that many.
        how_many = random.randint(1, 4)
        chosen = random.sample(interests_pool, how_many)
        profiles.append(dict(
            name=f"User_{position}",
            village=record['officename'],
            coords=(float(record['latitude']), float(record['longitude'])),
            interests=chosen,
        ))
    return profiles

# 3. ACTUAL ROAD DISTANCE CALCULATION
def get_road_distance(coord1, coord2):
    """Return the driving distance between two points in kilometres.

    Args:
        coord1: ``(latitude, longitude)`` of the first point.
        coord2: ``(latitude, longitude)`` of the second point.

    Returns:
        The 'distance' of the first route reported by the OSRM routing
        server, divided by 1000. If the request fails, the server answers
        with a non-200 status, or the reply lacks a usable route, the result
        is an estimate instead: the geodesic distance times a 1.4 detour
        factor.
    """
    # The URL carries each point as "longitude,latitude", hence the swapped indices.
    url = f"http://router.project-osrm.org/route/v1/driving/{coord1[1]},{coord1[0]};{coord2[1]},{coord2[0]}?overview=false"
    try:
        r = requests.get(url, timeout=5)
        if r.status_code == 200:
            return r.json()['routes'][0]['distance'] / 1000.0
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: network trouble or an unexpected payload falls through
        # to the estimate. Deliberately not a bare except, so Ctrl-C and
        # SystemExit still propagate.
        pass
    # Estimated road fallback (geodesic distance * 1.4 detour factor)
    return geodesic(coord1, coord2).kilometers * 1.4

# 4. GNN ARCHITECTURE
class DemographicGNN(nn.Module):
    """One graph layer: aggregate over the adjacency, project linearly, apply ReLU."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.conv = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        # adj @ x adds up, for each node, the feature rows selected by the
        # non-zero entries of that node's adjacency row, so a 0 entry shares nothing.
        aggregated = adj @ x
        projected = self.conv(aggregated)
        return projected.relu()

# 5. EXECUTE MATCHING LOGIC
def process_matching(max_distance_km=1500.0):
    """Match two randomly generated users and print a compatibility report.

    The users come from ``generate_random_users`` and the road distance
    between them from ``get_road_distance``. They are linked by a graph edge
    when that distance is at most ``max_distance_km``. A two-row feature
    matrix (one-hot interests over ``interests_pool`` followed by latitude and
    longitude) is passed through a freshly initialised ``DemographicGNN``.

    Args:
        max_distance_km: Largest road distance, in kilometres, at which the
            two users are still connected. Defaults to 1500.0, the cut-off
            this function used before it became a parameter.

    Returns:
        None. The report is written to stdout.
    """
    users = generate_random_users(2)
    u1, u2 = users[0], users[1]

    # Sorted so the printed list is stable between runs (set order is not).
    common_interests = sorted(set(u1['interests']) & set(u2['interests']))
    actual_road_dist = get_road_distance(u1['coords'], u2['coords'])

    # CONSTRAINT: beyond max_distance_km the users are not connected (edge = 0).
    # The diagonal stays 1 so each node always keeps its own features.
    has_edge = 1 if actual_road_dist <= max_distance_km else 0
    adj_matrix = torch.tensor([[1, has_edge], [has_edge, 1]], dtype=torch.float)

    # Prepare GNN features: one-hot interests, then (latitude, longitude)
    u1_vec = [1 if x in u1['interests'] else 0 for x in interests_pool]
    u2_vec = [1 if x in u2['interests'] else 0 for x in interests_pool]
    node_features = torch.tensor([u1_vec + list(u1['coords']), u2_vec + list(u2['coords'])], dtype=torch.float)

    # Input width follows the feature matrix instead of a hard-coded 9, so it
    # stays correct if interests_pool changes.
    # NOTE(review): the layer is untrained and `embeddings` does not feed the
    # determination printed below.
    gnn = DemographicGNN(node_features.shape[1], 4)
    embeddings = gnn(node_features, adj_matrix)

    # FINAL OUTPUT
    print("\n" + "="*55)
    print("                DEMOGRAPHIC PROFILE SUMMARY (AP ONLY)")
    print("="*55)
    for u in [u1, u2]:
        print(f"PROFILE: {u['name']} | Location: {u['village']}, AP")
        print(f"Interests: {', '.join(u['interests'])} | GPS: {u['coords']}\n")

    print("-" * 55)
    print(f"SHARED INTERESTS: {', '.join(common_interests) if common_interests else 'None'}")
    print(f"ACTUAL ROAD DISTANCE: {actual_road_dist:.2f} km")

    # Final Connection Logic: distance is checked before interests, so a pair
    # that is too far apart is reported as such even with nothing in common.
    if has_edge == 1 and len(common_interests) > 0:
        status = "Status: Compatible & Reachable ✅"
    elif has_edge == 0:
        status = "Status: Too Distant ❌ (Connection Blocked - Over Distance)"
    else:
        status = "Status: Incompatible ❌ (No Common Interests)"

    print(f"DETERMINATION: {status}")
    print("="*55)

# Run the demo only when this code executes as the main module (not when it is
# imported); process_matching prints its report to stdout.
if __name__ == "__main__":
    process_matching()

Dataset loaded: 1332 locations filtered for Andhra Pradesh.

                DEMOGRAPHIC PROFILE SUMMARY (AP ONLY)
PROFILE: User_1 | Location: Pidathapoluru B.O, AP
Interests: gym, movies, sports, music | GPS: (14.353498, 80.086317)

PROFILE: User_2 | Location: Boruvancha B.O, AP
Interests: coding, music, movies, travel | GPS: (16.76, 80.84)

-------------------------------------------------------
SHARED INTERESTS: music, movies
ACTUAL ROAD DISTANCE: 333.51 km
DETERMINATION: Status: Compatible & Reachable ✅


In [7]:
import pandas as pd
import torch
import torch.nn as nn
import requests
import random
from scipy.spatial import distance
from geopy.distance import geodesic

# 1. LOAD AND FILTER DATASET
try:
    full_df = pd.read_csv('Address.csv')

    # FILTER: strictly Andhra Pradesh (case-insensitive substring match on
    # 'statename'; rows with a missing state are excluded), then drop rows
    # without a latitude or longitude.
    df = (
        full_df
        .loc[lambda frame: frame['statename'].str.contains("Andhra Pradesh", case=False, na=False)]
        .copy()
        .dropna(subset=['latitude', 'longitude'])
    )

    print(f"Dataset loaded: {len(df)} locations filtered for Andhra Pradesh.")
except FileNotFoundError:
    print("Error: 'Address.csv' not found.")
    # NOTE(review): inside a notebook kernel exit() may shut the kernel down
    # rather than just stopping this cell - confirm that is intended.
    exit()

# 2. CONFIGURATION & USER GENERATION
# Interests that random users draw from; the order doubles as the column
# order of the one-hot interest vectors built in process_matching.
interests_pool = [
    "sports",
    "movies",
    "games",
    "gym",
    "music",
    "coding",
    "travel",
]

def generate_random_users(n=2):
    """Return ``n`` random user profiles built from sampled rows of ``df``.

    Every profile is a dict holding 'name' (``User_<k>``, numbered from 1 in
    sample order), 'village' (the row's 'officename'), 'coords'
    (``(latitude, longitude)`` as floats) and 'interests' (between 1 and 4
    distinct items of the module-level ``interests_pool``).
    """
    picked_rows = df.sample(n=n)
    return [
        {
            'name': f"User_{position}",
            'village': record['officename'],
            'coords': (float(record['latitude']), float(record['longitude'])),
            # The interest count is drawn first, then that many pool entries.
            'interests': random.sample(interests_pool, random.randint(1, 4)),
        }
        for position, (_, record) in enumerate(picked_rows.iterrows(), start=1)
    ]

# 3. ROAD DISTANCE CALCULATION
def get_road_distance(coord1, coord2):
    """Return the driving distance between two points in kilometres.

    Args:
        coord1: ``(latitude, longitude)`` of the first point.
        coord2: ``(latitude, longitude)`` of the second point.

    Returns:
        The 'distance' of the first route reported by the OSRM routing
        server, divided by 1000. When the request fails, the status is not
        200, or the reply has no usable route, the geodesic distance times a
        1.4 detour factor is returned instead.
    """
    # The URL carries each point as "longitude,latitude", hence the swapped indices.
    url = f"http://router.project-osrm.org/route/v1/driving/{coord1[1]},{coord1[0]};{coord2[1]},{coord2[0]}?overview=false"
    try:
        r = requests.get(url, timeout=5)
        if r.status_code == 200:
            return r.json()['routes'][0]['distance'] / 1000.0
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: network trouble or an unexpected payload falls through
        # to the estimate. Not a bare except, so Ctrl-C and SystemExit still
        # propagate.
        pass
    return geodesic(coord1, coord2).kilometers * 1.4

# 4. GNN ARCHITECTURE
class DemographicGNN(nn.Module):
    """Adjacency-weighted feature aggregation followed by a linear map and ReLU."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.conv = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        # Each output row mixes the rows of x picked out by the matching row of adj.
        neighbourhood = adj @ x
        return self.conv(neighbourhood).relu()

# 5. EXECUTE MATCHING LOGIC
def process_matching(max_distance_km=500):
    """Match two randomly generated users and print a detailed report.

    The users come from ``generate_random_users`` and the road distance
    between them from ``get_road_distance``. They are linked by a graph edge
    when that distance is at most ``max_distance_km``. A two-row feature
    matrix (one-hot interests over ``interests_pool`` followed by latitude and
    longitude) is passed through a freshly initialised ``DemographicGNN``.

    Args:
        max_distance_km: Largest road distance, in kilometres, at which the
            two users are still connected. Defaults to 500, the cut-off this
            function used before it became a parameter; the "Too Distant"
            message reports whichever value is in effect.

    Returns:
        None. The report is written to stdout.
    """
    users = generate_random_users(2)
    u1, u2 = users[0], users[1]

    # Sorted so the printed list is stable between runs (set order is not).
    common_interests = sorted(set(u1['interests']) & set(u2['interests']))
    actual_road_dist = get_road_distance(u1['coords'], u2['coords'])

    # CONSTRAINT: connection only if distance <= max_distance_km.
    # The diagonal stays 1 so each node always keeps its own features.
    has_edge = 1 if actual_road_dist <= max_distance_km else 0
    adj_matrix = torch.tensor([[1, has_edge], [has_edge, 1]], dtype=torch.float)

    # Prepare GNN features: one-hot interests, then (latitude, longitude)
    u1_vec = [1 if x in u1['interests'] else 0 for x in interests_pool]
    u2_vec = [1 if x in u2['interests'] else 0 for x in interests_pool]
    node_features = torch.tensor([u1_vec + list(u1['coords']), u2_vec + list(u2['coords'])], dtype=torch.float)

    # Input width follows the feature matrix instead of a hard-coded 9.
    # NOTE(review): the layer is untrained and `embeddings` does not feed the
    # determination printed below.
    gnn = DemographicGNN(node_features.shape[1], 4)
    embeddings = gnn(node_features, adj_matrix)

    # --- OUTPUT SECTION ---
    print("\n" + "="*55)
    print("                DETAILED USER PROFILES (AP)")
    print("="*55)

    # Explicitly printing each user profile and their interests
    for u in [u1, u2]:
        print(f"IDENTIFIER : {u['name']}")
        print(f"VILLAGE    : {u['village']}, Andhra Pradesh")
        print(f"INTERESTS  : {', '.join(u['interests'])}")
        print(f"GPS LOC    : {u['coords']}")
        print("-" * 30)

    print("\n" + "="*55)
    print("                MATCHING & SPATIAL RESULTS")
    print("="*55)
    print(f"SHARED INTERESTS     : {', '.join(common_interests) if common_interests else 'None'}")
    print(f"ACTUAL ROAD DISTANCE : {actual_road_dist:.2f} km")

    # Final Connection Logic: distance is checked before interests.
    if has_edge == 1 and len(common_interests) > 0:
        status = "Status: Compatible & Reachable ✅"
    elif has_edge == 0:
        # :g prints 500 as "500" and 30.0 as "30", matching the old fixed text.
        status = f"Status: Too Distant ❌ (Connection Blocked > {max_distance_km:g}km)"
    else:
        status = "Status: Incompatible ❌ (No Shared Interests)"

    print(f"DETERMINATION        : {status}")
    print("="*55)

# Run the demo only when this code executes as the main module (not when it is
# imported); process_matching prints its report to stdout.
if __name__ == "__main__":
    process_matching()

Dataset loaded: 1332 locations filtered for Andhra Pradesh.

                DETAILED USER PROFILES (AP)
IDENTIFIER : User_1
VILLAGE    : Agaram B.O, Andhra Pradesh
INTERESTS  : movies, sports, music, travel
GPS LOC    : (13.33621, 79.639816)
------------------------------
IDENTIFIER : User_2
VILLAGE    : Kusumapuram B.O, Andhra Pradesh
INTERESTS  : travel, sports, games
GPS LOC    : (18.951777, 84.631722)
------------------------------

                MATCHING & SPATIAL RESULTS
SHARED INTERESTS     : travel, sports
ACTUAL ROAD DISTANCE : 1010.10 km
DETERMINATION        : Status: Too Distant ❌ (Connection Blocked > 500km)


In [2]:
import pandas as pd
import torch
import torch.nn as nn
import requests
import random
from geopy.distance import geodesic

# --- PRE-REQUISITE: GENERATE SAMPLE INTERESTS DATASET ---
# Only needed when 'Interests.csv' does not exist yet.
def create_mock_interests_file():
    """Write a mock 'Interests.csv' with 100 users and random interests.

    The file has the columns 'User_ID' (``User_1`` .. ``User_100``) and
    'interests' (1 to 4 distinct interests joined with ", "). Any existing
    'Interests.csv' in the working directory is overwritten.
    """
    pool = ["sports", "movies", "games", "gym", "music", "coding", "travel"]
    records = []
    for user_number in range(1, 101):
        # Decide how many interests this user has, then draw that many.
        count = random.randint(1, 4)
        picks = random.sample(pool, count)
        records.append({
            'User_ID': f"User_{user_number}",
            'interests': ", ".join(picks),
        })
    pd.DataFrame(records).to_csv('Interests.csv', index=False)
    print("Interest dataset created/verified.")

# 1. DATA LOADING AND MERGING
try:
    # Location data: Andhra Pradesh rows only (case-insensitive substring
    # match on 'statename', missing states excluded) that have both coordinates.
    addr_df = (
        pd.read_csv('Address.csv')
        .loc[lambda frame: frame['statename'].str.contains("Andhra Pradesh", case=False, na=False)]
        .copy()
        .dropna(subset=['latitude', 'longitude'])
    )

    # Interests data: generate a mock file for this demo when none exists.
    try:
        int_df = pd.read_csv('Interests.csv')
    except FileNotFoundError:
        create_mock_interests_file()
        int_df = pd.read_csv('Interests.csv')

    # The two frames are not merged here; process_matching samples from each.
    print("Datasets loaded. Filtering for Andhra Pradesh...")
except Exception as load_error:
    print(f"Error loading files: {load_error}")
    # NOTE(review): inside a notebook kernel exit() may shut the kernel down
    # rather than just stopping this cell - confirm that is intended.
    exit()

# 2. ROAD DISTANCE CALCULATION
def get_road_distance(coord1, coord2):
    """Return the driving distance between two points in kilometres.

    Args:
        coord1: ``(latitude, longitude)`` of the first point.
        coord2: ``(latitude, longitude)`` of the second point.

    Returns:
        The 'distance' of the first route reported by the OSRM routing
        server, divided by 1000. When the request fails, the status is not
        200, or the reply has no usable route, the geodesic distance times a
        1.4 detour factor is returned instead.
    """
    # The URL carries each point as "longitude,latitude", hence the swapped indices.
    url = f"http://router.project-osrm.org/route/v1/driving/{coord1[1]},{coord1[0]};{coord2[1]},{coord2[0]}?overview=false"
    try:
        r = requests.get(url, timeout=5)
        if r.status_code == 200:
            return r.json()['routes'][0]['distance'] / 1000.0
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: network trouble or an unexpected payload falls through
        # to the estimate. Not a bare except, so Ctrl-C and SystemExit still
        # propagate.
        pass
    return geodesic(coord1, coord2).kilometers * 1.4

# 3. GNN ARCHITECTURE
class DemographicGNN(nn.Module):
    """Graph layer that sums features along the adjacency, then applies Linear + ReLU."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.conv = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        # Row i of (adj @ x) combines the rows of x weighted by row i of adj.
        mixed = adj @ x
        activated = torch.relu(self.conv(mixed))
        return activated

# 4. EXECUTE MATCHING LOGIC
def process_matching(max_distance_km=500):
    """Pair two dataset-backed users and print a compatibility report.

    Two rows are sampled from ``addr_df`` and two from ``int_df`` and paired
    by position. The road distance between the two locations comes from
    ``get_road_distance``; the users are linked by a graph edge when it is at
    most ``max_distance_km``. A two-row feature matrix (one-hot interests
    followed by latitude and longitude) is passed through a freshly
    initialised ``DemographicGNN``.

    Args:
        max_distance_km: Largest road distance, in kilometres, at which the
            two users are still connected. Defaults to 500, the cut-off this
            function used before it became a parameter; the "Too Distant"
            message reports whichever value is in effect.

    Returns:
        None. The report is written to stdout.
    """
    # Draw two random locations and two random interest records. They are
    # paired by position below; no join on User_ID takes place.
    sampled_addr = addr_df.sample(n=2)
    sampled_ints = int_df.sample(n=2)

    users = []
    for i in range(2):
        row_addr = sampled_addr.iloc[i]
        row_int = sampled_ints.iloc[i]
        raw_interests = row_int['interests']
        # A missing cell would otherwise become the bogus interest "nan";
        # empty tokens (e.g. from a trailing comma) are dropped as well.
        if pd.isna(raw_interests):
            interests = []
        else:
            interests = [item.strip() for item in str(raw_interests).split(',') if item.strip()]
        users.append({
            'name': row_int['User_ID'],
            'village': row_addr['officename'],
            'coords': (float(row_addr['latitude']), float(row_addr['longitude'])),
            'interests': interests
        })

    u1, u2 = users[0], users[1]
    interests_pool = ["sports", "movies", "games", "gym", "music", "coding", "travel"]

    # Sorted so the printed list is stable between runs (set order is not).
    common_interests = sorted(set(u1['interests']) & set(u2['interests']))
    actual_road_dist = get_road_distance(u1['coords'], u2['coords'])

    # CONSTRAINT: connection only if distance <= max_distance_km.
    # The diagonal stays 1 so each node always keeps its own features.
    has_edge = 1 if actual_road_dist <= max_distance_km else 0
    adj_matrix = torch.tensor([[1, has_edge], [has_edge, 1]], dtype=torch.float)

    # GNN Processing: one-hot interests, then (latitude, longitude)
    u1_vec = [1 if x in u1['interests'] else 0 for x in interests_pool]
    u2_vec = [1 if x in u2['interests'] else 0 for x in interests_pool]
    node_features = torch.tensor([u1_vec + list(u1['coords']), u2_vec + list(u2['coords'])], dtype=torch.float)

    # Input width follows the feature matrix instead of a hard-coded 9.
    # NOTE(review): the layer is untrained and `embeddings` does not feed the
    # determination printed below.
    gnn = DemographicGNN(node_features.shape[1], 4)
    embeddings = gnn(node_features, adj_matrix)

    # --- OUTPUT SECTION ---
    print("\n" + "="*55)
    print("             USER PROFILES (FROM DATASETS)")
    print("="*55)
    for u in [u1, u2]:
        print(f"USER ID    : {u['name']}")
        print(f"VILLAGE    : {u['village']}, AP")
        print(f"INTERESTS  : {', '.join(u['interests'])}")
        print(f"GPS LOC    : {u['coords']}")
        print("-" * 30)

    print("\n" + "="*55)
    print("             MATCHING & SPATIAL RESULTS")
    print("="*55)
    print(f"SHARED INTERESTS     : {', '.join(common_interests) if common_interests else 'None'}")
    print(f"ACTUAL ROAD DISTANCE : {actual_road_dist:.2f} km")

    # Distance is checked before interests.
    if has_edge == 1 and len(common_interests) > 0:
        status = "Status: Compatible & Reachable ✅"
    elif has_edge == 0:
        # :g prints 500 as "500" and 30.0 as "30", matching the old fixed text.
        status = f"Status: Too Distant ❌ (Connection Blocked > {max_distance_km:g}km)"
    else:
        status = "Status: Incompatible ❌ (No Shared Interests)"

    print(f"DETERMINATION        : {status}")
    print("="*55)

# Run the demo only when this code executes as the main module (not when it is
# imported); process_matching prints its report to stdout.
if __name__ == "__main__":
    process_matching()

Datasets loaded. Filtering for Andhra Pradesh...

             USER PROFILES (FROM DATASETS)
USER ID    : User_6
VILLAGE    : Kothakottam B.O, AP
INTERESTS  : sports, music, games, coding
GPS LOC    : (17.46, 82.46)
------------------------------
USER ID    : User_7
VILLAGE    : Mulakaluru B.O, AP
INTERESTS  : games, music
GPS LOC    : (16.28, 80.07)
------------------------------

             MATCHING & SPATIAL RESULTS
SHARED INTERESTS     : music, games
ACTUAL ROAD DISTANCE : 337.76 km
DETERMINATION        : Status: Compatible & Reachable ✅
