# AI Travel Planner - GNN Research Notebook

This notebook is for researching and developing the Graph Neural Network (GNN) component of the AI Travel Planner.

## Objectives
1. Build user preference graphs
2. Create POI (Point of Interest) relationship graphs
3. Implement GNN-based recommendation algorithms
4. Test personalization performance

## Setup

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GraphConv
from torch_geometric.data import Data

# Set style
# NOTE(review): 'seaborn-v0_8' is presumably only known to newer matplotlib
# releases (older ones may call it 'seaborn') — confirm against the pinned version.
plt.style.use('seaborn-v0_8')
# Both calls change global plotting defaults; the palette is applied after the
# style sheet, so keep this order when editing.
sns.set_palette("husl")

# Sanity output: confirms the import cell ran and shows the active PyTorch build.
print("✅ Libraries imported successfully")
print(f"PyTorch version: {torch.__version__}")

## 1. Generate Sample Data

Let's create synthetic user and POI data for our research.

In [None]:
# Synthetic user population (seeded so every run produces the same sample)
np.random.seed(42)

n_users = 1000
n_pois = 500

# Each column is drawn separately, in a fixed order, because every draw
# advances the shared NumPy random stream.
user_ages = np.random.normal(35, 12, n_users).clip(18, 80)
user_budgets = np.random.choice(['budget', 'mid-range', 'luxury'], n_users, p=[0.3, 0.5, 0.2])
user_styles = np.random.choice(['adventure', 'cultural', 'relaxation', 'business'], n_users, p=[0.25, 0.3, 0.3, 0.15])
user_group_sizes = np.random.poisson(2, n_users).clip(1, 8)
user_trip_counts = np.random.poisson(3, n_users).clip(1, 12)

# Column name -> values, one entry per user
users_data = {
    'user_id': range(n_users),
    'age': user_ages,
    'budget_preference': user_budgets,
    'travel_style': user_styles,
    'group_size': user_group_sizes,
    'travel_frequency': user_trip_counts,
}

users_df = pd.DataFrame(users_data)
print(f"Generated {len(users_df)} users")
print(users_df.head())

In [None]:
# Synthetic POI (Point of Interest) catalogue
poi_types = ['hotel', 'restaurant', 'attraction', 'activity', 'transport']
destinations = ['Paris', 'Tokyo', 'New York', 'London', 'Bangkok', 'Rome', 'Barcelona', 'Mumbai']

# Draw each attribute in a fixed order; the draws share the global NumPy
# random stream, so the order determines the generated values.
poi_type_draws = np.random.choice(poi_types, n_pois)
poi_destination_draws = np.random.choice(destinations, n_pois)
poi_price_draws = np.random.choice(['$', '$$', '$$$', '$$$$'], n_pois, p=[0.2, 0.4, 0.3, 0.1])
poi_rating_draws = np.random.normal(4.0, 0.8, n_pois).clip(1.0, 5.0)
poi_popularity_draws = np.random.exponential(0.3, n_pois).clip(0, 1)

# Column name -> values, one entry per POI
pois_data = {
    'poi_id': range(n_pois),
    'type': poi_type_draws,
    'destination': poi_destination_draws,
    'price_level': poi_price_draws,
    'rating': poi_rating_draws,
    'popularity_score': poi_popularity_draws,
}

pois_df = pd.DataFrame(pois_data)
print(f"Generated {len(pois_df)} POIs")
print(pois_df.head())

## 2. Build User-User Similarity Graph

Create a graph where users are connected based on similarity in preferences.

In [None]:
# Create user feature vectors
def encode_categorical(df, column, categories):
    """Build indicator columns for ``df[column]``.

    The result of ``pd.get_dummies`` is padded so that every entry of
    ``categories`` has a ``"<column>_<category>"`` column, even when that
    category never occurs in the data.

    Args:
        df: DataFrame holding the categorical column.
        column: Name of the column to encode; also used as the column prefix.
        categories: Category values that must each be represented.

    Returns:
        DataFrame of indicator columns; padded columns are filled with 0 and
        appended after the columns produced by ``pd.get_dummies``.
    """
    dummies = pd.get_dummies(df[column], prefix=column)
    expected_names = [f"{column}_{category}" for category in categories]
    for name in expected_names:
        if name in dummies.columns:
            continue
        # Category absent from the data: add an all-zero indicator.
        dummies[name] = 0
    return dummies

# Assemble the user feature matrix used for the similarity calculation
numeric_user_columns = ['age', 'group_size', 'travel_frequency']

# Indicator blocks for the two categorical preferences
budget_encoded = encode_categorical(users_df, 'budget_preference', ['budget', 'mid-range', 'luxury'])
style_encoded = encode_categorical(users_df, 'travel_style', ['adventure', 'cultural', 'relaxation', 'business'])

# Numeric columns first, then the budget and travel-style indicators
user_features = pd.concat(
    [users_df[numeric_user_columns].copy(), budget_encoded, style_encoded],
    axis=1,
)

# Standardise every column
scaler = StandardScaler()
user_features_scaled = scaler.fit_transform(user_features)

print(f"User feature matrix shape: {user_features_scaled.shape}")
print(f"Feature names: {list(user_features.columns)}")

In [None]:
# Calculate user similarity matrix
user_similarity = cosine_similarity(user_features_scaled)

# Create user graph (connect users with similarity > threshold)
similarity_threshold = 0.7
user_graph = nx.Graph()

# Add all users as nodes (node id = DataFrame index, attributes = the user's row)
for i, row in users_df.iterrows():
    user_graph.add_node(i, **row.to_dict())

# Add edges for similar users.
# The strict upper triangle (k=1) visits every unordered pair (i, j) with i < j
# exactly once and skips the diagonal, which is what the nested Python loops
# did, but without ~n_users**2 / 2 interpreter-level iterations.
similar_pairs = np.triu(user_similarity > similarity_threshold, k=1)
user_edge_rows, user_edge_cols = np.nonzero(similar_pairs)
user_graph.add_weighted_edges_from(
    zip(
        user_edge_rows.tolist(),
        user_edge_cols.tolist(),
        user_similarity[user_edge_rows, user_edge_cols].tolist(),
    )
)
edges_added = len(user_edge_rows)

print(f"User graph created:")
print(f"- Nodes: {user_graph.number_of_nodes()}")
print(f"- Edges: {user_graph.number_of_edges()}")
print(f"- Average degree: {2 * user_graph.number_of_edges() / user_graph.number_of_nodes():.2f}")

## 3. Visualize User Graph

In [None]:
# Visualize a subset of the user graph
plt.figure(figsize=(12, 8))

# Take a subgraph for visualization (too many nodes to visualize all)
subgraph_nodes = list(user_graph.nodes())[:50]
subgraph = user_graph.subgraph(subgraph_nodes)

# Create layout.
# spring_layout starts from random positions; a fixed seed keeps the figure
# identical across Restart & Run All.
pos = nx.spring_layout(subgraph, k=1, iterations=50, seed=42)

# Color nodes by travel style (node ids are users_df index labels)
travel_styles = [users_df.loc[node, 'travel_style'] for node in subgraph.nodes()]
style_colors = {'adventure': 'red', 'cultural': 'blue', 'relaxation': 'green', 'business': 'orange'}
node_colors = [style_colors[style] for style in travel_styles]

# Draw graph
nx.draw(subgraph, pos, 
        node_color=node_colors, 
        node_size=100, 
        alpha=0.7,
        with_labels=False,
        edge_color='gray',
        width=0.5)

# Add legend: one proxy marker per travel style, since nx.draw creates no
# labelled artists of its own
legend_elements = [plt.Line2D([0], [0], marker='o', color='w', 
                             markerfacecolor=color, markersize=10, label=style)
                  for style, color in style_colors.items()]
plt.legend(handles=legend_elements, title='Travel Style')

plt.title('User Similarity Graph (Sample of 50 users)\nNodes colored by travel style')
plt.axis('off')
plt.tight_layout()
plt.show()

## 4. Build POI Relationship Graph

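Create a graph where POIs are connected when they are in the same destination and their feature vectors are sufficiently similar. Co-occurrence in trips is not modelled here, because the synthetic data contains no trip records.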

In [None]:
# Create POI feature vectors
poi_features = pois_df[['rating', 'popularity_score']].copy()

# Encode categorical features
type_encoded = encode_categorical(pois_df, 'type', poi_types)
dest_encoded = encode_categorical(pois_df, 'destination', destinations)
price_encoded = encode_categorical(pois_df, 'price_level', ['$', '$$', '$$$', '$$$$'])

# Combine features
poi_features = pd.concat([poi_features, type_encoded, dest_encoded, price_encoded], axis=1)

# Normalize with a dedicated scaler. Re-fitting the user `scaler` here would
# overwrite its user-feature statistics, so it could no longer transform
# user features consistently after this cell has run.
poi_scaler = StandardScaler()
poi_features_scaled = poi_scaler.fit_transform(poi_features)

print(f"POI feature matrix shape: {poi_features_scaled.shape}")

In [None]:
# Calculate POI similarity matrix
poi_similarity = cosine_similarity(poi_features_scaled)

# Create POI graph
poi_threshold = 0.8
poi_graph = nx.Graph()

# Add POI nodes (node id = DataFrame index, attributes = the POI's row)
for i, row in pois_df.iterrows():
    poi_graph.add_node(i, **row.to_dict())

# Add edges for similar POIs (within same destination).
# Destinations are compared as integer codes in one vectorised step instead of
# materialising two DataFrame rows per pair inside an O(n_pois**2) loop.
destination_codes = pd.factorize(pois_df['destination'])[0]
same_destination = (
    (destination_codes[:, None] == destination_codes[None, :])
    # factorize marks missing values with -1; a missing destination never
    # matches anything, mirroring NaN != NaN in the pairwise comparison.
    & (destination_codes[:, None] >= 0)
)
# Strict upper triangle: each unordered pair (i, j), i < j, exactly once.
connectable = np.triu(same_destination & (poi_similarity > poi_threshold), k=1)
poi_edge_rows, poi_edge_cols = np.nonzero(connectable)
poi_graph.add_weighted_edges_from(
    zip(
        poi_edge_rows.tolist(),
        poi_edge_cols.tolist(),
        poi_similarity[poi_edge_rows, poi_edge_cols].tolist(),
    )
)

print(f"POI graph created:")
print(f"- Nodes: {poi_graph.number_of_nodes()}")
print(f"- Edges: {poi_graph.number_of_edges()}")
print(f"- Average degree: {2 * poi_graph.number_of_edges() / poi_graph.number_of_nodes():.2f}")

## 5. Simple GNN Implementation

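Define a basic Graph Convolutional Network for user preference prediction. In this notebook the model is only defined and the user graph is converted to PyTorch Geometric format; training the model is left to the next steps.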

In [None]:
class SimpleGCN(nn.Module):
    """Simple Graph Convolutional Network for recommendation.

    A stack of ``GCNConv`` layers. ReLU and dropout are applied after every
    layer except the last, so the final layer returns raw (unactivated) values.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Output size of every layer except the last.
        output_dim: Size of each output node vector.
        num_layers: Total number of ``GCNConv`` layers (>= 1). With 1, a single
            ``input_dim -> output_dim`` layer is built and ``hidden_dim`` is
            unused.
        dropout: Dropout probability applied between layers.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, dropout=0.2):
        super().__init__()

        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.num_layers = num_layers

        # Layer widths: input -> hidden x (num_layers - 1) -> output.
        # Deriving the layers from this list makes the layer count always equal
        # num_layers (previously num_layers=1 still produced two layers).
        dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.convs = nn.ModuleList(
            GCNConv(in_dim, out_dim) for in_dim, out_dim in zip(dims[:-1], dims[1:])
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, edge_index):
        """Run the layer stack on node features ``x`` over ``edge_index``.

        Returns the output of the final ``GCNConv`` layer.
        """
        last = len(self.convs) - 1
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < last:  # Don't apply activation/dropout to the final layer
                x = F.relu(x)
                x = self.dropout(x)
        return x

print("✅ GCN model defined")

In [None]:
# Convert NetworkX graph to PyTorch Geometric format
def networkx_to_pytorch_geometric(graph, node_features):
    """Convert NetworkX graph to PyTorch Geometric data format.

    Node labels are used directly as row indices into ``node_features``, so
    they must be integers in ``[0, len(node_features))``.

    For an undirected graph every edge is emitted in both directions.
    ``graph.edges()`` lists each undirected edge once, and an ``edge_index``
    holding only (u, v) would describe one-way edges u -> v.

    Args:
        graph: NetworkX graph with integer node labels.
        node_features: Array-like of per-node features, one row per node.

    Returns:
        ``Data`` with ``x`` (float tensor of features) and ``edge_index``
        (long tensor of shape ``(2, E)``; ``E`` counts directed edges).

    Raises:
        ValueError: If an edge refers to a node id without a feature row.
    """
    # Convert features to tensor
    x = torch.tensor(node_features, dtype=torch.float)

    # Get edge list
    edge_list = list(graph.edges())
    if not edge_list:
        # If no edges, create empty tensor
        edge_index = torch.empty((2, 0), dtype=torch.long)
    else:
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

        # Fail early with a clear message instead of an indexing error inside
        # a later convolution.
        if edge_index.min() < 0 or edge_index.max() >= x.size(0):
            raise ValueError(
                "graph contains node ids outside the range of node_features rows"
            )

        if not graph.is_directed():
            # Add the reverse of every edge; self-loops are their own reverse
            # and are not duplicated.
            src, dst = edge_index
            not_loop = src != dst
            reverse = torch.stack([dst[not_loop], src[not_loop]])
            edge_index = torch.cat([edge_index, reverse], dim=1).contiguous()

    return Data(x=x, edge_index=edge_index)

# Build the PyTorch Geometric view of the user graph
user_data = networkx_to_pytorch_geometric(user_graph, user_features_scaled)

# Report the tensor shapes of the converted graph
node_feature_shape = user_data.x.shape
edge_index_shape = user_data.edge_index.shape

print(f"User graph data:")
print(f"- Node features shape: {node_feature_shape}")
print(f"- Edge index shape: {edge_index_shape}")
print(f"- Number of edges: {edge_index_shape[1]}")

## 6. Generate Synthetic Interaction Data

Create user-POI interactions for training our recommendation system.

In [None]:
# Generate synthetic user-POI interactions
np.random.seed(42)

# Number of interaction records to draw. The same (user, POI) pair may be
# drawn more than once.
n_interactions = 5000

# Simple compatibility scoring: (user budget preference, POI price level) -> [0, 1].
# Defined once up front; it does not change between iterations.
budget_match = {
    ('budget', '$'): 1.0, ('budget', '$$'): 0.5, ('budget', '$$$'): 0.2, ('budget', '$$$$'): 0.1,
    ('mid-range', '$'): 0.7, ('mid-range', '$$'): 1.0, ('mid-range', '$$$'): 0.8, ('mid-range', '$$$$'): 0.3,
    ('luxury', '$'): 0.3, ('luxury', '$$'): 0.5, ('luxury', '$$$'): 0.8, ('luxury', '$$$$'): 1.0
}

# Positional lookup arrays: avoids building a full DataFrame row for every
# sampled user and POI.
user_budgets = users_df['budget_preference'].to_numpy()
poi_prices = pois_df['price_level'].to_numpy()

interactions = []

for _ in range(n_interactions):
    # The order of the random draws inside the loop is kept fixed so the
    # generated data stays reproducible for the seed above.
    user_id = np.random.randint(0, n_users)
    poi_id = np.random.randint(0, n_pois)

    # Generate rating based on user-POI compatibility
    user_budget = user_budgets[user_id]
    poi_price = poi_prices[poi_id]
    base_compatibility = budget_match.get((user_budget, poi_price), 0.5)

    # Add some randomness, then keep the rating on the 1-5 scale
    rating = np.random.normal(base_compatibility * 5, 1.0)
    rating = np.clip(rating, 1, 5)

    interactions.append({
        'user_id': user_id,
        'poi_id': poi_id,
        'rating': rating,
        'interaction_type': np.random.choice(['view', 'book', 'review'], p=[0.6, 0.3, 0.1])
    })

interactions_df = pd.DataFrame(interactions)
print(f"Generated {len(interactions_df)} interactions")
print(interactions_df.head())
print(f"\nRating distribution:")
print(interactions_df['rating'].describe())

## 7. Recommendation Algorithm Testing

In [None]:
# Simple collaborative filtering baseline
def collaborative_filtering_recommendation(user_id, interactions_df, top_k=5,
                                           min_similarity=0.1, n_neighbors=10):
    """Simple collaborative filtering recommendation.

    Users are compared by the Jaccard similarity of the POI sets they have
    interacted with. POIs from the most similar users are scored by the sum of
    ``similarity * rating`` and the best ``top_k`` are returned.

    Args:
        user_id: User to recommend for.
        interactions_df: DataFrame with ``user_id``, ``poi_id`` and ``rating``
            columns; other columns are ignored.
        top_k: Maximum number of POI ids to return.
        min_similarity: A user counts as similar only when the Jaccard
            similarity is strictly greater than this value.
        n_neighbors: Number of most-similar users whose interactions are scored.

    Returns:
        List of POI ids, best first. For a user without interactions
        (cold start) the POIs with the highest mean rating are returned.
        The list is empty when no other user is similar enough.
    """
    user_mask = interactions_df['user_id'] == user_id

    if not user_mask.any():
        # Cold start - return popular items
        popular_pois = interactions_df.groupby('poi_id')['rating'].mean().sort_values(ascending=False)
        return popular_pois.head(top_k).index.tolist()

    user_pois = set(interactions_df.loc[user_mask, 'poi_id'].tolist())

    # One pass over the table: each other user's (poi, rating) pairs in row
    # order. This replaces one full DataFrame filter per other user. Reading
    # the columns separately also keeps poi ids as integers, whereas
    # iterrows() turns them into floats when every column is numeric.
    history = {}
    for other_user, poi_id, rating in zip(interactions_df['user_id'].tolist(),
                                          interactions_df['poi_id'].tolist(),
                                          interactions_df['rating'].tolist()):
        if other_user == user_id:
            continue
        history.setdefault(other_user, []).append((poi_id, rating))

    # Find similar users based on common interactions (Jaccard similarity).
    # The dict preserves first-appearance order of users, so ties below are
    # broken the same way as when iterating over `unique()`.
    similar_users = []
    for other_user, pairs in history.items():
        other_pois = {poi_id for poi_id, _ in pairs}
        union = len(user_pois | other_pois)
        if union > 0:
            similarity = len(user_pois & other_pois) / union
            if similarity > min_similarity:
                similar_users.append((other_user, similarity))

    # Sort by similarity (stable, so equal similarities keep their order)
    similar_users.sort(key=lambda x: x[1], reverse=True)

    # Accumulate scores from the closest neighbours
    recommendations = {}
    for similar_user, similarity in similar_users[:n_neighbors]:
        for poi_id, rating in history[similar_user]:
            # Skip POIs user has already interacted with
            if poi_id in user_pois:
                continue

            # Weight by similarity and rating
            score = similarity * rating
            if poi_id in recommendations:
                recommendations[poi_id] += score
            else:
                recommendations[poi_id] = score

    # Sort recommendations by accumulated score, best first
    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)

    return [poi_id for poi_id, score in sorted_recommendations[:top_k]]

# Smoke-test the baseline recommender on a single user
test_user_id = 0
recommendations = collaborative_filtering_recommendation(test_user_id, interactions_df)

print(f"Recommendations for user {test_user_id}:")
for i, poi_id in enumerate(recommendations, 1):
    # POI ids double as row positions in pois_df
    poi_info = pois_df.iloc[poi_id]
    headline = f"{i}. POI {poi_id}: {poi_info['type']} in {poi_info['destination']} "
    details = f"(Rating: {poi_info['rating']:.1f}, Price: {poi_info['price_level']})"
    print(headline + details)

## 8. Evaluation Metrics

In [None]:
# Simple evaluation of recommendation system
def evaluate_recommendations(interactions_df, n_test_users=50, top_k=10, n_holdout=2):
    """Evaluate recommendation system using simple metrics.

    For each sampled user the last ``n_holdout`` interactions (in row order)
    are held out, recommendations are generated from the remaining data, and
    precision/recall of the held-out POIs among the recommendations are
    averaged over the evaluated users.

    Args:
        interactions_df: DataFrame with ``user_id``, ``poi_id`` and ``rating``
            columns.
        n_test_users: Number of users to sample; capped at the number of
            distinct users so small datasets no longer raise.
        top_k: Number of recommendations requested per user.
        n_holdout: Number of trailing interactions held out per user (>= 1).
            Users with fewer than ``n_holdout + 1`` interactions are skipped.

    Returns:
        Dict with ``precision``, ``recall``, ``f1_score`` (computed from the
        averaged precision and recall) and ``evaluated_users``, or ``None``
        when no user could be evaluated.

    Raises:
        ValueError: If ``n_holdout`` is smaller than 1.
    """
    if n_holdout < 1:
        raise ValueError(f"n_holdout must be >= 1, got {n_holdout}")

    candidate_users = interactions_df['user_id'].unique()
    if len(candidate_users) == 0:
        return None

    # Sampling without replacement cannot draw more users than exist.
    n_sampled = min(n_test_users, len(candidate_users))
    test_users = np.random.choice(candidate_users, n_sampled, replace=False)

    total_precision = 0
    total_recall = 0
    valid_users = 0

    for user_id in test_users:
        user_interactions = interactions_df[interactions_df['user_id'] == user_id]

        if len(user_interactions) < n_holdout + 1:  # Need minimum interactions
            continue

        # Split into train/test: the trailing rows are the held-out set
        train_interactions = user_interactions.iloc[:-n_holdout]
        test_interactions = user_interactions.iloc[-n_holdout:]

        # Training data = everyone else's interactions + this user's train part
        train_df = interactions_df[interactions_df['user_id'] != user_id]
        train_df = pd.concat([train_df, train_interactions])

        recommendations = collaborative_filtering_recommendation(user_id, train_df, top_k=top_k)

        # Calculate metrics
        test_pois = set(test_interactions['poi_id'].tolist())
        recommended_pois = set(recommendations)
        hits = len(test_pois.intersection(recommended_pois))

        # An empty recommendation list contributes 0 precision for this user.
        if len(recommended_pois) > 0:
            total_precision += hits / len(recommended_pois)

        if len(test_pois) > 0:
            total_recall += hits / len(test_pois)

        valid_users += 1

    if valid_users > 0:
        avg_precision = total_precision / valid_users
        avg_recall = total_recall / valid_users
        f1_score = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0

        return {
            'precision': avg_precision,
            'recall': avg_recall,
            'f1_score': f1_score,
            'evaluated_users': valid_users
        }

    return None

# Run the offline evaluation and report the averaged metrics
evaluation_results = evaluate_recommendations(interactions_df)

if not evaluation_results:
    # No sampled user had enough interactions to be evaluated
    print("Unable to evaluate - insufficient data")
else:
    precision_value = evaluation_results['precision']
    recall_value = evaluation_results['recall']
    f1_value = evaluation_results['f1_score']
    print("Recommendation System Evaluation:")
    print(f"- Precision: {precision_value:.3f}")
    print(f"- Recall: {recall_value:.3f}")
    print(f"- F1 Score: {f1_value:.3f}")
    print(f"- Users evaluated: {evaluation_results['evaluated_users']}")

## 9. Next Steps for GNN Implementation

This notebook provides a foundation for GNN-based travel recommendations. Next steps include:

1. **Enhanced GNN Architecture**:
   - Implement attention mechanisms
   - Add graph pooling layers
   - Multi-layer graph convolutions

2. **Real Data Integration**:
   - Connect to actual travel APIs
   - Process real user interaction data
   - Incorporate temporal dynamics

3. **Advanced Features**:
   - Heterogeneous graphs (users, POIs, destinations)
   - Dynamic graph updates
   - Multi-modal embeddings

4. **Production Integration**:
   - Model serving infrastructure
   - Real-time inference
   - A/B testing framework

In [None]:
# Bundle the notebook's artefacts for use in the main application.
# The bundle lives in memory only; this cell writes nothing to disk.
results = dict(
    user_similarity_matrix=user_similarity,
    poi_similarity_matrix=poi_similarity,
    user_features=user_features_scaled,
    poi_features=poi_features_scaled,
    interactions=interactions_df,
    evaluation_metrics=evaluation_results,
)

# Assemble the closing summary, then print it line by line
summary_lines = [
    "\n✅ GNN Research Notebook Complete!",
    "\nKey findings:",
    f"- Built user similarity graph with {user_graph.number_of_edges()} connections",
    f"- Built POI relationship graph with {poi_graph.number_of_edges()} connections",
    f"- Generated {len(interactions_df)} user-POI interactions",
]
if evaluation_results:
    # Only reported when the evaluation produced metrics
    summary_lines.append(f"- Achieved F1 score of {evaluation_results['f1_score']:.3f} on recommendation task")
summary_lines.append("\nNext: Integrate these findings into the main AI Travel Planner application!")

for summary_line in summary_lines:
    print(summary_line)