In [3]:
import pandas as pd
import numpy as np
import spacy
from geopy.distance import geodesic
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
from lightfm import LightFM
from lightfm.data import Dataset

In [None]:
# Smoke test: confirm that LightFM can be imported in this kernel.
# (LightFM is already imported in the previous cell; this cell only prints a
# confirmation once the import has succeeded.)
from lightfm import LightFM
print("Success!")

In [None]:

# Load the spaCy English pipeline. `nlp(text).vector` from this pipeline is
# what the notebook uses as the embedding for event descriptions and for the
# user's free-text preferences.
# NOTE(review): the small ("sm") spaCy models reportedly ship without static
# word vectors - confirm the similarity quality, or consider a larger model.
nlp = spacy.load('en_core_web_sm')

# ======================
# 1. Sample Data Creation
# ======================

# Sample events data (20 entries)
# The coordinates are random jitter around one reference point. They are drawn
# from a dedicated, seeded generator so that Restart & Run All reproduces the
# same events (and therefore the same distances and rankings) every time.
EVENTS_SEED = 42
EVENTS_CENTER = (12.9716, 77.5946)  # (latitude, longitude) the jitter is applied to
EVENTS_JITTER_DEG = 0.01            # maximum offset from the center, in degrees
_events_rng = np.random.default_rng(EVENTS_SEED)

events = pd.DataFrame({
    'event_id': range(1, 21),
    'name': [
        'Heritage Museum Tour', 'Local Food Festival', 'Jazz in the Park',
        'Artisan Market', 'Historical Walking Tour', 'Childrens Science Workshop',
        'Yoga Sunrise Session', 'Vintage Book Fair', 'Traditional Dance Show',
        'Street Art Exhibition', 'Farmers Market', 'Indie Music Night',
        'Photography Walk', 'Poetry Reading', 'Craft Beer Tasting',
        'Nature Trail Hike', 'Cooking Masterclass', 'Astronomy Night',
        'Pottery Workshop', 'Marathon'
    ],
    'category': ['museum', 'food', 'music', 'shopping', 'history', 'education',
                'fitness', 'shopping', 'culture', 'art', 'food', 'music',
                'art', 'literature', 'food', 'fitness', 'food', 'science',
                'art', 'sports'],
    'description': [
        'Guided tour of local history artifacts',
        'Taste regional specialties from 50+ vendors',
        'Open-air jazz performances every Saturday',
        'Handmade crafts from local artisans',
        'Explore 18th century architecture',
        'Interactive science experiments for kids',
        'Morning yoga session at city park',
        'Rare books and antique manuscripts',
        'Traditional folk dance performances',
        'Urban art exhibition in downtown',
        'Fresh produce and organic goods',
        'Local indie bands showcase',
        'Guided photography tour of city landmarks',
        'Poetry readings at cozy bookstore',
        'Craft beer sampling from microbreweries',
        'Guided hike through nature reserve',
        'Learn recipes from master chefs',
        'Stargazing with astronomy experts',
        'Hands-on clay pottery workshop',
        'Annual city marathon race'
    ],
    # One vectorised draw per column; latitude is drawn before longitude.
    'latitude': EVENTS_CENTER[0] + _events_rng.uniform(
        -EVENTS_JITTER_DEG, EVENTS_JITTER_DEG, size=20),
    'longitude': EVENTS_CENTER[1] + _events_rng.uniform(
        -EVENTS_JITTER_DEG, EVENTS_JITTER_DEG, size=20)
})

# Sample user interactions (50 interactions)
# Each row is one (user, event, rating) record; the same pair may repeat.
# A dedicated, seeded generator keeps the interaction log - and any model
# trained on it - identical across Restart & Run All.
INTERACTIONS_SEED = 7
_interactions_rng = np.random.default_rng(INTERACTIONS_SEED)

users = pd.DataFrame({
    'user_id': _interactions_rng.choice([1, 2, 3, 4, 5], size=50),
    'event_id': _interactions_rng.choice(events['event_id'].to_numpy(), size=50),
    # endpoint=True makes the upper bound inclusive, so ratings span 1..5.
    # The previous randint(1, 5, 50) had an exclusive upper bound and could
    # never produce a rating of 5.
    'rating': _interactions_rng.integers(1, 5, size=50, endpoint=True)
})


In [None]:

# ======================
# 2. NLP Feature Extraction
# ======================

# Process event descriptions
def get_embedding(text):
    """Run `text` through the loaded spaCy pipeline and return the document's `.vector`."""
    parsed_doc = nlp(text)
    return parsed_doc.vector

# Attach one embedding per event, computed from its description text.
events['embedding'] = events['description'].map(get_embedding)


In [None]:

# ======================
# 3. Collaborative Filtering Model
# ======================

# Prepare dataset
# Register the full event catalogue as items, not only the events that happen
# to appear in the interaction log. Otherwise an event nobody rated gets no
# internal item index and cannot be scored by the model at all.
dataset = Dataset()
dataset.fit(users['user_id'], events['event_id'])

# Build interactions matrix
# build_interactions returns two matrices with identical row/col layout:
# the binary interactions and the per-interaction weights (here, the ratings).
interactions, rating_weights = dataset.build_interactions(
    list(users[['user_id', 'event_id', 'rating']].itertuples(index=False, name=None))
)

# Train model
# - random_state pins the model's random number generator so retraining is
#   reproducible.
# - sample_weight feeds the ratings into training; previously they were
#   discarded, so a 1-star interaction counted exactly like a 5-star one.
model = LightFM(loss='warp', random_state=42)
model.fit(interactions, sample_weight=rating_weights, epochs=20)


In [None]:

# ======================
# 4. Hybrid Recommendation System
# ======================

def recommend_events(user_id, user_location, preferences, top_n=5, max_distance_km=10):
    """Rank events for one user by blending three signals.

    The final score is ``0.5 * CF + 0.3 * text similarity + 0.2 * proximity``:

    * CF - LightFM predictions for the user, min-max scaled to [0, 1] so the
      weight is comparable with the other terms. Users or events the model
      has never seen contribute 0.
    * text similarity - cosine similarity between the embedding of
      `preferences` and each event's stored embedding.
    * proximity - ``1 - distance / max_distance_km``.

    Reads the notebook-level objects `events`, `model`, `dataset` and `nlp`;
    none of them is modified.

    Parameters
    ----------
    user_id : raw user id as used when fitting `dataset` (not an internal index).
    user_location : (latitude, longitude) tuple.
    preferences : str, free-text description of what the user likes.
    top_n : int, maximum number of rows to return.
    max_distance_km : float, events farther away than this are excluded; it is
        also the scale of the proximity term.

    Returns
    -------
    pandas.DataFrame
        A copy of the in-range rows of `events` with an added 'score' column,
        sorted by score descending, at most `top_n` rows.

    Raises
    ------
    ValueError
        If `max_distance_km` is not positive.
    """
    if max_distance_km <= 0:
        raise ValueError("max_distance_km must be positive")

    # Collaborative Filtering Predictions
    # LightFM.predict expects the internal indices assigned by Dataset.fit,
    # so raw ids have to be translated through the dataset mappings first.
    user_index_by_id, _, item_index_by_id, _ = dataset.mapping()
    event_ids = events['event_id'].tolist()
    cf_scores = np.zeros(len(event_ids), dtype=float)
    known_positions = [pos for pos, event_id in enumerate(event_ids)
                       if event_id in item_index_by_id]
    if user_id in user_index_by_id and known_positions:
        item_indices = np.array(
            [item_index_by_id[event_ids[pos]] for pos in known_positions],
            dtype=np.int32,
        )
        raw_scores = np.asarray(
            model.predict(user_index_by_id[user_id], item_indices), dtype=float
        )
        # Raw scores are unbounded; rescale so the 50% weight means something.
        score_range = raw_scores.max() - raw_scores.min()
        if score_range > 0:
            cf_scores[known_positions] = (raw_scores - raw_scores.min()) / score_range

    # NLP Similarity
    pref_embedding = nlp(preferences).vector.reshape(1, -1)
    event_embeddings = np.array(events['embedding'].tolist())
    similarities = cosine_similarity(pref_embedding, event_embeddings)[0]

    # Location Filtering
    distances_km = np.array([
        geodesic(user_location, (lat, lon)).km
        for lat, lon in zip(events['latitude'], events['longitude'])
    ], dtype=float)
    within_range = distances_km <= max_distance_km
    proximity = 1 - distances_km / max_distance_km

    # Combine scores (50% CF, 30% NLP, 20% proximity)
    combined_scores = 0.5 * cf_scores + 0.3 * similarities + 0.2 * proximity

    # Get top recommendations
    # assign() returns a new frame, so the shared `events` table is untouched.
    ranked = events.assign(score=combined_scores).loc[within_range]
    return ranked.sort_values('score', ascending=False).head(top_n)


In [None]:

# ======================
# 5. Example Usage
# ======================

# Inputs for the demo request
user_location = (12.9716, 77.5946)  # Bangalore coordinates
user_preferences = "I love cultural activities and outdoor fitness events"

# Ask the hybrid recommender for user 1's ranked events
recommendations = recommend_events(1, user_location, user_preferences)

# Header line followed by the name / category / score columns
print(
    "\nTop 5 Recommendations:",
    recommendations[['name', 'category', 'score']],
    sep="\n",
)

# ======================
# 6. Visualization
# ======================

import matplotlib.pyplot as plt

# Horizontal bar chart of the recommended events, drawn through the explicit
# Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(recommendations['name'], recommendations['score'], color='skyblue')
ax.set_xlabel('Recommendation Score')
ax.set_title('Top Recommended Events')
ax.invert_yaxis()  # first (highest-scoring) row ends up at the top
plt.show()