# LLM Integration for Event Recommendation
This notebook integrates Large Language Models for enhanced event and vendor recommendations.

In [1]:
# Import required libraries
import pandas as pd
from huggingface_hub import InferenceClient

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the vendor catalogue from the CSV export and preview the first rows
path = "../data/vendors_final.csv"
df = pd.read_csv(filepath_or_buffer=path)
print(f"Loaded {df.shape[0]} vendors")
df.head(5)

Loaded 682 vendors


Unnamed: 0,name,category,location,rating,services
0,The Red Castle,catering,"6 2nd Alley, Sanmin Street, Tamsui District, ,...",0,Coffee and Fine Dining in a Colonial BuildingT...
1,Feng Hua Restaurant,catering,"No.209, Sec. 2, Shuangshi Rd., , 220, Taiwan,...",0,"With clean and quiet environment, modern and Z..."
2,Hong Kong Chen's gruel and noodle.,catering,"No.46, Zhulin Rd., , 234, Taiwan, Yonghe Dist...",0,"Standard Hong Kong style gruel, boil with smal..."
3,Zhening Jia Garden Restaurant,catering,"No.70, Sec. 1, Zhongshan Rd., , 252, Taiwan, ...",0,The owner has over 30 years of experience prep...
4,Fu Zhou Lin Pepper Bun,catering,"Minsheng St. (inside the Food Street), Ruifang...",0,"Legend has it that Fu Zhou China's ""Green onio..."


In [9]:
# Connect to HuggingFace LLM
import os

from huggingface_hub import InferenceClient

# Never commit an access token inside the notebook: read it from the environment.
# When HF_TOKEN is unset this is None, and the token is then left for
# huggingface_hub to resolve on its own.
HF_TOKEN = os.environ.get("HF_TOKEN")

client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",
    token=HF_TOKEN,
)

print("LLM client initialized")


LLM client initialized


In [13]:
# Define event planning function using LLM
def generate_event_plan(user_query, vendors_df, llm_client=None, max_tokens=800, temperature=0.7):
    """
    Generate a comprehensive event plan using an LLM, based on the user query and recommended vendors.

    Args:
        user_query: Free-text description of the event to plan.
        vendors_df: DataFrame of recommended vendors; it is rendered as plain text
            inside the prompt. ``None`` or an empty frame is rendered as a short
            "no vendors" note instead of pandas' empty-frame repr.
        llm_client: Optional object exposing
            ``chat_completion(messages=..., max_tokens=..., temperature=...)``.
            Defaults to the notebook-level ``client``.
        max_tokens: Upper bound on generated tokens, forwarded to the client.
        temperature: Sampling temperature, forwarded to the client.

    Returns:
        str: The model's reply, or a static template plan when the request fails
        for any reason (a ``RuntimeWarning`` describing the failure is emitted).
    """
    import warnings

    if vendors_df is None or vendors_df.empty:
        vendors_text = "No matching vendors found."
    else:
        vendors_text = vendors_df.to_string(index=False)

    prompt = f"""You are EventEcho, a professional event planner assistant.

User request:
{user_query}

Recommended vendors:
{vendors_text}

Generate:
1. A complete event plan
2. Timeline of activities
3. Estimated budget breakdown
4. Checklist
5. Invitation message
6. Emcee script

Be structured and practical."""

    try:
        # Resolve the client inside the try block so that a missing global
        # `client` still degrades to the fallback plan, as it did before.
        active_client = llm_client if llm_client is not None else client
        response = active_client.chat_completion(
            messages=[
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort fallback for demos, but surface the cause instead of
        # hiding it: otherwise the template is indistinguishable from a real reply.
        warnings.warn(
            f"LLM request failed ({type(e).__name__}: {e}); returning the static fallback plan.",
            RuntimeWarning,
            stacklevel=2,
        )
        return f"""EVENT PLANNING SUMMARY
═══════════════════════════════════════

REQUEST: {user_query}

RECOMMENDED VENDORS:
{vendors_text}

EVENT PLAN:
1. PRE-EVENT PLANNING (2-4 weeks before)
   - Confirm vendor bookings
   - Send invitations
   - Establish budget tracking

2. TIMELINE:
   - Day before: Final confirmations
   - Event day: Setup 2 hours early
   - Post-event: Thank you notes

3. BUDGET BREAKDOWN:
   - Venue/Catering: 60%
   - Decorations: 20%
   - Miscellaneous: 20%

4. CHECKLIST:
   ☐ Confirm all vendors
   ☐ Guest RSVPs
   ☐ Setup materials ready
   ☐ Payment processed
   ☐ Timeline communicated

5. INVITATION MESSAGE:
   You are cordially invited to an exclusive event...

6. EMCEE SCRIPT:
   Welcome everyone to this special occasion...

(Note: Full LLM response would provide more detailed recommendations)"""

print("Event planning function defined")


Event planning function defined


In [None]:
# Assume this comes from your recommender notebook
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Clean the dataset - remove rows with problematic names
# Create mask for rows to keep: drop names containing the word "opinion"/"opiniones"
# (presumably scraped review counters such as "12 opiniones" - verify against the raw CSV)
mask = ~df["name"].str.strip().str.lower().str.contains(r'\bopiniones?\b|^\d+\s+opiniones?$', na=False, regex=True)

# Remove rows with empty or very short names
mask = mask & (df["name"].str.len() > 3)

# Remove rows with empty locations.
# Missing values are blanked first: on an object column, astype(str) alone turns
# NaN into the text "nan", so rows without a location would survive the filter.
mask = mask & (df["location"].fillna("").astype(str).str.strip() != "")

df_clean = df[mask].copy()

print(f"Data cleaned: {len(df)} → {len(df_clean)} vendors")
print("Sample cleaned vendors:")
print(df_clean[["name", "location"]].head(10))

# Reload recommender quickly
# Fill NaN values to avoid vectorizer errors
text_columns = ["name", "category", "location", "services"]
df_clean[text_columns] = df_clean[text_columns].fillna("")
df_clean["text"] = df_clean["name"] + " " + df_clean["category"] + " " + df_clean["location"] + " " + df_clean["services"]

# Fit the TF-IDF vocabulary on the combined vendor text; X holds one row per cleaned vendor
vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
X = vectorizer.fit_transform(df_clean["text"])

def recommend_vendors(query, top_n=5):
    """
    Return the vendors whose combined text is most similar to the query.

    Relies on the notebook-level ``vectorizer``, the matrix ``X`` it produced,
    and ``df_clean`` (whose row order matches ``X``).

    Args:
        query: Free-text description of what the user is looking for.
        top_n: Maximum number of vendors to return. Zero or a negative value
            yields an empty frame.

    Returns:
        DataFrame with the ``name``, ``category``, ``location`` and ``services``
        columns, ordered from most to least similar.
    """
    limit = max(int(top_n), 0)
    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, X)[0]
    # Reverse first, then truncate: the former `argsort()[-top_n:]` selected
    # every row for top_n == 0, because `[-0:]` is the whole array.
    top_indices = similarities.argsort()[::-1][:limit]
    return df_clean.iloc[top_indices][["name", "category", "location", "services"]]

# End-to-end smoke test: recommend vendors for a sample request, then draft the plan
query = "Plan a small budget wedding for 50 guests in city center"
vendors = recommend_vendors(query, top_n=5)

print("\n✓ Recommended Vendors:", vendors, sep="\n")

plan = generate_event_plan(user_query=query, vendors_df=vendors)
print("\n--- GENERATED PLAN ---\n", plan, sep="\n")


Data cleaned: 682 → 483 vendors
Sample cleaned vendors:
                                 name  \
0                      The Red Castle   
1                 Feng Hua Restaurant   
2  Hong Kong Chen's gruel and noodle.   
3       Zhening Jia Garden Restaurant   
4              Fu Zhou Lin Pepper Bun   
5                         China Spice   
6                     Weng Meiyu Malt   
7                      Osmanthus Farm   
8   Sanzhi Pigs Light Meal Restaurant   
9      Jiuliaopo Ranch green tea rice   

                                            location  
0  6 2nd Alley, Sanmin Street, Tamsui District, ,...  
1  No.209, Sec. 2, Shuangshi Rd., ,  220, Taiwan,...  
2  No.46, Zhulin Rd., ,  234, Taiwan, Yonghe Dist...  
3  No.70, Sec. 1, Zhongshan Rd., ,  252, Taiwan, ...  
4  Minsheng St. (inside the Food Street), Ruifang...  
5  2nd Floor, No. 1 Section, Wenhua 3rd Road, Lin...  
6  Number 128 Sinjhuang Road, , New Taipei, Xinzh...  
7  No.4, Xinxingkeng, , , Shiding Dist., New Taip...

: 