In [1]:
# Step 1: Import libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer






In [2]:
import pandas as pd

# Step 1: Load CSVs
# Both inputs and the merged output live in the same folder. Keeping that folder
# in a single constant means only this line changes when the notebook is run on
# another machine.
from pathlib import Path

DATA_DIR = Path(r"C:\Users\DELL\OneDrive\Desktop\SIH")

df_full = pd.read_csv(DATA_DIR / "iterative1.csv")   # full CSV with budget (left side of the merge)
df_budget = pd.read_csv(DATA_DIR / "iterative.csv")  # supplies latitude/longitude/description/opening_hours

# Step 2: Merge on 'poi_id'
# Left join: every row of df_full is kept; a poi_id that is absent from df_budget
# gets NaN in the four added columns.
# validate='many_to_one' makes pandas raise MergeError if df_budget repeats a
# poi_id, which would otherwise silently duplicate rows in the master table.
df_master = pd.merge(
    df_full,
    df_budget[['poi_id', 'latitude', 'longitude', 'description', 'opening_hours']],
    on='poi_id',
    how='left',
    validate='many_to_one',
)

# Step 3: Check merged data
print(df_master.head())
print("\nMaster CSV columns:", df_master.columns.tolist())

# Step 4: Save master CSV for later use
# index=False keeps the row index out of the file.
df_master.to_csv(DATA_DIR / "master_itinerary.csv", index=False)


  poi_id            name     category  avg_visit_time_hrs  entry_fee_inr  \
0   P001    Hundru Falls    Waterfall                   2             20   
1   P002     Jonha Falls    Waterfall                   2             10   
2   P003    Dassam Falls    Waterfall                   2             10   
3   P004  Patratu Valley       Valley                   3              0   
4   P005        Netarhat  HillStation                   5              0   

      best_season  popularity_score                          tags  budget_inr  \
0  Monsoon-Winter              0.90  nature,adventure,photography         120   
1  Monsoon-Winter              0.85              nature,adventure         110   
2  Monsoon-Winter              0.88              nature,adventure         110   
3   Winter-Summer              0.92   nature,roadtrip,photography         300   
4   Winter-Summer              0.95   nature,hillstation,romantic         300   

   latitude  longitude                           descrip

In [3]:
# Show the merged frame; as the last expression of the cell it is rendered by the notebook.
# NOTE(review): this displays the whole table - consider df_master.head() if it grows.
df_master

Unnamed: 0,poi_id,name,category,avg_visit_time_hrs,entry_fee_inr,best_season,popularity_score,tags,budget_inr,latitude,longitude,description,opening_hours
0,P001,Hundru Falls,Waterfall,2,20,Monsoon-Winter,0.9,"nature,adventure,photography",120,23.37,85.44,Famous waterfall near Ranchi,8AM-5PM
1,P002,Jonha Falls,Waterfall,2,10,Monsoon-Winter,0.85,"nature,adventure",110,23.32,85.58,Popular hanging valley falls,8AM-5PM
2,P003,Dassam Falls,Waterfall,2,10,Monsoon-Winter,0.88,"nature,adventure",110,23.25,85.68,Known for scenic beauty,8AM-5PM
3,P004,Patratu Valley,Valley,3,0,Winter-Summer,0.92,"nature,roadtrip,photography",300,22.65,85.5,Cluster of five waterfalls,8AM-5PM
4,P005,Netarhat,HillStation,5,0,Winter-Summer,0.95,"nature,hillstation,romantic",300,23.47,84.27,"Queen of Chotanagpur, scenic sunsets",Open 24H
5,P006,Mirchaiya Waterfalls,Waterfall,2,0,Monsoon-Winter,0.8,"nature,hidden,adventure",100,23.67,85.28,Scenic valley and lake,Open 24H
6,P007,Betla National Park,Wildlife,6,50,Winter,0.88,"wildlife,adventure,eco",650,23.72,84.12,Wildlife safari with elephants and tigers,6AM-6PM
7,P008,Palamu Fort,Historical,2,25,Winter,0.78,"historical,heritage",125,23.9,84.1,17th-century historical fort,9AM-6PM
8,P009,Ranchi Lake,Lake,1,0,All Seasons,0.82,"lake,boating,photography",100,23.65,84.9,Known as mini-England,Open 24H
9,P010,Nakshatra Van,Park,1,0,All Seasons,0.75,"family,park,nature",100,23.38,85.33,Artificial garden with sculptures,9AM-6PM


In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load master CSV
df = pd.read_csv(r"C:\Users\DELL\OneDrive\Desktop\SIH\master_itinerary.csv")

# Combine tags + description into one free-text field per place.
# The fallback must be a Series, not the string '': df.get() returns the default
# unchanged when the column is missing, and a plain str has no .fillna(), so the
# original default raised AttributeError for a master file without descriptions.
df['text_features'] = (
    df['tags'].fillna('')
    + ' '
    + df.get('description', pd.Series('', index=df.index)).fillna('')
)

# TF-IDF vectorizer
# Fitted on every place, so row i of tfidf_matrix describes row i of df.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['text_features'])

# User inputs
# int() raises ValueError if the answer is not a whole number.
user_budget = int(input("💰 Enter your total budget (INR): "))
total_days = int(input("🗓️ Enter number of days you want to travel: "))
# strip() before lower(): without it an answer such as "all " is not equal to
# "all", is treated as a real category, and filters every place out.
preferred_category = input("🎯 Enter preferred category (nature, heritage, religious, wildlife, urban, all): ").strip().lower()
user_tags = input("✏️ Enter your interests/tags (e.g., adventure, photography, spiritual): ").strip().lower()
daily_time_limit = 8  # hours per day

# Filter by budget + category
# A place is a candidate only if its own budget fits within the total budget.
filtered_df = df[df['budget_inr'] <= user_budget]
if preferred_category != "all":
    # The prompt offers themes such as "nature" or "heritage", which appear in the
    # `tags` column while `category` holds values like "Waterfall" or "Historical".
    # Matching on category alone therefore returned nothing for those choices, so
    # a place is kept when either its category or one of its tags matches.
    category_match = filtered_df['category'].str.lower() == preferred_category
    tag_match = (
        filtered_df['tags']
        .fillna('')
        .astype(str)
        .str.lower()
        .str.split(',')
        .apply(lambda tags: preferred_category in [tag.strip() for tag in tags])
        .astype(bool)
    )
    filtered_df = filtered_df[category_match | tag_match]

# Check if filtered_df is empty
if filtered_df.empty:
    print("\n⚠️ No places found matching your budget and category preferences.")
    print("Try increasing your budget or selecting 'all' categories.")
    # Raise instead of calling exit(): in a Jupyter kernel exit() requests a kernel
    # shutdown rather than raising, so the rest of the cell may still run on the
    # empty frame. SystemExit stops the cell here and also ends a plain script.
    raise SystemExit

# Score every remaining place against the user's interests.
# The matching TF-IDF rows are selected with filtered_df's index labels.
user_vector = tfidf.transform([user_tags])
cos_sim = cosine_similarity(user_vector, tfidf_matrix[filtered_df.index])

# Attach the scores on a fresh copy, then rank best match first;
# popularity_score is the secondary sort key.
filtered_df = (
    filtered_df
    .assign(similarity_score=cos_sim.ravel())
    .sort_values(by=['similarity_score', 'popularity_score'], ascending=False)
)

# Build itinerary day by day.
# Greedy pass: each day walks the ranked places in order and takes every place
# that is not yet planned and still fits the day's hours and the remaining money.
itinerary = {}
used_places = set()
remaining_budget = user_budget

for day_number in range(1, total_days + 1):
    hours_left = daily_time_limit
    stops = []

    for _, place in filtered_df.iterrows():
        if place['poi_id'] in used_places:
            continue

        fits_time = place['avg_visit_time_hrs'] <= hours_left
        fits_budget = place['budget_inr'] <= remaining_budget
        if not (fits_time and fits_budget):
            continue

        # Commit the place: it consumes both the day's hours and the trip budget.
        stops.append(place)
        used_places.add(place['poi_id'])
        hours_left -= place['avg_visit_time_hrs']
        remaining_budget -= place['budget_inr']

    itinerary[f"Day {day_number}"] = stops

# Print the plan: one heading per day, then one line per place.
for day_label, stops in itinerary.items():
    print(f"\n📍 {day_label} Itinerary:")

    if not stops:
        print("No places assigned for this day.")
        continue

    # Each stop is a row Series; rebuild a frame so they can be iterated as rows.
    for _, stop in pd.DataFrame(stops).iterrows():
        summary = (
            f"- {stop['name']} ({stop['category']}) | "
            f"Time: {stop['avg_visit_time_hrs']} hrs | "
            f"Budget: {stop['budget_inr']} INR | "
            f"Best Season: {stop.get('best_season','All Seasons')} | "
            f"Match Score: {stop['similarity_score']:.2f}"
        )
        print(summary)

print(f"\n✅ Remaining Budget after trip: {remaining_budget} INR")


💰 Enter your total budget (INR):  6000
🗓️ Enter number of days you want to travel:  6
🎯 Enter preferred category (nature, heritage, religious, wildlife, urban, all):  all
✏️ Enter your interests/tags (e.g., adventure, photography, spiritual):  adventure, photography



📍 Day 1 Itinerary:
- Hundru Falls (Waterfall) | Time: 2 hrs | Budget: 120 INR | Best Season: Monsoon-Winter | Match Score: 0.45
- Dassam Falls (Waterfall) | Time: 2 hrs | Budget: 110 INR | Best Season: Monsoon-Winter | Match Score: 0.30
- Mirchaiya Waterfalls (Waterfall) | Time: 2 hrs | Budget: 100 INR | Best Season: Monsoon-Winter | Match Score: 0.27
- Jonha Falls (Waterfall) | Time: 2 hrs | Budget: 110 INR | Best Season: Monsoon-Winter | Match Score: 0.25

📍 Day 2 Itinerary:
- Dalma Wildlife Sanctuary (Wildlife) | Time: 5 hrs | Budget: 340 INR | Best Season: Winter | Match Score: 0.22
- Dimna Lake (Lake) | Time: 3 hrs | Budget: 300 INR | Best Season: Winter-Summer | Match Score: 0.20

📍 Day 3 Itinerary:
- Betla National Park (Wildlife) | Time: 6 hrs | Budget: 650 INR | Best Season: Winter | Match Score: 0.20
- Ranchi Lake (Lake) | Time: 1 hrs | Budget: 100 INR | Best Season: All Seasons | Match Score: 0.17
- Pahari Mandir (Religious) | Time: 1 hrs | Budget: 100 INR | Best Season: Al