In [58]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

In [59]:
try:
    df = pd.read_csv("trek_dataset_with_current_weather.csv")
except FileNotFoundError:
    print("Error: Dataset file 'trek_dataset_with_current_weather.csv' not found. Please provide the correct path.")
    exit()

In [60]:
df = df.drop(['link_AllTrails', 'image'], axis=1, errors='ignore')

In [61]:
df['Length (in km)'] = pd.to_numeric(
    df['Length'].astype(str).str.extract(r'(\d+\.?\d*)')[0],
    errors='coerce'
)

In [62]:

df['Est_time'] = pd.to_numeric(df['Est_time'], errors='coerce').fillna(df['Est_time'].mean())
df['number_of_reviews'] = df['number_of_reviews'].abs()
df['Average_rating'] = df['Average_rating'].fillna(df['Average_rating'].mean())

In [63]:
weather_cols = ['latitude', 'longitude', 'current_temperature', 'current_windspeed',
                'current_winddirection', 'current_weather_code']
for col in weather_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(df[col].mean())

In [64]:
df[['City', 'State', 'Country']] = df['Location'].str.split(',', expand=True)
df['City'] = df['City'].str.strip()
df['State'] = df['State'].str.strip()
df['Country'] = df['Country'].str.strip()
df = df.drop('Location', axis=1)
season_mapping = {
    'Himachal Pradesh': 'April - June, September - November',
    'Uttarakhand': 'March - June, September - November',
    'Maharashtra': 'October - February',
    'Karnataka': 'October - February',
    'Kerala': 'September - March',
    'Jammu and Kashmir': 'May - October',
    'West Bengal': 'October - March',
    'Tamil Nadu': 'November - February',
    'Goa': 'November - February'
}
df['Best_Season'] = df['State'].map(season_mapping).fillna('All Year')

In [65]:
# Convert Tags to list for MultiLabelBinarizer
df['tags_list'] = df['Tags'].str.lower().str.split(', ').apply(lambda x: [tag.strip() for tag in x])

In [66]:
# Prepare features
mlb = MultiLabelBinarizer()
tags_encoded = mlb.fit_transform(df['tags_list'])
tags_df = pd.DataFrame(tags_encoded, columns=mlb.classes_)
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
cat_encoded = ohe.fit_transform(df[['Difficulty', 'Best_Season', 'State']])
cat_df = pd.DataFrame(cat_encoded, columns=ohe.get_feature_names_out(['Difficulty', 'Best_Season', 'State']))
numerical_cols = ['Length (in km)', 'current_windspeed', 'number_of_reviews',
                  'Est_time', 'current_temperature', 'current_weather_code']
X = pd.concat([df[numerical_cols], cat_df, tags_df], axis=1)
y = df['Trail_name']

In [67]:
# Train classifier and evaluate
trail_classifier = None
try:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    trail_classifier = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    trail_classifier.fit(X_train, y_train)
    print("Classifier trained successfully.")
    
    # Evaluate classifier
    y_pred = trail_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nClassifier Accuracy Score: {accuracy:.4f}")

except Exception as e:
    print(f"Error training or evaluating classifier: {str(e)}")
    exit()

Classifier trained successfully.

Classifier Accuracy Score: 0.1714


In [68]:
# Preprocess user input
def preprocess_user_input(user_input):
    input_data = pd.DataFrame({
        'Difficulty': [user_input['Difficulty']],
        'Best_Season': [user_input['Best_Season']],
        'State': [user_input['State']],
        'Length (in km)': [user_input['Length']],
        'current_windspeed': [user_input['windspeed']],
        'current_temperature': [user_input['temperature']],
        'number_of_reviews': [df['number_of_reviews'].mean()],
        'Est_time': [df['Est_time'].mean()],
        'current_weather_code': [df['current_weather_code'].mean()]
    })
    
    try:
        encoded_cat = pd.DataFrame(
            ohe.transform(input_data[['Difficulty', 'Best_Season', 'State']]),
            columns=ohe.get_feature_names_out(['Difficulty', 'Best_Season', 'State'])
        )
    except ValueError as e:
        return None, f"Error: Invalid categorical input - {str(e)}. Please use valid options."
    
    tags_encoded = pd.DataFrame(
        mlb.transform([user_input['tags_list']]),
        columns=mlb.classes_
    )
    
    X_input = pd.concat([input_data[['Length (in km)', 'current_windspeed', 'number_of_reviews',
                                     'Est_time', 'current_temperature', 'current_weather_code']],
                         encoded_cat, tags_encoded], axis=1)
    
    for col in X.columns:
        if col not in X_input.columns:
            X_input[col] = 0
    
    X_input = X_input[X.columns]
    return X_input, None

In [69]:
# Predict trek
def predict_trek(state, difficulty, length, temperature, windspeed, tags):
    if trail_classifier is None:
        print("Error: Classifier is not initialized. Please ensure the classifier is trained.")
        return
    
    user_input = {
        'State': state,
        'Best_Season': season_mapping.get(state, 'All Year'),
        'Difficulty': difficulty,
        'tags_list': tags if tags else ['hiking'],
        'Length': length,
        'windspeed': windspeed,
        'temperature': temperature
    }
    
    X_input, error = preprocess_user_input(user_input)
    if error:
        print(error)
        return
    
    try:
        proba = trail_classifier.predict_proba(X_input)[0]
        top_indices = np.argsort(proba)[-10:][::-1]
        trail_names = trail_classifier.classes_
        
        print("\nTop 5 predicted trails:")
        for idx in top_indices[:5]:
            trail = trail_names[idx]
            state_of_trail = df[df['Trail_name'] == trail]['State'].iloc[0] if not df[df['Trail_name'] == trail].empty else "Unknown"
            print(f"{trail} (State: {state_of_trail}, Probability: {proba[idx]:.4f})")
        
        for idx in top_indices:
            predicted_trail = trail_names[idx]
            trek_details = df[df['Trail_name'] == predicted_trail]
            if trek_details.empty:
                continue
            if trek_details.iloc[0]['State'] == state:
                max_proba = proba[idx]
                trek_details = trek_details[[
                    'Trail_name', 'Difficulty', 'Length (in km)', 'Best_Season',
                    'State', 'Tags', 'current_windspeed', 'current_temperature', 'description'
                ]].iloc[0]
                print("\nRecommended Trek:")
                print(f"Trek Name: {trek_details['Trail_name']}")
                print(f"Difficulty: {trek_details['Difficulty']}")
                print(f"Length: {trek_details['Length (in km)']} km")
                print(f"Best Season: {trek_details['Best_Season']}")
                print(f"State: {trek_details['State']}")
                print(f"Tags: {trek_details['Tags']}")
                print(f"Current Wind Speed: {trek_details['current_windspeed']} km/h")
                print(f"Current Temperature: {trek_details['current_temperature']} °C")
                print(f"Description: {trek_details['description']}")
                print(f"Confidence Score: {(max_proba * 100):.2f}%")
                return
        print(f"\nError: No trek found in {state}. Try adjusting inputs.")
    except AttributeError as e:
        print(f"Error during prediction: {str(e)}. Ensure the classifier is a valid RandomForestClassifier.")
    except Exception as e:
        print(f"Error during prediction: {str(e)}")


In [70]:











# Terminal-based input interface
def get_user_input():
    print("Enter trek preferences (type 'cancel' to exit at any prompt):")
    
    # State
    print(f"Available states: {', '.join(sorted(df['State'].unique()))}")
    state = input("Enter State: ").strip()
    if state.lower() == 'cancel':
        print("Input cancelled.")
        return None
    if state not in df['State'].unique():
        print(f"Error: '{state}' is not a valid state. Please choose from the available states.")
        return None
    
    # Difficulty
    print(f"Available difficulties: {', '.join(sorted(df['Difficulty'].unique()))}")
    difficulty = input("Enter Difficulty: ").strip()
    if difficulty.lower() == 'cancel':
        print("Input cancelled.")
        return None
    if difficulty not in df['Difficulty'].unique():
        print(f"Error: '{difficulty}' is not a valid difficulty. Please choose from the available difficulties.")
        return None
    
    # Length
    while True:
        length_input = input("Enter Length (km, e.g., 10.5): ").strip()
        if length_input.lower() == 'cancel':
            print("Input cancelled.")
            return None
        try:
            length = float(length_input)
            if length <= 0:
                print("Error: Length must be a positive number.")
                continue
            break
        except ValueError:
            print("Error: Please enter a valid number for length.")
    
    # Temperature
    while True:
        temp_input = input("Enter Temperature (°C, e.g., 20.0): ").strip()
        if temp_input.lower() == 'cancel':
            print("Input cancelled.")
            return None
        try:
            temperature = float(temp_input)
            break
        except ValueError:
            print("Error: Please enter a valid number for temperature.")
    
    # Wind Speed
    while True:
        wind_input = input("Enter Wind Speed (km/h, e.g., 5.0): ").strip()
        if wind_input.lower() == 'cancel':
            print("Input cancelled.")
            return None
        try:
            windspeed = float(wind_input)
            if windspeed < 0:
                print("Error: Wind speed cannot be negative.")
                continue
            break
        except ValueError:
            print("Error: Please enter a valid number for wind speed.")
    
    # Tags
    print(f"Available tags: {', '.join(sorted(mlb.classes_))}")
    print("Enter tags separated by commas (e.g., hiking, forest, views):")
    tags_input = input("Enter Tags: ").strip()
    if tags_input.lower() == 'cancel':
        print("Input cancelled.")
        return None
    tags = [tag.strip().lower() for tag in tags_input.split(',') if tag.strip()]
    invalid_tags = [tag for tag in tags if tag not in mlb.classes_]
    if invalid_tags:
        print(f"Error: Invalid tags {invalid_tags}. Please choose from available tags.")
        return None
    
    return {
        'state': state,
        'difficulty': difficulty,
        'length': length,
        'temperature': temperature,
        'windspeed': windspeed,
        'tags': tags
    }

# Main function to run the prediction
def main():
    if trail_classifier is None:
        print("Error: Classifier is not initialized. Exiting.")
        return
    
    while True:
        user_input = get_user_input()
        if user_input is None:
            break
        predict_trek(
            state=user_input['state'],
            difficulty=user_input['difficulty'],
            length=user_input['length'],
            temperature=user_input['temperature'],
            windspeed=user_input['windspeed'],
            tags=user_input['tags']
        )
        again = input("\nWould you like to try another prediction? (yes/no): ").strip().lower()
        if again != 'yes':
            print("Exiting.")
            break

if __name__ == "__main__":
    main()

Enter trek preferences (type 'cancel' to exit at any prompt):
Available states: Andhra Pradesh, Assam, Chandigarh, Delhi, Goa, Gujarat, Haryana, Himachal Pradesh, Jammu and Kashmir, Karnataka, Kerala, Ladakh, Maharashtra, Meghalaya, Nagaland, Puducherry, Punjab, Rajasthan, Sikkim, Tamil Nadu, Telangana, Uttar Pradesh, Uttarakhand, Uttrakhand, West Bengal
Available difficulties: Easy, Hard, Moderate
Available tags: backpacking, beach, bike touring, bird watching, bridge out, bugs, camping, cave, city walk, dog friendly, fee, fishing, forest, hiking, historic site, horseback riding, hot springs, kid friendly, lake, mountain biking, muddy, no dogs, no shade, off trail, off-road driving, overgrown, paddle sports, partially paved, paved, pub walk, river, road biking, rock climbing, rocky, running, scenic driving, scramble, skiing, snow, snowshoeing, stroller friendly, via ferrata, views, walking, washed out, waterfall, wheelchair friendly, wildflowers, wildlife
Enter tags separated by comma