In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
file_path = "ssh.csv"  # Replace with your file path
df = pd.read_csv(file_path)

# Display options for user input
def get_user_input(df):
    print("\nAvailable options:")

    # Options for location
    locations = df['Location'].unique()
    print(f"Locations: {', '.join(locations)}")
    location = input("Enter the location from the above options: ")

    # Options for accommodation type
    accommodation_types = df['Accommodation_Type'].unique()
    print(f"Accommodation Types: {', '.join(map(str, accommodation_types))}")
    accommodation_type = int(input("Enter the accommodation type (1-5 star): "))

    # Options for starting city
    starting_cities = df['Starting_City'].unique()
    print(f"Starting Cities: {', '.join(starting_cities)}")
    starting_city = input("Enter the starting city from the above options: ")

    # Options for vehicle
    vehicles = df['Vehicle'].unique()
    print(f"Vehicles: {', '.join(vehicles)}")
    vehicle = input("Enter the vehicle from the above options: ")

    # Get number of people and days
    number_of_people = int(input("Enter the number of people: "))
    number_of_days = int(input("Enter the number of days: "))

    return {
        'Location': location,
        'Accommodation_Type': accommodation_type,
        'Starting_City': starting_city,
        'Vehicle': vehicle,
        'Number_of_People': number_of_people,
        'Number_of_Days': number_of_days
    }

# Prepare features (X) and target (y)
X = df[['Location', 'Accommodation_Type', 'Starting_City', 'Vehicle', 'Number_of_People', 'Number_of_Days']]
y = df['Total_Cost']

# Convert categorical variables to numeric using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Model Evaluation:\nMean Squared Error: {mse:.2f}\nR^2 Score: {r2:.2f}")

# Get user input and predict the price
user_input = get_user_input(df)
user_df = pd.DataFrame([user_input])

# Convert user input into the same format as training data
user_df = pd.get_dummies(user_df, drop_first=True)

# Align columns of user_df with X
user_df = user_df.reindex(columns=X.columns, fill_value=0)

# Predict the price
predicted_price = model.predict(user_df)[0]
print(f"\nThe predicted price for your trip is: ₹{predicted_price:.2f}")


Model Evaluation:
Mean Squared Error: 125945473929.17
R^2 Score: 0.18

Available options:
Locations: Goa, Mumbai, Pune, Kolhapur, Nashik
Enter the location from the above options: Pune
Accommodation Types: 4-Star, 5-Star, 3-Star, 2-Star, 1-Star
Enter the accommodation type (1-5 star): 5
Starting Cities: Nashik, Goa, Kolhapur, Pune, Mumbai
Enter the starting city from the above options: Goa
Vehicles: Car, Aeroplane, Bus, Train
Enter the vehicle from the above options: Bus
Enter the number of people: 2
Enter the number of days: 3

The predicted price for your trip is: ₹36755.60
