In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MultiLabelBinarizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
import joblib  # Consider using joblib for potentially better compression

In [2]:
# Load dataset
df = pd.read_csv('Travelchatbots.csv')

In [3]:
df.head()

Unnamed: 0,Provience,Location,Weather,Traveltype,Accommodation,Address,Perpersonbudget,Activities
0,Eastern,Ampara,Hot,City life,WHITE HOUSE-AMPARA,"NO 229, NEW TOWN, AMPARA",11500,"4WD Tours,Safaris,Visit National Parks,Nature ..."
1,Eastern,Ampara,Hot,City life,HIDEAWAY RESORT,"ULLE, POTTUVIL",22500,"4WD Tours,Safaris,Visit National Parks,Nature ..."
2,Eastern,Ampara,Hot,City life,SUPERSTAR TOURIST INN,"NO. 23/1, YARD ROAD, KALMUNAI 01",12400,"4WD Tours,Safaris,Visit National Parks,Nature ..."
3,Eastern,Ampara,Hot,City life,S.T .RAAJ RESTAURANT AND GUEST HOUSE,"OLD HOSPITAL ROAD,AKKARAIPATTU-09",35000,"4WD Tours,Safaris,Visit National Parks,Nature ..."
4,Eastern,Ampara,Hot,City life,K.G.A.ARIYAWAN REST,"K.P.66, 1ST AVANUE, NAWAGAMPURA,AMPARA.",25500,"4WD Tours,Safaris,Visit National Parks,Nature ..."


In [4]:
# Find unique values in the 'Weather' column and sort them
unique_weather_sorted = sorted(df['Weather'].unique())

# Display the unique values
print(unique_weather_sorted)


['Breeze', 'Cold', 'Extreme Cold', 'Extreme hot', 'Hot']


In [5]:
# Find unique values in the 'Weather' column and sort them
unique_Location_sorted = sorted(df['Location'].unique())

# Display the unique values
print(unique_Location_sorted)


['Ampara', 'Anuradhapura', 'Badulla', 'Batticaloa', 'Colombo', 'Galle', 'Gampaha', 'Hambantota', 'Jaffna', 'Kalutara', 'Kandy', 'Kegalle', 'Kilinochchi', 'Kurunegala', 'Mannar', 'Matale', 'Matara', 'Moneragala', 'Mullaitivu', 'Nuwara Eliya', 'Polonnaruwa', 'Puttalam', 'Ratnapura', 'Trincomalee', 'Vavuniya']


In [6]:
# Find unique values in the 'Weather' column and sort them
unique_Traveltype_sorted = sorted(df['Traveltype'].unique())

# Display the unique values
print(unique_Traveltype_sorted)


['Adventure and outdoors', 'City life', 'Cultural', 'Relaxing', 'Spiritual']


In [7]:
# Preprocessing
encoder = OneHotEncoder(sparse=False)
le_location = LabelEncoder()
le_accommodation = LabelEncoder()
le_address = LabelEncoder()
mlb_activities = MultiLabelBinarizer()

# Encode features
df['Location_encoded'] = le_location.fit_transform(df['Location'])
df['Accommodation_encoded'] = le_accommodation.fit_transform(df['Accommodation'])
df['Address_encoded'] = le_address.fit_transform(df['Address'])

# Split Activities by commas and apply MultiLabelBinarizer
df['Activities'] = df['Activities'].apply(lambda x: x.split(', '))
activities_encoded = mlb_activities.fit_transform(df['Activities'])

# Encode Weather, Traveltype, and Perpersonbudget
X_basic = df[['Weather', 'Traveltype', 'Perpersonbudget']]
X_encoded = encoder.fit_transform(X_basic)

# Combine basic encoded features
X_final = pd.DataFrame(X_encoded)

# Target Variables (Including Activities as an output)
y = pd.concat([df[['Location_encoded', 'Accommodation_encoded', 'Address_encoded']], pd.DataFrame(activities_encoded)], axis=1)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size=0.2, random_state=42)

# Random Forest for Multi-Output (consider using more trees or different parameters)
clf = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42))
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy for each output
accuracies = [accuracy_score(y_test.iloc[:, i], y_pred[:, i]) for i in range(y_test.shape[1])]
average_accuracy = sum(accuracies) / len(accuracies)
print(f'Average Accuracy: {average_accuracy:.4f}')

# Save the model to a pickle file using joblib for better compression
joblib.dump(clf, 'C:/Users/Dell/Desktop/jupiter projects/Travel/travel_chatbot_model.pkl')
print("Model saved travel_chatbot_model.pkl")

# Recommendation Function
def recommend(user_input, num_recommendations=5):
    # Prepare user input (basic features)
    user_input_basic = [user_input[:3]]  # ['Hot', 'City life', 15000]

    # Encode user input
    user_input_basic_encoded = encoder.transform(user_input_basic)

    # Compute similarity between user input and dataset
    similarity_scores = cosine_similarity(user_input_basic_encoded, X_final).flatten()

    # Get top N most similar rows
    top_indices = similarity_scores.argsort()[-num_recommendations:][::-1]

    # Get recommendations
    recommendations = df.iloc[top_indices]

    # Decode the recommendations for display
    results = []
    for index, row in recommendations.iterrows():
        location_pred = le_location.inverse_transform([row['Location_encoded']])[0]
        accommodation_pred = le_accommodation.inverse_transform([row['Accommodation_encoded']])[0]
        address_pred = le_address.inverse_transform([row['Address_encoded']])[0]

        # Fix: Pass a proper 2D array for inverse_transform
        activity_encoded = y.iloc[index, 3:].values.reshape(1, -1)  # Convert to 2D array
        activities_pred = mlb_activities.inverse_transform(activity_encoded)[0]  # Decode activities

        results.append({
            'Location': location_pred,
            'Accommodation': accommodation_pred,
            'Address': address_pred,
            'Activities': ', '.join(activities_pred),
            'Similarity Score': similarity_scores[index]
        })

    return results

# Example user input (adjust as needed)
user_input = ['Hot', 'City life', 13100]

# Get 5 recommendations
recommendations = recommend(user_input, num_recommendations=5)

# Display results
for i, rec in enumerate(recommendations, 1):
    print(f"Recommendation {i}:")
    print(f"Location: {rec['Location']}")
    print(f"Accommodation: {rec['Accommodation']}")
    print(f"Address: {rec['Address']}")
    print(f"Activities: {rec['Activities']}")
   # print(f"Similarity Score: {rec['Similarity Score']}")
    print("\n")




Average Accuracy: 0.8901
Model saved travel_chatbot_model.pkl
Recommendation 1:
Location: Ampara
Accommodation: BALAJI OCEAN VIEW
Address: MAIN STREET,ARUGAMBAY,POTTUVIL.
Activities: 4WD Tours,Safaris,Visit National Parks,Nature and Wildlife Tours


Recommendation 2:
Location: Ampara
Accommodation: TSUNAMI HOTEL
Address: ARUGAMBAY,ULLE,POTHUWIL
Activities: 4WD Tours,Safaris,Visit National Parks,Nature and Wildlife Tours


Recommendation 3:
Location: Ampara
Accommodation: WHITE HOUSE-AMPARA
Address: NO 229, NEW TOWN, AMPARA
Activities: 4WD Tours,Safaris,Visit National Parks,Nature and Wildlife Tours


Recommendation 4:
Location: Puttalam
Accommodation: DINUDA RESORT
Address: PUDUKUDUERIPPU, SETHAWADI, KALPITIYA
Activities: Visit the Famous Munneswaram Hindu Temple,Visit the Famous Munneswaram Hindu Temple,Visit the Kite Center,Visit St. Anne’s Church,Go on a Safari in Wilpattu National Park,Explore Margarita Village


Recommendation 5:
Location: Puttalam
Accommodation: CLUB HOTEL DOLPHI

