# In [None]:
import pandas as pd
import ast
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

# Train a RandomForest destination recommender from 'travel_dataset.csv'.
#
# Pipeline: load the CSV -> multi-hot encode the list-valued columns ->
# one-hot encode 'budget' and 'duration' -> label-encode the target
# 'state_country' -> train/test split -> fit -> report accuracy -> persist the
# model, the label encoder and the ordered feature-column list.

# Load the dataset
df = pd.read_csv('travel_dataset.csv')

# The list-valued columns are stored as stringified Python literals;
# ast.literal_eval parses them without evaluating arbitrary code.
df['activities'] = df['activities'].apply(ast.literal_eval)
df['destinationType'] = df['destinationType'].apply(ast.literal_eval)

# Collect the vocabulary of each list-valued column (kept as sets).
unique_activities = {activity for sublist in df['activities'] for activity in sublist}
unique_dest_types = {dtype for sublist in df['destinationType'] for dtype in sublist}

# Multi-hot encode activities.
# Iterate in sorted order rather than raw set order: string hashing is
# randomized per interpreter run, so bare set iteration would shuffle the
# feature columns between runs and the seeded forest would not be reproducible.
# key=str keeps the sort well-defined even if labels are not all strings.
# NOTE(review): a label equal to an existing column name overwrites that
# column -- confirm labels never collide with other columns.
for activity in sorted(unique_activities, key=str):
    df[activity] = df['activities'].apply(lambda x: 1 if activity in x else 0)

# Multi-hot encode destination types (same deterministic ordering).
# NOTE(review): a destination type that shares its name with an activity
# replaces the activity column created above -- confirm the two vocabularies
# are disjoint.
for dtype in sorted(unique_dest_types, key=str):
    df[dtype] = df['destinationType'].apply(lambda x: 1 if dtype in x else 0)

# Encode categorical features (budget and duration); drop_first removes the
# first level of each so the remaining dummies are not redundant.
df = pd.get_dummies(df, columns=['budget', 'duration'], drop_first=True)

# Encode target variable (state_country) as integer class ids; the fitted
# encoder is saved below so predictions can be mapped back to names.
label_encoder = LabelEncoder()
df['state_country'] = label_encoder.fit_transform(df['state_country'])

# Drop the original list-valued columns now that they are multi-hot encoded.
df.drop(['activities', 'destinationType'], axis=1, inplace=True)

# Split data into features and target
X = df.drop('state_country', axis=1)
y = df['state_country']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Save the model, label encoder, and feature columns. The column list records
# the exact feature order the model was fitted on, so inference code can
# rebuild inputs in the same layout.
joblib.dump(model, 'random_forest_model.joblib')
joblib.dump(label_encoder, 'label_encoder.joblib')
joblib.dump(X.columns.tolist(), 'feature_columns.joblib')

print("Model, Label Encoder, and Feature Columns saved successfully!")
