# **Import Statements**

In [1]:
import pandas as pd
import numpy as np
import joblib
import os

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier

# **Load Dataset**

In [3]:
# Location of the master analytical CSV under the Kaggle input mount.
data_path = "/kaggle/input/tourism-final-master-analytics/Tourism_Final_Master_Analytical.csv"

# Read the whole file into memory; every later cell works from this frame.
df = pd.read_csv(data_path)

# **Feature Selection**

In [5]:
# Predictor columns fed to the model, in the order they are selected.
features = [
    "Year_of_Visit",
    "Month_of_Visit",
    "Attraction_Category",
    "Destination_Region_Name",
]

# Label column the classifier is trained to predict.
target = "Traveler_Group_Type"

# Keep only the modelling columns, then discard any row that has a missing
# value in at least one of them. `df` itself is left untouched.
df_model = df.loc[:, [*features, target]].dropna()

# **Train Test Split**

In [7]:
# Separate the predictor columns from the label column.
x, y = df_model[features], df_model[target]

# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the
# class proportions aligned between the two partitions, and the fixed seed
# makes the split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# **Preprocessing**

In [8]:
# Columns grouped by how they are handed to the model: the first list is
# one-hot encoded, the second list is forwarded unchanged.
catagorical_features = ["Attraction_Category", "Destination_Region_Name"]
numerical_features = ["Year_of_Visit", "Month_of_Visit"]

# The encoded block comes first and the passthrough block second; this order
# determines the column order of the transformed matrix, so it is kept as-is.
# handle_unknown="ignore" lets transform() accept categories that were not
# present when the encoder was fitted instead of raising.
preprocessor = ColumnTransformer(
    [
        ("cat", OneHotEncoder(handle_unknown="ignore"), catagorical_features),
        ("num", "passthrough", numerical_features),
    ]
)


# **Model**

In [18]:
# Random forest used to predict the traveler group type.
# An earlier configuration (n_estimators=200, max_depth=15, no class
# weighting) was replaced by the settings below.
visit_mode_model = RandomForestClassifier(
    n_estimators=300,         # number of trees in the ensemble
    max_depth=18,             # cap on the depth of each tree
    min_samples_split=5,      # a node needs at least 5 samples to be split
    class_weight="balanced",  # re-weight classes inversely to their frequency
    n_jobs=-1,                # use every available core
    random_state=42,          # fixed seed for repeatable training
)

In [19]:
# Chain preprocessing and the classifier so that fit/predict apply the same
# column transformations, and so both can be persisted as a single object.
pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("model", visit_mode_model),
    ]
)

# **Model Training**

In [20]:
# Fit the preprocessing step and the classifier on the training partition only.
pipeline.fit(X=x_train, y=y_train)

# **Evaluation**

In [21]:
# Score the held-out partition with the fitted pipeline.
y_pred = pipeline.predict(x_test)

# Per-class precision/recall/F1 followed by the overall accuracy.
print("Visit Model classification report")
print(classification_report(y_true=y_test, y_pred=y_pred))
print(f"Accuracy : {accuracy_score(y_true=y_test, y_pred=y_pred)}")

Visit Model classification report
              precision    recall  f1-score   support

    Business       0.02      0.36      0.05       125
     Couples       0.47      0.33      0.39      4324
      Family       0.47      0.35      0.40      3043
     Friends       0.31      0.18      0.23      2189
        Solo       0.13      0.31      0.18       905

    accuracy                           0.30     10586
   macro avg       0.28      0.31      0.25     10586
weighted avg       0.40      0.30      0.34     10586

Accuracy : 0.30408086151520874


# **Saving Model**

In [22]:
# Persist the fitted pipeline (preprocessing + classifier) to the current
# working directory so it can be reloaded later with joblib.load.
joblib.dump(value=pipeline, filename="visit_mode_model.pkl")

['visit_mode_model.pkl']