In [2]:
import os

import numpy as np
import pandas as pd

In [3]:
# Location of the semicolon-separated demographic dataset.
# The DEMOGRAPHIC_DATA_PATH environment variable, when set, overrides the
# default so the notebook can run on machines that do not have this exact
# absolute Windows path; when it is unset the original path is used unchanged.
dataset_path = os.environ.get(
    "DEMOGRAPHIC_DATA_PATH",
    r"D:\UOM\L4S1\FYP\Datasets\demographic_filter_data.csv",
)

# The file uses ';' as the field delimiter rather than the default ','.
df = pd.read_csv(dataset_path, sep=';')

In [4]:
# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

Unnamed: 0,Age Group,Gender,Country,Preferred Destination Category
0,18-30 years,Male,Sri Lanka,Beaches & Coastal Areas
1,51+ years,Female,USA/Canada,Nature & Wildlife
2,51+ years,Female,Middle East,Adventure & Unique Experiences
3,51+ years,Female,USA/Canada,Adventure & Unique Experiences
4,51+ years,Female,Australia/New Zealand,Nature & Wildlife


In [5]:
# Work on a reproducible random subset of the data (fixed seed).
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so the requested size is capped at len(df).
SAMPLE_SIZE = 1000
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)

In [6]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Candidate classifiers and the hyper-parameter grid searched for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the parameter grid handed to the grid search
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [10, 20, 30],
            'kernel': ['rbf', 'sigmoid', 'poly']
        }
    },
    'random_forest': {
        # Fixed seed: without it the forest is rebuilt differently on every
        # run and the reported scores are not reproducible.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [10, 50, 100]
        }
    },
    'logistic_regression': {
        # multi_class='auto' is the library default and the explicit argument
        # is deprecated in recent scikit-learn releases, so it is omitted.
        # liblinear shuffles the data internally, hence the fixed seed.
        'model': LogisticRegression(solver='liblinear', random_state=42),
        'params': {
            'C': [1, 5, 10]
        }
    }
}

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder

# Separate features and target
X = df.drop(columns=['Preferred Destination Category'])
y = df['Preferred Destination Category']

# One encoder per column: re-fitting a single shared LabelEncoder would discard
# the Gender and Country mappings, making it impossible to encode new samples
# consistently or to decode the integer codes afterwards.
gender_encoder = LabelEncoder()
X['Gender'] = gender_encoder.fit_transform(X['Gender'])

# Age groups have a natural order, so the category order is given explicitly.
oe = OrdinalEncoder(categories=[['18-30 years', '31-50 years', '51+ years']])
# fit_transform returns an (n_rows, 1) array; flatten it to a plain column.
X['Age Group'] = oe.fit_transform(X[['Age Group']]).ravel()

# NOTE(review): Country is nominal, yet integer codes imply an ordering to the
# SVM / logistic-regression models; consider one-hot encoding - confirm.
country_encoder = LabelEncoder()
X['Country'] = country_encoder.fit_transform(X['Country'])

# `le` remains the target encoder so le.inverse_transform maps predicted class
# ids back to destination-category names.
le = LabelEncoder()
y = le.fit_transform(y)

In [8]:
from sklearn.model_selection import GridSearchCV

# Run a 5-fold cross-validated grid search for every candidate model and
# record its best mean CV score together with the winning parameters.
scores = []

for model_name, mp in model_params.items():
    # n_jobs=-1 evaluates the grid on all available CPU cores.
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False, n_jobs=-1)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

# Keep the summary in its own variable: assigning it to `df` would overwrite
# the dataset frame and break re-running the preprocessing cell.
scores_df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
scores_df



Unnamed: 0,model,best_score,best_params
0,svm,0.519,"{'C': 20, 'kernel': 'rbf'}"
1,random_forest,0.515,{'n_estimators': 100}
2,logistic_regression,0.402,{'C': 1}
