In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import xgboost as xgb

# Load your dataset
df = pd.read_csv(r'customer_bike_preferences.csv')

# Separate the features and target
X = df.drop(columns=['Preference'])
y = df['Preference']

# Define categorical and numerical columns
categorical_cols = ['Age_Group', 'City', 'Income_Range', 'Company_Model']
numerical_cols = ['Top_Speed_kmh', 'Range_km', 'Price_INR']

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ])

# Apply the preprocessor to the data
X_processed = preprocessor.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# Initialize and train the XGBoost classifier
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=100, max_depth=4)
xgb_model.fit(X_train, y_train)

# Evaluate the model on the test data
accuracy = xgb_model.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.4f}")


Parameters: { "use_label_encoder" } are not used.



Model accuracy: 0.5011
