# Notebook Contents
- Importing The Dependencies
- Load The Dataset
- Split Dataset
- Data Scaling
- Random Forest Algorithm
- SVM Algorithm
- Conclusion

### Importing The Dependencies


In [1]:
import os
import sys
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

sys.path.append(os.path.abspath("../"))
from assets.utils import applyStandardScaler
from assets.configs import ClassificationModelsHyperparameters

### Load The Dataset

In [2]:
# Read the segmented-customers workbook and discard the "Unnamed: 0" column
# (presumably an index column saved alongside the data -- TODO confirm).
df = (
    pd.read_excel("../datasets/segmented_customers_dataset.xlsx")
    .drop(columns=["Unnamed: 0"])
)

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Age,DaysSinceCreation,AverageLeadTime,LodgingRevenue,OtherRevenue,BookingsCanceled,BookingsNoShowed,BookingsCheckedIn,PersonsNights,RoomNights,...,SRAwayFromElevator,SRNoAlcoholInMiniBar,SRQuietRoom,MarketSegment_Complementary,MarketSegment_Corporate,MarketSegment_Direct,MarketSegment_Groups,MarketSegment_Other,MarketSegment_Travel Agent/Operator,CustomerSegmentation
0,31.0,794,7.0,452.0,34.0,0,0,1,4,4,...,0,0,0,0,0,1,0,0,0,4
1,30.0,728,16.0,408.0,92.0,0,0,1,8,4,...,0,0,1,0,0,0,0,1,0,4
2,63.0,85,0.0,0.0,0.0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,21.0,2,0.0,0.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
4,48.0,808,57.0,299.4,111.6,0,0,1,6,2,...,0,0,0,0,0,0,0,1,0,9


### Split Dataset


In [4]:
# Separate the label from the predictors: "CustomerSegmentation" is the
# target, and every remaining column of df is kept as a feature.
y = df["CustomerSegmentation"]
x = df.drop(columns="CustomerSegmentation")

In [5]:
# Stage 1: carve out the training set, holding back the portion given by
# TEST_SIZE. Stratifying on y keeps the class proportions in both parts.
x_train, x_temp, y_train, y_temp = train_test_split(
    x,
    y,
    test_size=ClassificationModelsHyperparameters.TEST_SIZE,
    random_state=ClassificationModelsHyperparameters.RANDOM_STATE,
    stratify=y,
)

# Stage 2: split the held-back portion in half to obtain the validation and
# test sets, again stratified and seeded with the same RANDOM_STATE.
x_val, x_test, y_val, y_test = train_test_split(
    x_temp,
    y_temp,
    test_size=0.5,
    random_state=ClassificationModelsHyperparameters.RANDOM_STATE,
    stratify=y_temp,
)

In [6]:
# Report the (features, target) shapes of each split, in train / validation /
# test order, to confirm the split sizes.
for split_label, split_x, split_y in (
    ("Training", x_train, y_train),
    ("Validation", x_val, y_val),
    ("Testing", x_test, y_test),
):
    print(f"{split_label} Set: {split_x.shape}, {split_y.shape}")

Training Set: (7000, 31), (7000,)
Validation Set: (1500, 31), (1500,)
Testing Set: (1500, 31), (1500,)


### Data Scaling 

In [11]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to all three splits so that validation/test statistics never
# influence the scaling parameters.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_valid_scaled = scaler.transform(x_val)
x_test_scaled = scaler.transform(x_test)

### Random Forest Algorithm 

In [13]:
# Build and fit a random forest on the scaled training data; the number of
# trees and the seed both come from the shared hyperparameter config.
random_forest = RandomForestClassifier(
    n_estimators=ClassificationModelsHyperparameters.RANDOM_FOREST_N_ESTIMATORS,
    random_state=ClassificationModelsHyperparameters.RANDOM_STATE,
)
random_forest.fit(x_train_scaled, y_train)

# Predictions for the validation split (not consumed within this cell).
y_pred = random_forest.predict(x_valid_scaled)

# Accuracy on each split, printed in train / valid / test order.
for split_label, split_x, split_y in (
    ("Train", x_train_scaled, y_train),
    ("Valid", x_valid_scaled, y_val),
    ("Test", x_test_scaled, y_test),
):
    print(f"Random Forest {split_label} Accuracy: {random_forest.score(split_x, split_y)}")

Random Forest Train Accuracy: 1.0
Random Forest Valid Accuracy: 0.968
Random Forest Test Accuracy: 0.964
