In [2]:
import warnings
warnings.filterwarnings("ignore")

##### **Most Preferred Delivery DAY - Region Wise**

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
data = pd.read_csv('Set/Base.csv')

label_encoder = LabelEncoder()
data['RECEIVER REGION'] = label_encoder.fit_transform(data['RECEIVER REGION'])
data['TIME SLOT'] = label_encoder.fit_transform(data['TIME SLOT'])
data['DAY'] = label_encoder.fit_transform(data['DAY'])

In [5]:
X = data.drop(columns=['CUSTOMER ID', 'DAY', 'DELIVERY OUTCOME'])
y = data['DAY']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = XGBClassifier(objective='multi:softmax', num_class=len(data['DAY'].unique()), use_label_encoder=False, eval_metric='mlogloss')

In [6]:
param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

In [7]:
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [8]:
best_params = grid_search.best_params_

In [9]:
best_xgb_model = XGBClassifier(**best_params, objective='multi:softmax', num_class=len(data['DAY'].unique()), use_label_encoder=False, eval_metric='mlogloss')
best_xgb_model.fit(X_train, y_train)

In [10]:
y_pred = best_xgb_model.predict(X_test)

In [11]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [12]:
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.1859872611464968
Classification Report:
              precision    recall  f1-score   support

           0       0.18      0.17      0.17       158
           1       0.19      0.25      0.22       154
           2       0.16      0.20      0.18       148
           3       0.19      0.22      0.20       153
           4       0.28      0.10      0.15       172

    accuracy                           0.19       785
   macro avg       0.20      0.19      0.18       785
weighted avg       0.20      0.19      0.18       785

Confusion Matrix:
[[27 38 42 39 12]
 [28 38 39 39 10]
 [35 42 30 30 11]
 [34 37 37 34 11]
 [27 43 44 41 17]]


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

data = pd.read_csv('Set/Base.csv')

label_encoder_region = LabelEncoder()
label_encoder_day = LabelEncoder()
data['RECEIVER REGION'] = label_encoder_region.fit_transform(data['RECEIVER REGION'])
data['TIME SLOT'] = LabelEncoder().fit_transform(data['TIME SLOT'])
data['DAY'] = label_encoder_day.fit_transform(data['DAY'])

X = data.drop(columns=['CUSTOMER ID', 'DAY', 'DELIVERY OUTCOME'])
y = data['DAY']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = XGBClassifier(objective='multi:softmax', num_class=len(data['DAY'].unique()), use_label_encoder=False, eval_metric='mlogloss')

param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

best_xgb_model = XGBClassifier(**best_params, objective='multi:softmax', num_class=len(data['DAY'].unique()), use_label_encoder=False, eval_metric='mlogloss')
best_xgb_model.fit(X_train, y_train)

data['PREDICTED_DAY'] = best_xgb_model.predict(X)

data['PREDICTED_DAY'] = label_encoder_day.inverse_transform(data['PREDICTED_DAY'])
data['DAY'] = label_encoder_day.inverse_transform(data['DAY'])

preferred_days = data.groupby('RECEIVER REGION')['PREDICTED_DAY'].agg(lambda x: x.mode()[0]).reset_index()

Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [14]:
import pandas as pd

data = {
    'RECEIVER REGION': [0, 1, 2, 3],
    'PREDICTED_DAY': ['Friday', 'Monday', 'Monday', 'Thursday']
}

df = pd.DataFrame(data)

regions = ["Bangalore North", "Bangalore South", "Bangalore Central", "Bangalore East"]

df['RECEIVER REGION'] = df['RECEIVER REGION'].map(lambda x: regions[x])

def pretty_print_df(df):
    return df.style.set_table_styles([{
        'selector': 'thead th',
        'props': [('background-color', '#f5f5f5'),
                  ('color', '#333'),
                  ('font-weight', 'bold')]
    }]).set_properties(**{
        'text-align': 'center',
        'font-size': '12pt',
        'border': '1px solid black'
    }).hide(axis='index')

pretty_print_df(df)

RECEIVER REGION,PREDICTED_DAY
Bangalore North,Friday
Bangalore South,Monday
Bangalore Central,Monday
Bangalore East,Thursday
