## Building out a model

In [18]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [19]:
model_features = pd.read_csv("C:/Users/HP/OneDrive/Documents/DANNY DATA/Customer Support Data/model_data.csv")

In [20]:
columns_to_drop = ["category", "sub-category", "issue_reported_at", "issue_responded", "response_time_(minutes)", 
                   "response_time_(seconds)", "time_of_issue", "time_of_response", "agent_name", "supervisor"]

model_features.drop(columns=columns_to_drop, axis=1, inplace=True)
model_features.head()

Unnamed: 0,channel_name,response_time_(hours),report_day_name,report_month_day,response_month_day,response_day_name,manager,tenure_bucket,agent_shift,csat_score
0,Outcall,0.566667,Tuesday,1,1,Tuesday,Jennifer Nguyen,On Job Training,Morning,5
1,Outcall,0.033333,Tuesday,1,1,Tuesday,Michael Lee,>90,Morning,5
2,Inbound,0.366667,Tuesday,1,1,Tuesday,William Kim,On Job Training,Evening,5
3,Inbound,0.333333,Tuesday,1,1,Tuesday,John Smith,>90,Evening,5
4,Inbound,0.033333,Tuesday,1,1,Tuesday,Michael Lee,0-30,Morning,5


In [21]:
model_features.columns

Index(['channel_name', 'response_time_(hours)', 'report_day_name',
       'report_month_day', 'response_month_day', 'response_day_name',
       'manager', 'tenure_bucket', 'agent_shift', 'csat_score'],
      dtype='object')

In [22]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
label_encoder = LabelEncoder()
one_hot_encoder = OneHotEncoder()

In [23]:
scale_features = ["response_time_(hours)", "report_month_day", "response_month_day"]

# one-hot encoding, no inhherent order
model_features = pd.get_dummies(model_features, columns=["channel_name", "manager", "agent_shift", "report_day_name", "response_day_name"])

# Inherent  order is present
model_features["tenure_bucket"] = label_encoder.fit_transform(model_features["tenure_bucket"])

In [24]:
model_features.columns

Index(['response_time_(hours)', 'report_month_day', 'response_month_day',
       'tenure_bucket', 'csat_score', 'channel_name_Email',
       'channel_name_Inbound', 'channel_name_Outcall', 'manager_Emily Chen',
       'manager_Jennifer Nguyen', 'manager_John Smith', 'manager_Michael Lee',
       'manager_Olivia Tan', 'manager_William Kim', 'agent_shift_Afternoon',
       'agent_shift_Evening', 'agent_shift_Morning', 'agent_shift_Night',
       'agent_shift_Split', 'report_day_name_Friday', 'report_day_name_Monday',
       'report_day_name_Saturday', 'report_day_name_Sunday',
       'report_day_name_Thursday', 'report_day_name_Tuesday',
       'report_day_name_Wednesday', 'response_day_name_Friday',
       'response_day_name_Monday', 'response_day_name_Saturday',
       'response_day_name_Sunday', 'response_day_name_Thursday',
       'response_day_name_Tuesday', 'response_day_name_Wednesday'],
      dtype='object')

In [25]:
from sklearn.model_selection import train_test_split, GridSearchCV

X = model_features.drop("csat_score", axis=1)
y = model_features["csat_score"]

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42) 

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (68725, 32)
X_test shape: (17182, 32)
y_train shape: (68725,)
y_test shape: (17182,)


## Picking Our Models

In [26]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [27]:
model_grid = {

    "LogisticRegression" : {
        "C" : [0.1, 1, 10]
    },
    

    "RandomForestClassifier" : {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5, 10]
    }
}

1. Rabdom Forest

In [28]:
logistic_model = LogisticRegression(class_weight="balanced", C=1)
rand_forest_model = RandomForestClassifier(class_weight="balanced")




# grid_search = GridSearchCV(estimator=rand_forest_model, param_grid=model_grid["RandomForestClassifier"], cv=10)

# grid_search.fit(X_train, y_train)

# print(grid_search.best_params_)

# print(grid_search.best_score_)





In [29]:
logistic_model.fit(X_train, y_train)
prediction = logistic_model.predict(X_test)



In [30]:
print(classification_report(y_true=y_test, y_pred=prediction))

              precision    recall  f1-score   support

           1       0.22      0.28      0.24      2241
           2       0.01      0.14      0.02       227
           3       0.03      0.17      0.06       503
           4       0.14      0.16      0.15      2277
           5       0.75      0.43      0.55     11934

    accuracy                           0.36     17182
   macro avg       0.23      0.24      0.21     17182
weighted avg       0.57      0.36      0.43     17182

