In [1]:
import numpy as np
import pandas as pd
pd.set_option('max_columns', None)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
df = pd.read_excel('Training-testing-AT&T.xlsx',sheet_name = 'Sheet1')
df.rename(columns = {'Date - Month':'Date'},inplace = True)
df.rename(columns = {'Current Status':'Current_Status'},inplace = True)
df = df.query("Current_Status not in ('Never Started','Leave of Absence')")
df.rename(columns = {'Current_Status':'Current Status'},inplace = True)
df.shape

(1178, 81)

In [410]:
df['Department'].value_counts()

Representative    2999
Name: Department, dtype: int64

In [370]:
values = df.query("Date in ('January' ,'February' , 'December')")
values['Position'].value_counts()

Representative    2209
Name: Position, dtype: int64

In [3]:
def onehot_encode(df, column):
    df = df.copy()
    dummies = pd.get_dummies(df[column], prefix=column)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(column, axis=1)
    return df

In [4]:
def preprocess_input(df):
    df = df.copy()
    
    # Filtring out null working hours
    df = df.query("Date in ('January' ,'February' , 'December')")
    df = df.fillna(0)
    # droping one value columns or null
    #df = df.drop(['Manager','Address 1','Last Working Day', 'Position', 'Position Group','Ident','Employee Name','Client','Market','Language','Location','Contract Type','Department','Termination Type','Termination Reason'], axis=1)
    df = df.drop(['University','Nationality','Address 1','Major'
                  ,'Last Working Day', 'Position', 'Position Group','Ident','Employee Name','Client',
                  'Market','Language','Location','Contract Type','Termination Type','Termination Reason',
                  'Sum of Internal Moves','Transfers %','QA Score','Coached Calls','Coached Required','Coaching Attainment',
                  'Avg. Time To Coach Hrs','Sum of Over-Time Duration','Utilization %','Productivity %',
                  'Canceled Actions'
                  ,'Approved Green','Canceled Green','Approved Yellow','Canceled Yellow','Approved Orange','Canceled Orange',
                  'Approved ACM Investigation','Canceled ACM Investigation','Approved Investigation','Canceled Investigation','Department'], axis=1)
   
    # Binary-encode binary columns
    df['Gender.'] = df['Gender.'].replace({'Female': 0, 'Male': 1})
    # Ordinal-encode
    df['Date'] = df['Date'].replace({'January': 1, 'February': 2,'December': 0,'March':3})
    df['Graduate / Undergrad'] = df['Graduate / Undergrad'].replace({'Drop Out': 0, 'Undergrad': 1,'Graduate': 2 , 'Gap Year':3})
    df['Current Status'] = df['Current Status'].replace({'Active': 1, 'Resigned': 0,'Terminated': 0,'Never Started':0,'Leave of Absence':0})
    # One-hot encoding
    for column in ['Program','Current Civil Status','District','Manager']:
        df = onehot_encode(df, column=column)
    
    
    
    # Split df into X and y
    y = df['Current Status']
    df = df.reindex(labels=Predict.columns,axis=1)
    df = df.fillna(0)
    X = df.drop('Current Status', axis=1)
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=1)
    
    # Scale X
    #scaler = StandardScaler()
    #scaler.fit(X_train)
    #X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    #X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    
    return X_train, X_test, y_train, y_test

In [5]:
def preprocess_input2(df):
    df = df.copy()
    # Filtring out null working hours
    df = df.query("Date == 'March'")
    df = df.fillna(0)
    df['Gender.'] = df['Gender.'].replace({'Female': 0, 'Male': 1})
    # Ordinal-encode
    #df['Graduate / Undergrad'] = df['Graduate / Undergrad'].replace({'Drop Out': 0, 'Undergrad': 1,'Graduate': 2 , 'Gap Year':3})
    #df['Current Status'] = df['Current Status'].replace({'Active': 1, 'Resigned': 0,'Terminated': 0})
    df = df.drop(['University','Nationality','Major','Address 1','Last Working Day', 'Position', 'Position Group','Ident','Employee Name','Client',
                  'Market','Language','Location','Contract Type','Termination Type','Termination Reason',
                  'Sum of Internal Moves','Transfers %','QA Score','Coached Calls','Coached Required','Coaching Attainment',
                  'Avg. Time To Coach Hrs','Sum of Over-Time Duration','Utilization %','Productivity %','Canceled Actions'
                  ,'Approved Green','Canceled Green','Approved Yellow','Canceled Yellow','Approved Orange','Canceled Orange',
                  'Approved ACM Investigation','Canceled ACM Investigation','Approved Investigation','Canceled Investigation','Department'], axis=1)
 
    # Ordinal-encode
    df['Date'] = df['Date'].replace({'January': 1, 'February': 2,'December': 0,'March':3})
    df['Graduate / Undergrad'] = df['Graduate / Undergrad'].replace({'Drop Out': 0, 'Undergrad': 1,'Graduate': 2 , 'Gap Year':3})
    # One-hot encoding
    for column in ['Program','Current Civil Status','District','Manager']:
        df = onehot_encode(df, column=column)
    return df

In [6]:
Predict = preprocess_input2(df)
X_train, X_test, y_train, y_test = preprocess_input(df)
X_train

Unnamed: 0,Date - Year,Date,Sum of Salary,Gender.,Age,Graduate / Undergrad,AHT,Sum of Handled Transactions,Sum of Total Surveys,NPS %,DSAT %,FCR %,Sum of Broadband Sales,Sum of Wireless,QA Monitored,"QA Critical Pass % (EUC, BC, CC, ZTP)",QA EUC Pass %,QA BC Pass %,QA CC Pass %,QA ZTP Pass %,Other Monitored Transactions,Positive Coaching,Sum of Working Hours On Premises,Sum of Working Hours WAHA,Sum of Over-Time Days,Sum of Working During Holdidays,Occupancy %,ABS %,Sum of Paid Leaves,Sum of Unpaid Leaves,Sum of Sick-Leaves,Sum of No Shows,Sum of Attrition,Approved Actions,Approved Red,Canceled Red,Approved Black,Canceled Black,Program_AT&T PLAZA Agent,Program_AT&T PLAZA Escalation Desk,Program_AT&T PLAZA New Hire,Program_AT&T PLAZA Support Line,Current Civil Status_Single,Current Civil Status_Unknown,District_-,District_15 May City,District_6th of October City 1,District_Abdeen,District_Abu Kebir,District_Agouza,District_Ain Shams,District_Asyut 2,District_Badr City,District_Banha,District_Basyoun,District_Belqas,District_Beni Suef,District_Bulaq,District_Daher,District_Damanhour,District_Damietta,District_Dokki,District_El Arab,District_El Bagour,District_El Basatin,District_El Gamaliya,District_El Haram,District_El Khalifa,District_El Manial,District_El Mansoura 1,District_El Marg,District_El Masara,District_El Matareya,District_El Nozha,District_El Omraniya,District_El Qanater El Khayreya,District_El Qobbah,District_El Raml 1,District_El Rehab,District_El Sahel,District_El Salam,District_El Sayeda Zeinab,District_El Sharq,District_El Shorouk,District_El Tor,District_El Warraq,District_El Weili,District_El Zawya El Hamra,District_Esna,District_Faiyum,District_Faiyum 1,District_Faqous,District_Fifth Settlement,District_Garden City,District_Giza,District_Heliopolis,District_Helwan,District_Imbaba,District_Ismailia,District_Ismailia 2,District_Ismailia 3,District_Kafr El Sheikh,District_Kafr Elshekh,District_Kafr Shukr,District_Kom Hamada,District_Maadi,District_Metoubes,District_Minya,District_Minya 1,District_Mokattam,District_Nasr City 1,District_Nasr City 2,District_New Cairo 1,District_New Cairo 2,District_New Cairo 3,District_Old Cairo,District_Quweisna,District_Ras Gharib,District_Rod El Farag,District_Sheikh Zayed City,District_Shubra El Kheima 1,District_Sidi Gaber,District_Talbia,District_Tanta,District_Zeitoun,District_shubra el Kheima 2,District_suez,"Manager_Abdalla, Mohamed A.","Manager_Abdelaaty, Salma K.","Manager_Abdelaziz, Ehab M.","Manager_Abdelkader, Shorouk A.","Manager_Abdelkafy, Youssef N.","Manager_Abdelnaim, Mohamed S.","Manager_Abdelrahim, Mohamed S.","Manager_Abdelsalam, Ahmed A.","Manager_Abdo, Mostafa M.","Manager_Abobakr, Toka H.","Manager_Adel, Hanin A.","Manager_AlSalem, Ammar K.","Manager_Alfouly, Hassan T.","Manager_Ali, Fares R.","Manager_Amer, Shady M.","Manager_Ayman, Seif E.","Manager_Elgharbawy, Dina H.","Manager_Elramadi, Abdallah Y.","Manager_Elsayed, Ahmed Adel","Manager_Farag, Mostafa H.","Manager_Gadelrab, Abdelrahman M.","Manager_Hassanin, Noura I.","Manager_Ibrahim, Sirag M.","Manager_Lotfy, Mahmoud A.","Manager_Mabrouk, Mohamed T.","Manager_Omer, Sham A.","Manager_Safwat, Shehab M.","Manager_Shaaban, Abdallah A."
93,2021,0,6000,1,27,2,805.366310,374,18,0.0,0.0,0.0,21.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,29.0,0.0,352.000000,0,7.0,0.0,0.974981,0.049861,27.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0.0,0,0
115,2021,0,6000,1,26,2,879.553459,477,17,0.0,0.0,0.0,3.0,3.0,2.0,0.5,0.5,0.5,1.0,0.0,36.0,0.0,345.950000,0,5.0,0.0,0.972201,0.154933,9.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0.0,0,0
1167,2022,1,6000,0,25,1,0.000000,0,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,240.950000,0,0.0,1.0,0.000000,0.087698,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,1,0,0,0,0.0,0,0
77,2021,0,6000,1,19,1,915.802817,71,0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.5,1.0,1.0,0.0,4.0,0.0,307.933333,0,4.0,0.0,0.961043,0.069383,45.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0.0,0,0
1169,2022,1,6000,1,20,1,0.000000,0,19,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0,0.0,243.000000,0,0.0,1.0,0.000000,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,1,0,0,0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005,2022,1,6000,1,28,2,807.797101,621,18,0.0,0.0,0.0,15.0,38.0,2.0,0.5,0.5,1.0,1.0,0.0,66.0,0.0,414.800000,0,6.0,1.0,0.969217,0.052582,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,1,0,0,0,0,0.0,0,0
1057,2022,1,6000,0,21,1,841.818731,331,19,0.0,0.0,0.0,4.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,24.0,0.0,312.983333,0,2.0,2.0,0.984033,0.394957,9.0,0.0,0.0,10.0,0.0,1.0,1.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,1,0,0.0,0,0
73,2021,0,6000,1,39,2,632.504762,105,10,0.0,0.0,0.0,4.0,1.0,2.0,0.5,1.0,0.5,1.0,0.0,6.0,0.0,418.000000,0,0.0,0.0,0.974690,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0.0,0,0
236,2021,0,6000,1,21,1,929.214953,107,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,360.366667,0,1.0,0.0,0.977282,0.046793,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0.0,0,0


In [7]:
models = {
    "                   Logistic Regression": LogisticRegression(),
    "                   K-Nearest Neighbors": KNeighborsClassifier(),
    "                         Decision Tree": DecisionTreeClassifier(),
    "Support Vector Machine (Linear Kernel)": LinearSVC(),
    "   Support Vector Machine (RBF Kernel)": SVC(),
    "                        Neural Network": MLPClassifier(),
    "                         Random Forest": RandomForestClassifier(),
    "                     Gradient Boosting": GradientBoostingClassifier()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")
    
for name, model in models.items():
    print(name + ": {:.2f}%".format(model.score(X_test, y_test) * 100))
    
Predict = Predict.drop(columns=['Current Status'], axis = 1)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                   Logistic Regression trained.
                   K-Nearest Neighbors trained.
                         Decision Tree trained.
Support Vector Machine (Linear Kernel) trained.
   Support Vector Machine (RBF Kernel) trained.
                        Neural Network trained.
                         Random Forest trained.
                     Gradient Boosting trained.
                   Logistic Regression: 68.40%
                   K-Nearest Neighbors: 65.43%
                         Decision Tree: 73.23%
Support Vector Machine (Linear Kernel): 37.55%
   Support Vector Machine (RBF Kernel): 62.45%
                        Neural Network: 46.84%
                         Random Forest: 73.23%
                     Gradient Boosting: 76.58%


In [338]:
Predict = Predict.drop(columns=['Test'], axis = 1)

In [8]:
log_mod = LogisticRegression()
log_mod.fit(X_train, y_train)
Predict['Test'] = log_mod.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


(0.7992957746478874,
 1    227
 0     57
 Name: Test, dtype: int64)

In [363]:
Predict = Predict.drop(columns=['Test'], axis = 1)
log_mod = KNeighborsClassifier()
log_mod.fit(X_train, y_train)
Predict['Test'] = log_mod.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(0.9455696202531646,
 1    747
 0     43
 Name: Test, dtype: int64)

In [350]:
Predict = Predict.drop(columns=['Test'], axis = 1)
Neural = DecisionTreeClassifier()
Neural.fit(X_train, y_train)
Predict['Test'] = Neural.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(0.8645569620253165,
 1    683
 0    107
 Name: Test, dtype: int64)

In [351]:
Predict = Predict.drop(columns=['Test'], axis = 1)
Neural = SVC()
Neural.fit(X_train, y_train)
Predict['Test'] = Neural.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(1.0,
 1    790
 Name: Test, dtype: int64)

In [352]:
Predict = Predict.drop(columns=['Test'], axis = 1)
Neural = MLPClassifier()
Neural.fit(X_train, y_train)
Predict['Test'] = Neural.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(0.9848101265822785,
 1    778
 0     12
 Name: Test, dtype: int64)

In [353]:
Predict = Predict.drop(columns=['Test'], axis = 1)
Neural = RandomForestClassifier()
Neural.fit(X_train, y_train)
Predict['Test'] = Neural.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(0.9620253164556962,
 1    760
 0     30
 Name: Test, dtype: int64)

In [354]:
Predict = Predict.drop(columns=['Test'], axis = 1)
Neural = GradientBoostingClassifier()
Neural.fit(X_train, y_train)
Predict['Test'] = Neural.predict(Predict)
df['Outcome'] = Predict['Test']
Predict['Test'].mean(),Predict['Test'].value_counts()

(0.9582278481012658,
 1    757
 0     33
 Name: Test, dtype: int64)

In [None]:
df['']

In [9]:
df.to_excel('At&T Risk List.xlsx',index = False)