In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt



def rfe_feature_selection(indep_X, dep_Y, n_features):
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import LogisticRegression

    # Define the model
    model = LogisticRegression(random_state=0,solver='liblinear')

    # Create RFE selector with the specified model and number of features
    selector = RFE(estimator=model, n_features_to_select=n_features)
    selector.fit(indep_X, dep_Y)

    # Transform the features to keep the selected ones
    selected_features = selector.transform(indep_X)
    return selected_features




def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)    
    return X_train, X_test, y_train, y_test

def cm_prediction(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    
    # Making the Confusion Matrix
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_test, y_pred)
    
    from sklearn.metrics import accuracy_score 
    from sklearn.metrics import classification_report 
    
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, accuracy, report, X_test, y_test, cm

def logistic(X_train, y_train, X_test, y_test):       
    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test):
    from sklearn.svm import SVC
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def naive(X_train, y_train, X_test, y_test):       
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def knn(X_train, y_train, X_test, y_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def decision(X_train, y_train, X_test, y_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def random(X_train, y_train, X_test, y_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf): 
    dataframe = pd.DataFrame(index=['RFE'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Naive', 'Decision', 'Random'])
    for number, idex in enumerate(dataframe.index):      
        dataframe['Logistic'][idex] = acclog[number]       
        dataframe['SVMl'][idex] = accsvml[number]
        dataframe['SVMnl'][idex] = accsvmnl[number]
        dataframe['KNN'][idex] = accknn[number]
        dataframe['Naive'][idex] = accnav[number]
        dataframe['Decision'][idex] = accdes[number]
        dataframe['Random'][idex] = accrf[number]
    return dataframe

In [3]:
print(df2.columns)

Index(['Age', 'Years_At_Company', 'Performance_Score', 'Monthly_Salary',
       'Work_Hours_Per_Week', 'Projects_Handled', 'Promotions', 'Resigned',
       'Department_Engineering', 'Department_Finance', 'Department_HR',
       'Department_IT', 'Department_Legal', 'Department_Marketing',
       'Department_Operations', 'Department_Sales', 'Gender_Male',
       'Gender_Other', 'Job_Title_Consultant', 'Job_Title_Developer',
       'Job_Title_Engineer', 'Job_Title_Manager', 'Job_Title_Specialist',
       'Job_Title_Technician', 'Education_Level_High School',
       'Education_Level_Master', 'Education_Level_PhD'],
      dtype='object')


In [4]:
df2

Unnamed: 0,Age,Years_At_Company,Performance_Score,Monthly_Salary,Work_Hours_Per_Week,Projects_Handled,Promotions,Resigned,Department_Engineering,Department_Finance,...,Gender_Other,Job_Title_Consultant,Job_Title_Developer,Job_Title_Engineer,Job_Title_Manager,Job_Title_Specialist,Job_Title_Technician,Education_Level_High School,Education_Level_Master,Education_Level_PhD
0,55,2,5.0,6750,33,32,0,False,0,0,...,0,0,0,0,0,1,0,1,0,0
1,29,0,5.0,7500,34,34,2,False,0,1,...,0,0,1,0,0,0,0,1,0,0
2,55,8,3.0,5850,37,27,0,False,0,1,...,0,0,0,0,0,1,0,1,0,0
3,48,7,2.0,4800,52,10,1,False,0,0,...,0,0,0,0,0,0,0,0,0,0
4,36,3,2.0,4800,38,11,1,False,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,45,4,5.0,9000,48,9,1,False,0,0,...,0,0,0,0,1,0,0,0,0,0
245,32,2,4.0,6300,55,46,2,False,0,0,...,0,0,0,0,0,1,0,0,0,0
246,24,7,4.0,4900,30,33,0,False,0,0,...,0,0,0,0,0,0,1,0,0,0
247,43,3,2.0,4800,57,36,1,False,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
dataset1 = pd.read_csv("PreProcessedEmployee.csv", index_col=None)
dataset1
df2 = dataset1
df2 = pd.get_dummies(df2, drop_first=True)


# Convert all boolean columns to 0 and 1, except 'Resigned'
for col in df2.select_dtypes(include=['bool']).columns:
    if col != 'Resigned':  # Keep 'Resigned' as is
        df2[col] = df2[col].astype(int)
df2
indep_X = df2.drop(['Performance_Score','Work_Hours_Per_Week','Projects_Handled','Resigned','Department_Engineering',	'Department_Finance','Gender_Other','Job_Title_Consultant',	'Job_Title_Developer',	'Job_Title_Engineer',	'Job_Title_Manager'	,'Job_Title_Specialist'	,'Job_Title_Technician',	'Education_Level_Master',	'Education_Level_PhD'] ,axis=1)  # Features
dep_Y = df2['Performance_Score']  # Target


In [23]:
rfe_selected_features = rfe_feature_selection(indep_X, dep_Y, 3)    

acclog=[]
accsvml=[]
accsvmnl=[]
accknn=[]
accnav=[]
accdes=[]
accrf=[]


In [24]:
rfe_selected_features

array([[0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [1, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 1],
       [0, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 1, 1],
       [0,

In [25]:
X_train, X_test, y_train, y_test=split_scalar(rfe_selected_features,dep_Y)   

classifier, Accuracy, report, X_test, y_test, cm = logistic(X_train, y_train, X_test, y_test)
acclog.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = svm_linear(X_train, y_train, X_test, y_test)  
accsvml.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = svm_NL(X_train, y_train, X_test, y_test)  
accsvmnl.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = knn(X_train, y_train, X_test, y_test)  
accknn.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = naive(X_train, y_train, X_test, y_test)  
accnav.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = decision(X_train, y_train, X_test, y_test)  
accdes.append(Accuracy)

classifier, Accuracy, report, X_test, y_test, cm = random(X_train, y_train, X_test, y_test)  
accrf.append(Accuracy)

result = selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [9]:
result #2

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
RFE,0.238095,0.238095,0.238095,0.126984,0.142857,0.238095,0.238095


In [13]:
result #4

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
RFE,0.206349,0.206349,0.174603,0.190476,0.142857,0.174603,0.15873


In [17]:
result #5

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
RFE,0.190476,0.190476,0.190476,0.206349,0.142857,0.174603,0.142857


In [22]:
result #1

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
RFE,0.238095,0.238095,0.238095,0.126984,0.142857,0.238095,0.238095


In [26]:
result #3

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
RFE,0.206349,0.206349,0.206349,0.190476,0.142857,0.206349,0.206349
