In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [3]:
df = pd.read_csv('realistic_kraljic_dataset.csv')

In [4]:
df

Unnamed: 0,Product_ID,Product_Name,Supplier_Region,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,P001,Semiconductors,South America,81,171,255.03,5,5,4,Yes,Strategic
1,P002,Semiconductors,South America,8,763,380.33,5,4,4,Yes,Strategic
2,P003,Pharma APIs,Asia,65,413,385.24,4,5,5,Yes,Strategic
3,P004,Semiconductors,South America,70,882,287.64,5,5,5,Yes,Strategic
4,P005,Lithium Batteries,Asia,15,120,382.26,4,4,4,Yes,Strategic
...,...,...,...,...,...,...,...,...,...,...,...
995,P996,Printer Ink,Global,19,19648,10.20,2,1,1,No,Non-Critical
996,P997,Cleaning Supplies,Global,21,19143,15.30,1,2,1,No,Non-Critical
997,P998,Uniforms,Global,70,5352,30.23,1,3,2,No,Non-Critical
998,P999,Printer Ink,Global,44,18070,30.73,1,1,2,No,Non-Critical


In [5]:
df.columns


Index(['Product_ID', 'Product_Name', 'Supplier_Region', 'Lead_Time_Days',
       'Order_Volume_Units', 'Cost_per_Unit', 'Supply_Risk_Score',
       'Profit_Impact_Score', 'Environmental_Impact', 'Single_Source_Risk',
       'Kraljic_Category'],
      dtype='object')

In [7]:
df.drop(['Product_ID', 'Product_Name', 'Supplier_Region'], axis=1, inplace=True)

In [8]:
df

Unnamed: 0,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,81,171,255.03,5,5,4,Yes,Strategic
1,8,763,380.33,5,4,4,Yes,Strategic
2,65,413,385.24,4,5,5,Yes,Strategic
3,70,882,287.64,5,5,5,Yes,Strategic
4,15,120,382.26,4,4,4,Yes,Strategic
...,...,...,...,...,...,...,...,...
995,19,19648,10.20,2,1,1,No,Non-Critical
996,21,19143,15.30,1,2,1,No,Non-Critical
997,70,5352,30.23,1,3,2,No,Non-Critical
998,44,18070,30.73,1,1,2,No,Non-Critical


In [9]:
df['Single_Source_Risk'] = df['Single_Source_Risk'].map({'Yes':1,'No':0})

In [10]:
df

Unnamed: 0,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,81,171,255.03,5,5,4,1,Strategic
1,8,763,380.33,5,4,4,1,Strategic
2,65,413,385.24,4,5,5,1,Strategic
3,70,882,287.64,5,5,5,1,Strategic
4,15,120,382.26,4,4,4,1,Strategic
...,...,...,...,...,...,...,...,...
995,19,19648,10.20,2,1,1,0,Non-Critical
996,21,19143,15.30,1,2,1,0,Non-Critical
997,70,5352,30.23,1,3,2,0,Non-Critical
998,44,18070,30.73,1,1,2,0,Non-Critical


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Lead_Time_Days        1000 non-null   int64  
 1   Order_Volume_Units    1000 non-null   int64  
 2   Cost_per_Unit         1000 non-null   float64
 3   Supply_Risk_Score     1000 non-null   int64  
 4   Profit_Impact_Score   1000 non-null   int64  
 5   Environmental_Impact  1000 non-null   int64  
 6   Single_Source_Risk    1000 non-null   int64  
 7   Kraljic_Category      1000 non-null   object 
dtypes: float64(1), int64(6), object(1)
memory usage: 62.6+ KB


In [14]:
df.isnull().sum()

Unnamed: 0,0
Lead_Time_Days,0
Order_Volume_Units,0
Cost_per_Unit,0
Supply_Risk_Score,0
Profit_Impact_Score,0
Environmental_Impact,0
Single_Source_Risk,0
Kraljic_Category,0


In [15]:
df.columns

Index(['Lead_Time_Days', 'Order_Volume_Units', 'Cost_per_Unit',
       'Supply_Risk_Score', 'Profit_Impact_Score', 'Environmental_Impact',
       'Single_Source_Risk', 'Kraljic_Category'],
      dtype='object')

In [16]:
x = df.drop('Kraljic_Category', axis=1)
y = df['Kraljic_Category']

In [17]:
x

Unnamed: 0,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk
0,81,171,255.03,5,5,4,1
1,8,763,380.33,5,4,4,1
2,65,413,385.24,4,5,5,1
3,70,882,287.64,5,5,5,1
4,15,120,382.26,4,4,4,1
...,...,...,...,...,...,...,...
995,19,19648,10.20,2,1,1,0
996,21,19143,15.30,1,2,1,0
997,70,5352,30.23,1,3,2,0
998,44,18070,30.73,1,1,2,0


In [18]:
y

Unnamed: 0,Kraljic_Category
0,Strategic
1,Strategic
2,Strategic
3,Strategic
4,Strategic
...,...
995,Non-Critical
996,Non-Critical
997,Non-Critical
998,Non-Critical


In [19]:
# Calling the Models

knn = KNeighborsClassifier()
nb = GaussianNB()
lr = LogisticRegression()
svm = SVC()

In [20]:
# train_test_split

x_train,x_test,y_train,y_test = (train_test_split(x,y,test_size=0.2,random_state=42))

In [21]:
# Traning the Model

knn.fit(x_train,y_train)
nb.fit(x_train,y_train)
lr.fit(x_train,y_train)
svm.fit(x_train,y_train)

In [24]:
# Testing the Models

knn_y_pred = knn.predict(x_test)
nb_y_pred = nb.predict(x_test)
lr_y_pred = lr.predict(x_test)
svm_y_pred = svm.predict(x_test)

In [25]:
# Evaluation


print(classification_report(y_test, knn_y_pred))
print(confusion_matrix(y_test, knn_y_pred))
print(accuracy_score(y_test, knn_y_pred))

              precision    recall  f1-score   support

  Bottleneck       0.82      0.94      0.88        52
    Leverage       0.87      0.85      0.86        55
Non-Critical       0.84      0.86      0.85        49
   Strategic       0.92      0.75      0.82        44

    accuracy                           0.85       200
   macro avg       0.86      0.85      0.85       200
weighted avg       0.86      0.85      0.85       200

[[49  0  0  3]
 [ 0 47  8  0]
 [ 0  7 42  0]
 [11  0  0 33]]
0.855


In [26]:

print(classification_report(y_test, nb_y_pred))
print(confusion_matrix(y_test, nb_y_pred))
print(accuracy_score(y_test, nb_y_pred))

              precision    recall  f1-score   support

  Bottleneck       0.98      1.00      0.99        52
    Leverage       1.00      1.00      1.00        55
Non-Critical       1.00      1.00      1.00        49
   Strategic       1.00      0.98      0.99        44

    accuracy                           0.99       200
   macro avg       1.00      0.99      0.99       200
weighted avg       1.00      0.99      0.99       200

[[52  0  0  0]
 [ 0 55  0  0]
 [ 0  0 49  0]
 [ 1  0  0 43]]
0.995


In [27]:

print(classification_report(y_test, lr_y_pred))
print(confusion_matrix(y_test, lr_y_pred))
print(accuracy_score(y_test, lr_y_pred))

              precision    recall  f1-score   support

  Bottleneck       0.88      0.94      0.91        52
    Leverage       0.98      0.96      0.97        55
Non-Critical       0.96      0.98      0.97        49
   Strategic       0.93      0.84      0.88        44

    accuracy                           0.94       200
   macro avg       0.94      0.93      0.93       200
weighted avg       0.94      0.94      0.93       200

[[49  0  0  3]
 [ 0 53  2  0]
 [ 0  1 48  0]
 [ 7  0  0 37]]
0.935


In [28]:

print(classification_report(y_test, svm_y_pred))
print(confusion_matrix(y_test, svm_y_pred))
print(accuracy_score(y_test, svm_y_pred))

              precision    recall  f1-score   support

  Bottleneck       0.71      1.00      0.83        52
    Leverage       0.78      0.89      0.83        55
Non-Critical       0.95      0.71      0.81        49
   Strategic       0.85      0.52      0.65        44

    accuracy                           0.80       200
   macro avg       0.82      0.78      0.78       200
weighted avg       0.82      0.80      0.79       200

[[52  0  0  0]
 [ 0 49  2  4]
 [ 0 14 35  0]
 [21  0  0 23]]
0.795


In [None]:
# IN this dataset NB is the best Model