In [1]:

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
df = pd.read_csv("/content/realistic_kraljic_dataset.csv")


In [3]:
df

Unnamed: 0,Product_ID,Product_Name,Supplier_Region,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,P001,Semiconductors,South America,81,171,255.03,5,5,4,Yes,Strategic
1,P002,Semiconductors,South America,8,763,380.33,5,4,4,Yes,Strategic
2,P003,Pharma APIs,Asia,65,413,385.24,4,5,5,Yes,Strategic
3,P004,Semiconductors,South America,70,882,287.64,5,5,5,Yes,Strategic
4,P005,Lithium Batteries,Asia,15,120,382.26,4,4,4,Yes,Strategic
...,...,...,...,...,...,...,...,...,...,...,...
995,P996,Printer Ink,Global,19,19648,10.20,2,1,1,No,Non-Critical
996,P997,Cleaning Supplies,Global,21,19143,15.30,1,2,1,No,Non-Critical
997,P998,Uniforms,Global,70,5352,30.23,1,3,2,No,Non-Critical
998,P999,Printer Ink,Global,44,18070,30.73,1,1,2,No,Non-Critical


In [4]:
df.columns

Index(['Product_ID', 'Product_Name', 'Supplier_Region', 'Lead_Time_Days',
       'Order_Volume_Units', 'Cost_per_Unit', 'Supply_Risk_Score',
       'Profit_Impact_Score', 'Environmental_Impact', 'Single_Source_Risk',
       'Kraljic_Category'],
      dtype='object')

In [5]:
df["Kraljic_Category"].unique()

array(['Strategic', 'Bottleneck', 'Leverage', 'Non-Critical'],
      dtype=object)

In [6]:
df1 = df.drop(['Product_Name', 'Supplier_Region','Product_ID'], axis=1)


In [7]:
df1

Unnamed: 0,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,81,171,255.03,5,5,4,Yes,Strategic
1,8,763,380.33,5,4,4,Yes,Strategic
2,65,413,385.24,4,5,5,Yes,Strategic
3,70,882,287.64,5,5,5,Yes,Strategic
4,15,120,382.26,4,4,4,Yes,Strategic
...,...,...,...,...,...,...,...,...
995,19,19648,10.20,2,1,1,No,Non-Critical
996,21,19143,15.30,1,2,1,No,Non-Critical
997,70,5352,30.23,1,3,2,No,Non-Critical
998,44,18070,30.73,1,1,2,No,Non-Critical


In [8]:
df1.columns

Index(['Lead_Time_Days', 'Order_Volume_Units', 'Cost_per_Unit',
       'Supply_Risk_Score', 'Profit_Impact_Score', 'Environmental_Impact',
       'Single_Source_Risk', 'Kraljic_Category'],
      dtype='object')

In [9]:
df1["Single_Source_Risk"] = df1["Single_Source_Risk"].map({"Yes":1,"No":0})

In [10]:
df1

Unnamed: 0,Lead_Time_Days,Order_Volume_Units,Cost_per_Unit,Supply_Risk_Score,Profit_Impact_Score,Environmental_Impact,Single_Source_Risk,Kraljic_Category
0,81,171,255.03,5,5,4,1,Strategic
1,8,763,380.33,5,4,4,1,Strategic
2,65,413,385.24,4,5,5,1,Strategic
3,70,882,287.64,5,5,5,1,Strategic
4,15,120,382.26,4,4,4,1,Strategic
...,...,...,...,...,...,...,...,...
995,19,19648,10.20,2,1,1,0,Non-Critical
996,21,19143,15.30,1,2,1,0,Non-Critical
997,70,5352,30.23,1,3,2,0,Non-Critical
998,44,18070,30.73,1,1,2,0,Non-Critical


In [11]:
x = df1[['Lead_Time_Days', 'Order_Volume_Units', 'Cost_per_Unit',
       'Supply_Risk_Score', 'Profit_Impact_Score', 'Environmental_Impact',
       'Single_Source_Risk']]
y = df1['Kraljic_Category']

In [12]:
knn_model = KNeighborsClassifier()
NB_model  = GaussianNB()
LR_model  = LogisticRegression()
svm_model = SVC()

In [13]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2 , random_state=12)

In [14]:
knn_model.fit(x_train,y_train)

NB_model.fit(x_train,y_train)

LR_model.fit(x_train,y_train)

svm_model.fit(x_train,y_train)


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
y_pred_knn = knn_model.predict(x_test)

y_pred_NB  = NB_model.predict(x_test)

y_pred_LR  = LR_model.predict(x_test)

y_pred_svm = svm_model.predict(x_test)

In [16]:
print(classification_report(y_test, y_pred_knn))

              precision    recall  f1-score   support

  Bottleneck       0.79      0.91      0.84        54
    Leverage       0.84      0.86      0.85        50
Non-Critical       0.86      0.85      0.85        52
   Strategic       0.86      0.70      0.78        44

    accuracy                           0.83       200
   macro avg       0.84      0.83      0.83       200
weighted avg       0.84      0.83      0.83       200



In [17]:
print(classification_report(y_test, y_pred_NB))

              precision    recall  f1-score   support

  Bottleneck       0.98      1.00      0.99        54
    Leverage       1.00      1.00      1.00        50
Non-Critical       1.00      1.00      1.00        52
   Strategic       1.00      0.98      0.99        44

    accuracy                           0.99       200
   macro avg       1.00      0.99      0.99       200
weighted avg       1.00      0.99      0.99       200



In [18]:
print(classification_report(y_test, y_pred_LR))

              precision    recall  f1-score   support

  Bottleneck       0.93      0.78      0.85        54
    Leverage       1.00      0.98      0.99        50
Non-Critical       0.98      1.00      0.99        52
   Strategic       0.77      0.93      0.85        44

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.93      0.92      0.92       200



In [19]:
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

  Bottleneck       0.75      0.98      0.85        54
    Leverage       0.76      0.94      0.84        50
Non-Critical       0.97      0.71      0.82        52
   Strategic       0.90      0.59      0.71        44

    accuracy                           0.81       200
   macro avg       0.84      0.81      0.81       200
weighted avg       0.84      0.81      0.81       200



In [20]:
import pickle

In [21]:
with open("NB_model.pkl","wb") as f:
  pickle.dump(NB_model,f)