In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Reading CSV data using pandas

In [None]:
# Load the supplier table and keep every column except the "Supplier" label
# as the numeric decision matrix used by TOPSIS.
df = pd.read_csv("expanded_green_supplier_selection.csv")
data_array = df.drop("Supplier", axis=1).to_numpy()

# Criterion weights (17 values summing to 1.00); presumably ordered like the
# columns of data_array -- verify against the CSV header.
weights_green = np.array([
    0.10, 0.10, 0.07, 0.06, 0.03, 0.05,
    0.10, 0.07, 0.06, 0.04, 0.05, 0.05,
    0.07, 0.06, 0.04, 0.02, 0.03,
])



Normalising Data and cleaning it for TOPSIS

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalize data using MinMaxScaler
def normalize_data(data):
    """Rescale ``data`` with scikit-learn's ``MinMaxScaler``.

    Parameters
    ----------
    data : array-like
        Values to rescale; handed unchanged to ``MinMaxScaler.fit_transform``.
        Presumably a 2-D matrix of alternatives x criteria -- confirm with caller.

    Returns
    -------
    The array produced by ``fit_transform`` on a scaler created with its
    default settings.
    """
    return MinMaxScaler().fit_transform(data)

Weighted Data Calculator
Multiplies each normalised criterion value by the weight of its criterion.

In [None]:
def weighed_data(weight, data):
    """Scale ``data`` by the supplied criterion weights.

    Parameters
    ----------
    weight : array-like
        Weights to apply. Must broadcast against ``data``; for a decision
        matrix this means one weight per column.
    data : array-like
        Values to be weighted (the normalised decision matrix in this notebook).

    Returns
    -------
    The element-wise product ``weight * data``.
    """
    # Use the weights passed by the caller. The previous version ignored this
    # argument and silently multiplied by the notebook-level ``weights_green``.
    weighed_normal = np.asarray(weight) * data
    return weighed_normal


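Calculates the Positive Ideal Solution (the best value of every criterion: the maximum for benefit criteria, the minimum for cost criteria)
and the Negative Ideal Solution (the worst value of every criterion: the minimum for benefit criteria, the maximum for cost criteria)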

In [None]:
def ideals(weighted_normal, benefits):
    """Return the positive and negative ideal solutions of a weighted matrix.

    Parameters
    ----------
    weighted_normal : array-like
        Matrix whose column-wise maxima and minima are taken (axis 0).
    benefits : iterable of bool
        Flag per column position: truthy keeps the maximum as the positive
        ideal; falsy swaps the maximum and minimum for that column.

    Returns
    -------
    tuple
        ``(positive_ideal, negative_ideal)``.
    """
    column_max = np.max(weighted_normal, axis=0)
    column_min = np.min(weighted_normal, axis=0)

    # Positions flagged as non-benefit: there the smallest value is the best one.
    cost_columns = [idx for idx, is_benefit in enumerate(benefits) if not is_benefit]
    if cost_columns:
        # Fancy indexing on the right-hand side yields copies, so the swap is safe.
        column_max[cost_columns], column_min[cost_columns] = (
            column_min[cost_columns],
            column_max[cost_columns],
        )
    return column_max, column_min


Euclidean Distance Calculator (Deviation)

In [None]:
def distance(weighted_data, ideal):
    """Euclidean distance from every row of ``weighted_data`` to ``ideal``.

    Parameters
    ----------
    weighted_data : numpy.ndarray
        2-D matrix; the squared differences are summed along axis 1.
    ideal : array-like
        Reference point, broadcast against the rows of ``weighted_data``.

    Returns
    -------
    One distance per row.
    """
    offsets = weighted_data - ideal
    squared_total = np.square(offsets).sum(axis=1)
    return np.sqrt(squared_total)

Relative Closeness Evaluation for TOPSIS

In [None]:

def relative_closeness(positive_dist, negative_dist):
    """TOPSIS relative closeness: ``negative / (negative + positive)``.

    Parameters
    ----------
    positive_dist : array-like
        Distance of each alternative to the positive ideal solution.
    negative_dist : array-like
        Distance of each alternative to the negative ideal solution.

    Returns
    -------
    numpy.ndarray
        Closeness scores. Where both distances are zero the ratio is
        undefined (0/0); those entries are set to 0.5, the value the ratio
        takes whenever the two distances are equal, instead of NaN.
    """
    positive_dist = np.asarray(positive_dist, dtype=float)
    negative_dist = np.asarray(negative_dist, dtype=float)
    total = negative_dist + positive_dist

    # Pre-fill with the neutral score and divide only where the denominator is
    # non-zero, so degenerate rows neither warn nor inject NaN into the ranking.
    scores = np.full(total.shape, 0.5)
    np.divide(negative_dist, total, out=scores, where=total != 0)
    return scores


TOPSIS evaluator for suppliers

In [None]:

def topsis(data, weights, benefit_criteria):
    """Run the TOPSIS pipeline assembled from this notebook's helper functions.

    The steps are: ``normalize_data`` -> ``weighed_data`` -> ``ideals`` ->
    ``distance`` (to each ideal) -> ``relative_closeness``.

    Parameters
    ----------
    data : array-like
        Decision matrix passed to ``normalize_data``.
    weights : array-like
        Criterion weights passed to ``weighed_data``.
    benefit_criteria : iterable of bool
        Per-criterion flags passed to ``ideals``.

    Returns
    -------
    tuple
        ``(closeness_scores, rankings, positive_ideal, negative_ideal,
        weighted_norm, distances_to_positive, distances_to_negative)``.
        ``rankings`` holds 0-based row indices ordered from the highest to the
        lowest closeness score.
    """
    weighted_norm = weighed_data(weights, normalize_data(data))
    positive_ideal, negative_ideal = ideals(weighted_norm, benefit_criteria)

    to_best = distance(weighted_norm, positive_ideal)
    to_worst = distance(weighted_norm, negative_ideal)
    scores = relative_closeness(to_best, to_worst)

    # argsort is ascending; reversing it lists the best-scoring row first.
    order = np.argsort(scores)[::-1]

    return (
        scores,
        order,
        positive_ideal,
        negative_ideal,
        weighted_norm,
        to_best,
        to_worst,
    )


In [None]:

# Direction flag for each criterion column passed to topsis():
# True = benefit (larger is better), False = cost (smaller is better).
benefit_criteria = [
    False, True, True, True, False, True,
    False, True, True, True, True, True,
    True, True, True, True, True,
]

(
    closeness_scores,
    rankings,
    positive_ideal,
    negative_ideal,
    weighted_norm,
    distances_to_positive,
    distances_to_negative,
) = topsis(data_array, weights_green, benefit_criteria)

print("Closeness Scores:", closeness_scores)
# rankings holds 0-based row positions; add 1 so suppliers are numbered from 1.
print("Rankings (from best to worst):", rankings + 1)

Scatter plot with scale for comparing Rankings of Suppliers

In [None]:
# Distance of every supplier to the positive and to the negative ideal solution.
positive_distances = distance(weighted_norm, positive_ideal)
negative_distances = distance(weighted_norm, negative_ideal)

plt.figure(figsize=(8, 6))
plt.scatter(
    positive_distances,
    negative_distances,
    c=closeness_scores,
    cmap='viridis',
    s=100,
)
# Label each point with its supplier number, counted from 1.
for supplier_no, point in enumerate(zip(positive_distances, negative_distances), start=1):
    plt.annotate(supplier_no, point, fontsize=9)
plt.title("Scatter Plot of Distances to Ideals")
plt.xlabel("Distance to Positive Ideal")
plt.ylabel("Distance to Negative Ideal")
plt.colorbar(label="Closeness Score")
plt.show()



Bar chart of the closeness score of each supplier

In [None]:
import seaborn as sns
import numpy as np

import matplotlib.pyplot as plt

# Bar chart of closeness scores: one bar per supplier, numbered from 1.
supplier_numbers = range(1, len(closeness_scores) + 1)

plt.figure(figsize=(20, 6))
plt.bar(supplier_numbers, closeness_scores, color='skyblue')
plt.title("Closeness Scores of Suppliers")
plt.xlabel("Supplier")
plt.ylabel("Closeness Score")
plt.xticks(supplier_numbers)  # one tick per supplier
plt.show()



In [None]:
from sklearn.model_selection import train_test_split
# Target: the 'Green Image (1-10)' column. Features: every remaining column
# except the 'Supplier' label.
y = df['Green Image (1-10)']
x = df.drop(columns=['Supplier', 'Green Image (1-10)'])
feature_names = x.columns  # reused later to build single-row prediction inputs

# Hold out 20% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

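Balance the dataset using SMOTE (Synthetic Minority Oversampling Technique)
SMOTE creates new synthetic samples for the minority classes by interpolating between existing minority samples and their nearest neighbours, so that those classes are no longer under-represented and the model has a well-balanced dataset to learn from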

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Split features and target
X = df.drop(columns=['Supplier', 'Green Image (1-10)'])
y = df['Green Image (1-10)']

# Oversample the minority classes. The fixed seed makes the synthetic samples,
# and therefore every metric computed from them, reproducible across runs.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# NOTE(review): resampling happens BEFORE the train/test split, so synthetic
# points interpolated from rows that end up in the test set can also appear in
# the training set. This leaks test information and tends to inflate the
# reported scores. Consider splitting first and applying SMOTE to the training
# portion only (check that every class keeps enough samples for k_neighbors).
x_train, x_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

print("Original dataset size:", df.shape)
print("Resampled dataset size:", X_resampled.shape)


Using Random Forest Classifier for prediction of Green Image

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with a fixed random_state so repeated runs build the same model.
classifier = RandomForestClassifier(random_state=0)
# Fit on the current training split; as the cell's last expression, the fitted
# estimator is also shown as the cell output.
classifier.fit(x_train, y_train)

Sample Output

In [None]:
# Predict the Green Image score for one hand-written supplier. The row is
# wrapped in a DataFrame carrying the training column names so scikit-learn
# does not warn about missing feature names (same approach as the KNN cell).
sample_supplier = pd.DataFrame(
    [[70, 8, 7, 9, 5, 200, 9, 7, 8, 9, 7, 8, 120, 9, 8, 6]],
    columns=feature_names,
)
classifier.predict(sample_supplier)

Confusion Matrix and metrics for Random Forest Classifier

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the test set with the random forest fitted above.
# (This cell previously called `model`, which is only created later for KNN:
# a NameError on a fresh kernel, or KNN metrics under the wrong heading.)
y_pred = classifier.predict(x_test)

# Print evaluation metrics
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Using KNN for prediction of Green Image

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# K-nearest-neighbours classifier that votes among the 5 closest training rows.
model = KNeighborsClassifier(n_neighbors=5)
model.fit(x_train, y_train)
# One hand-written supplier: 16 values, one per column in feature_names.
a = [[90,2,7,9,5,90,7,7,8,9,7,8,120,9,8,6]]

# Wrap the row in a DataFrame with the training column names before predicting;
# the prediction is the cell's last expression, so it is shown as the output.
x_test_df = pd.DataFrame(a, columns=feature_names)
model.predict(x_test_df)



Confusion Matrix and metrics for KNN Classifier

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Predict on the held-out test split with the KNN model (`model`).
y_pred = model.predict(x_test)
# Print per-class precision/recall/F1 and the confusion matrix
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))