In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import warnings
warnings.filterwarnings("ignore")

def preprocess_data(df, selected_features, selected_classes):
    # Filter data based on selected classes
    df_filtered = df[df['Class'].isin(selected_classes)]

    # Handle null values
    for feature in selected_features:
        df_filtered.loc[:, feature] = df_filtered[feature].fillna(df_filtered[feature].mean())
    
    # Encoding and dropping columns
    df_filtered['Class_encoded'] = np.where(df_filtered['Class'] == selected_classes[0], 1, -1)

    # mapping = {'BOMBAY': -1, 'CALI': 0, 'SIRA': 1}
    # df_filtered.loc[:, 'Class_encoded'] = df_filtered['Class'].map(mapping)
    # df_filtered.drop(columns=['Class'], inplace=True)
    
    # Outlier handling
    def find_range(col, df, class_value):
        class_rows = df[df['Class_encoded'] == class_value][col]
        Q1, Q3 = class_rows.quantile(0.25), class_rows.quantile(0.75)
        IQR = Q3 - Q1
        lower_range = Q1 - 1.5 * IQR
        upper_range = Q3 + 1.5 * IQR
        return lower_range, upper_range
    
    for col in selected_features:
        for class_value in selected_classes:
            lower_limit, upper_limit = find_range(col, df_filtered, class_value)
            class_rows = df_filtered['Class_encoded'] == class_value
            df_filtered.loc[class_rows, col] = df_filtered.loc[class_rows, col].clip(lower_limit, upper_limit)
    
    # Scaling
    df_filtered.loc[:, selected_features] = (df_filtered[selected_features] - df_filtered[selected_features].mean()) / df_filtered[selected_features].std()
    
    # Shuffle the DataFrame
    df_filtered = shuffle(df_filtered, random_state=42)
    
    # Splitting
    X = df_filtered[selected_features]
    y = df_filtered['Class_encoded']
    
    return X, y


In [225]:
import tkinter as tk
from tkinter import ttk
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron, SGDRegressor
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

class DryBeansClassificationGUI(tk.Tk):
    def __init__(self):
        super().__init__()
        self.title("Dry Beans Classification")
        self.geometry("800x800")

        # Features selection
        features_frame = ttk.LabelFrame(self, text="Select Features", padding=(10, 5))
        features_frame.grid(row=0, column=0, padx=10, pady=10, sticky="ew")

        self.feature_vars = []
        self.feature_checkbuttons = []

        features = ["Area", "Perimeter", "MajorAxisLength", "MinorAxisLength", "Roundness"]
        for i, feature in enumerate(features):
            var = tk.BooleanVar(value=False)
            self.feature_vars.append(var)
            checkbutton = ttk.Checkbutton(features_frame, text=feature, variable=var, style="TCheckbutton")
            checkbutton.grid(row=i, column=0, sticky="w", padx=(10, 0), pady=5)
            self.feature_checkbuttons.append(checkbutton)

        # Classes selection
        classes_frame = ttk.LabelFrame(self, text="Select Classes", padding=(10, 5))
        classes_frame.grid(row=1, column=0, padx=10, pady=10, sticky="ew")

        classes_label = ttk.Label(classes_frame, text="Select classes:")
        classes_label.grid(row=0, column=0, padx=(10, 5), pady=5, sticky="w")

        self.available_classes = ["BOMBAY", "CALI", "SIRA"]
        class_combinations = [' & '.join(comb) for comb in combinations(self.available_classes, 2)]
        self.selected_classes = tk.StringVar()  # Variable to store selected class combination
        self.classes_combobox = ttk.Combobox(classes_frame, values=class_combinations, textvariable=self.selected_classes)
        self.classes_combobox.grid(row=0, column=1, padx=(0, 10), pady=5, sticky="ew")

        # Learning parameters
        parameters_frame = ttk.LabelFrame(self, text="Learning Parameters", padding=(10, 5))
        parameters_frame.grid(row=2, column=0, padx=10, pady=10, sticky="ew")

        lp_label = ttk.Label(parameters_frame, text="Enter learning rate:")
        lp_label.grid(row=0, column=0, padx=(10, 5), pady=5, sticky="w")
        self.lp_entry = ttk.Entry(parameters_frame)
        self.lp_entry.grid(row=0, column=1, padx=(0, 10), pady=5, sticky="ew")

        epochs_label = ttk.Label(parameters_frame, text="Enter number of epochs:")
        epochs_label.grid(row=1, column=0, padx=(10, 5), pady=5, sticky="w")
        self.epochs_entry = ttk.Entry(parameters_frame)
        self.epochs_entry.grid(row=1, column=1, padx=(0, 10), pady=5, sticky="ew")

        mse_label = ttk.Label(parameters_frame, text="Enter MSE threshold:")
        mse_label.grid(row=2, column=0, padx=(10, 5), pady=5, sticky="w")
        self.mse_entry = ttk.Entry(parameters_frame)
        self.mse_entry.grid(row=2, column=1, padx=(0, 10), pady=5, sticky="ew")

        # Bias checkbox
        self.bias_var = tk.BooleanVar()
        bias_checkbox = ttk.Checkbutton(self, text="Add Bias", variable=self.bias_var, style="TCheckbutton")
        bias_checkbox.grid(row=3, column=0, padx=10, pady=(10, 5), sticky="w")

        # Algorithm selection
        algorithm_frame = ttk.LabelFrame(self, text="Algorithm Selection", padding=(10, 5))
        algorithm_frame.grid(row=4, column=0, padx=10, pady=10, sticky="ew")

        self.algorithm_var = tk.StringVar(value="Perceptron")
        perceptron_radio = ttk.Radiobutton(algorithm_frame, text="Perceptron", variable=self.algorithm_var, value="Perceptron", style="TRadiobutton")
        perceptron_radio.grid(row=0, column=0, padx=(10, 5), pady=5, sticky="w")
        adaline_radio = ttk.Radiobutton(algorithm_frame, text="Adaline", variable=self.algorithm_var, value="Adaline", style="TRadiobutton")
        adaline_radio.grid(row=0, column=1, padx=(0, 10), pady=5, sticky="ew")

        # Classify button
        classify_button = ttk.Button(self, text="Classify", command=self.classify)
        classify_button.grid(row=5, column=0, padx=10, pady=10, sticky="ew")
        
    def classify(self):
    # Fetch user inputs
        selected_features = [self.feature_checkbuttons[i]['text'] for i in range(len(self.feature_checkbuttons)) if self.feature_vars[i].get()]
        selected_classes_str = self.selected_classes.get()
        selected_classes = selected_classes_str.split(" & ")
        learning_rate = float(self.lp_entry.get())
        epochs = int(self.epochs_entry.get())
        mse_thresh = float(self.mse_entry.get())
        add_bias = self.bias_var.get()
        selected_algorithm = self.algorithm_var.get()

        # Read the data
        df = pd.read_csv("Dry_Bean_Dataset.csv")

        # Preprocess data
        X, y = preprocess_data(df, selected_features, selected_classes)

        # Train and test the classifier
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, stratify=y, random_state=41)

        if selected_algorithm == "Perceptron":
            model = Perceptron(eta0=learning_rate, max_iter=epochs, tol=None, shuffle=False, random_state=0)
        else:  # Adaline
            model = SGDRegressor(learning_rate='constant', eta0=learning_rate, max_iter=epochs, tol=mse_thresh, shuffle=False, random_state=0)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate accuracy and confusion matrix
        accuracy = np.mean(y_pred == y_test)
        confusion_matrix = self.calculate_confusion_matrix(y_test, y_pred)

        print("Accuracy:", accuracy)
        print("Confusion Matrix:")
        print(confusion_matrix) 
        mapping = {'BOMBAY': -1, 'CALI': 0, 'SIRA': 1}
        decoded_y_pred = [list(mapping.keys())[list(mapping.values()).index(pred)] for pred in y_pred]

        # Print predictions with class names
        for idx, pred_class_name in enumerate(decoded_y_pred):
            print(f"Sample {idx + 1}: Predicted class - {pred_class_name}")


    def calculate_confusion_matrix(self, y_true, y_pred):
        unique_labels = np.unique(np.concatenate((y_true, y_pred)))
        num_labels = len(unique_labels)
        confusion_matrix = np.zeros((num_labels, num_labels), dtype=int)

        label_to_index = {label: i for i, label in enumerate(unique_labels)}

        for true_label, pred_label in zip(y_true, y_pred):
            true_index = label_to_index[true_label]
            pred_index = label_to_index[pred_label]
            confusion_matrix[true_index, pred_index] += 1

        # Compute precision, recall, and F1-score for each class
        precision = np.zeros(num_labels)
        recall = np.zeros(num_labels)
        f1 = np.zeros(num_labels)
        support = np.sum(confusion_matrix, axis=1)

        for i in range(num_labels):
            true_positive = confusion_matrix[i, i]
            false_positive = np.sum(confusion_matrix[:, i]) - true_positive
            false_negative = np.sum(confusion_matrix[i, :]) - true_positive

            precision[i] = true_positive / (true_positive + false_positive) if true_positive + false_positive > 0 else 0
            recall[i] = true_positive / (true_positive + false_negative) if true_positive + false_negative > 0 else 0
            f1[i] = 2 * (precision[i] * recall[i]) / (precision[i] + recall[i]) if precision[i] + recall[i] > 0 else 0

        # Compute overall accuracy
        correct_predictions = np.sum(np.diag(confusion_matrix))
        total_predictions = np.sum(confusion_matrix)
        accuracy = correct_predictions / total_predictions

        # Format the output
        confusion_matrix_output = ""
        confusion_matrix_output += "   precision  recall  f1-score  support\n"
        for label, precision_value, recall_value, f1_value, support_value in zip(unique_labels, precision, recall, f1, support):
            confusion_matrix_output += f"{label}\t{precision_value:.2f}\t{recall_value:.2f}\t{f1_value:.2f}\t{support_value}\n"
        confusion_matrix_output += f"\naccuracy\t\t\t{accuracy:.2f}\n"

        return confusion_matrix_output

app = DryBeansClassificationGUI()
app.mainloop()


Accuracy: 1.0
Confusion Matrix:
   precision  recall  f1-score  support
-1	1.00	1.00	1.00	20
0	1.00	1.00	1.00	20

accuracy			1.00

Sample 1: Predicted class - CALI
Sample 2: Predicted class - BOMBAY
Sample 3: Predicted class - CALI
Sample 4: Predicted class - BOMBAY
Sample 5: Predicted class - BOMBAY
Sample 6: Predicted class - BOMBAY
Sample 7: Predicted class - CALI
Sample 8: Predicted class - BOMBAY
Sample 9: Predicted class - BOMBAY
Sample 10: Predicted class - BOMBAY
Sample 11: Predicted class - CALI
Sample 12: Predicted class - BOMBAY
Sample 13: Predicted class - CALI
Sample 14: Predicted class - CALI
Sample 15: Predicted class - CALI
Sample 16: Predicted class - CALI
Sample 17: Predicted class - CALI
Sample 18: Predicted class - BOMBAY
Sample 19: Predicted class - CALI
Sample 20: Predicted class - BOMBAY
Sample 21: Predicted class - BOMBAY
Sample 22: Predicted class - BOMBAY
Sample 23: Predicted class - CALI
Sample 24: Predicted class - BOMBAY
Sample 25: Predicted class - CALI
Sa

In [16]:
import pandas as pd
import numpy as np

df =pd.read_csv("Dry_Bean_Dataset.csv")

df.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,roundnes,Class
0,114004.0,1279.356,451.361256,323.747996,0.87528,BOMBAY
1,117034.0,1265.926,425.923788,351.215109,0.91771,BOMBAY
2,126503.0,1326.959,475.772459,339.381887,0.902809,BOMBAY
3,128118.0,1360.135,504.024964,,0.870274,BOMBAY
4,129409.0,1348.888,484.364424,341.172659,0.893763,BOMBAY


In [61]:
import numpy as np

class SLP:
    def __init__(self,random_state=42,epochs=1000,lr=0) -> None:
        np.random.seed(random_state)
        self.weight=None
        self.pred=None
        self.learning_rate=lr
        self.epochs=epochs
    def fit(self,X,Y):

        
        X=np.concatenate([np.ones((X.shape[0],1)),X.to_numpy()],axis=1)
        self.weight=np.random.rand(X.shape[1]).reshape(-1,1)
        # print("Initial Weights:" , self.weight)
        for _ in range(self.epochs):
            for i,x in enumerate(X):

                self.net_value = np.dot(self.weight.T,x.reshape(-1,1))
                # print(self.net_value[0][0])
                if self.net_value[0][0]>0:
                    self.A = 1
                elif self.net_value[0][0]<0:
                    self.A = -1
                else:
                    self.A = 0
                if Y.iloc[i]!=self.A:
                    print("ERror")
                    error = Y.iloc[i]-self.A
                    self.weight=self.weight + self.learning_rate*(error*x.reshape(-1,1))

    def predict(self,X_test):
        X_test=np.concatenate([np.ones((X_test.shape[0],1)),X_test.to_numpy()],axis=1)
        print(len(X_test))
        self.net_value = np.dot(self.weight.T,X_test.T)
        self.net_value=np.where(self.net_value>0,1,np.where(self.net_value<0,-1,0))
        return self.net_value

    

class Adaline:
    def __init__(self,random_state=42,epochs=1000,lr=0) -> None:
        np.random.seed(random_state)
        self.weight=None
        self.pred=None
        self.learning_rate=lr
        self.epochs=epochs
    def fit(self,X,Y,MSE_Threshold):

        
        X=np.concatenate([np.ones((X.shape[0],1)),X.to_numpy()],axis=1)
        self.weight=np.random.rand(X.shape[1]).reshape(-1,1)
        print(self.weight)
        for _ in range(self.epochs):
            for i,x in enumerate(X):

                self.net_value = np.dot(self.weight.T,x.reshape(-1,1))

                error = Y.iloc[i]-self.net_value[0][0]

                self.weight = self.weight - self.learning_rate * error * x.reshape(-1,1)
            for i,x in enumerate(X):
                 self.net_value = np.dot(self.weight.T,x.reshape(-1,1))
                 MSE=0
                 MSE+=0.5*((Y.iloc[i]-self.net_value)**2)
            if (MSE/len(X))<MSE_Threshold:
                break





In [18]:
X, y = preprocess_data(df, ["Area","Perimeter"], ["BOMBAY","CALI"])

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, stratify=y, random_state=41)

In [68]:
model = SLP(random_state=42,epochs=2,lr=0.001)
model.fit(X_train, y_train)
print(model.weight)
y_pred=model.predict(X_test)



[[0.37454012]
 [0.95071431]
 [0.73199394]]
40


In [70]:
accuracy = np.mean(y_pred == y_test.to_numpy())

In [85]:
y_test.to_numpy().reshape(1,-1)

array([[ 1, -1,  1, -1, -1, -1,  1, -1, -1, -1,  1, -1,  1,  1,  1,  1,
         1, -1,  1, -1, -1, -1,  1, -1,  1,  1, -1,  1, -1, -1,  1, -1,
        -1, -1,  1, -1,  1,  1,  1,  1]])

In [122]:
def calculate_confusion_matrix(y_true, y_pred):
        unique_labels = np.unique(np.concatenate((y_true, y_pred)))
        num_labels = len(unique_labels)
        confusion_matrix = np.zeros((num_labels, num_labels), dtype=int)

        label_to_index = {label: i for i, label in enumerate(unique_labels)}

        for true_label, pred_label in zip(y_true[0], y_pred[0]):
            true_index = label_to_index[true_label]
            pred_index = label_to_index[pred_label]
            confusion_matrix[true_index, pred_index] += 1

        # Compute precision, recall, and F1-score for each class
        precision = np.zeros(num_labels)
        recall = np.zeros(num_labels)
        f1 = np.zeros(num_labels)
        support = np.sum(confusion_matrix, axis=1)

        for i in range(num_labels):
            true_positive = confusion_matrix[i, i]
            false_positive = np.sum(confusion_matrix[:, i]) - true_positive
            false_negative = np.sum(confusion_matrix[i, :]) - true_positive

            precision[i] = true_positive / (true_positive + false_positive) if true_positive + false_positive > 0 else 0
            recall[i] = true_positive / (true_positive + false_negative) if true_positive + false_negative > 0 else 0
            f1[i] = 2 * (precision[i] * recall[i]) / (precision[i] + recall[i]) if precision[i] + recall[i] > 0 else 0

        # Compute overall accuracy
        correct_predictions = np.sum(np.diag(confusion_matrix))
        total_predictions = np.sum(confusion_matrix)
        accuracy = correct_predictions / total_predictions

        # Format the output
        
        confusion_matrix_output = "Confusion Matrix:\n"
        confusion_matrix_output += "   precision  recall  f1-score  support\n"
        for label, precision_value, recall_value, f1_value, support_value in zip(unique_labels, precision, recall, f1, support):
            confusion_matrix_output += f"{label}\t{precision_value:.2f}\t{recall_value:.2f}\t{f1_value:.2f}\t{support_value}\n"
        confusion_matrix_output += f"\nAccuracy:\t\t\t{accuracy:.2f}\n"

        # print(confusion_matrix_output)
        return confusion_matrix_output

In [123]:
print(calculate_confusion_matrix( y_test.to_numpy().reshape(1,-1),y_pred))

Confusion Matrix:
   precision  recall  f1-score  support
-1	1.00	1.00	1.00	20
1	1.00	1.00	1.00	20

Accuracy:			1.00



In [6]:
# for x in X:
#     print(x)
#     print("=============")

In [118]:
np.random.seed(42)
X=np.concatenate([np.ones((df[["Area","Perimeter"]].shape[0],1)),df[["Area","Perimeter"]].to_numpy()],axis=1)
w=np.random.rand(X.shape[1]).reshape(-1,1)
print(X[0].reshape(-1,1))
print(w)

np.dot(w.T,X.T)

[[1.000000e+00]
 [1.140040e+05]
 [1.279356e+03]]
[[0.37454012]
 [0.95071431]
 [0.73199394]]


array([[109322.08916949, 112192.92283928, 121239.91239292,
        122799.60062879, 124018.7400625 , 124402.56014346,
        124656.92701822, 125787.09354872, 126009.43552833,
        126730.33053989, 126851.85698945, 127306.15728454,
        127345.24900364, 128170.56008338, 128421.90249924,
        128653.31388379, 129561.91641566, 129741.5140149 ,
        130214.83036612, 131368.27532761, 131402.16301014,
        131587.5021127 , 131778.63094683, 131976.57248881,
        132126.70277111, 132323.52227771, 132309.23305559,
        132452.56632804, 132472.85340581, 132861.56232561,
        132964.64573309, 133154.84144376, 133679.65769785,
        133766.72317354, 133828.32312594, 133996.14161012,
        134446.18756791, 134959.93212485, 135060.39659233,
        135463.44865615, 135600.70945556, 135908.03422791,
        135983.41458404, 136156.98642065, 136271.43240414,
        136428.09369371, 136595.82172528, 137554.40409449,
        138029.47208423, 138048.52502021,  43842.4547763

In [119]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Area,150.0,75557.933333,44232.03417,31519.0,35139.25,56756.5,132879.5,144079.0
Perimeter,150.0,999.372293,293.752695,668.106,708.69,914.957,1369.27775,1463.258
MajorAxisLength,150.0,370.564985,105.115378,233.804968,264.743366,352.010221,497.101354,540.677823
MinorAxisLength,149.0,240.245914,75.49063,157.80274,175.379706,206.618773,338.364471,376.550241
roundnes,150.0,0.875078,0.034295,0.688618,0.855589,0.880003,0.893303,0.954104
