In [1]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns


In [2]:
# Input locations. `path` is the image directory; it keeps its trailing slash
# because file names are appended to it by plain string concatenation later on.
# `info_file` is the space-separated metadata file loaded with pandas.
# NOTE(review): both are absolute, machine-specific paths — adjust before
# running this notebook on another machine.
path = "C:/users/pc/breast_cancer_project/all-mias/"
info_file = "C:/users/pc/breast_cancer_project/Info.txt"

In [3]:
# Load the space-separated metadata table and discard the column pandas
# labelled 'Unnamed: 7' (presumably an artifact of the file layout — confirm).
print("Reading dataset")
info = pd.read_csv(info_file, sep=" ").drop(columns='Unnamed: 7')

Reading dataset


In [4]:
# Rows without a severity value are marked 'N'. The filled column is assigned
# back explicitly: calling fillna(inplace=True) on the `info['SEVERITY']`
# selection is chained in-place assignment, which pandas deprecates and which
# leaves `info` itself unchanged once Copy-on-Write is active.
info['SEVERITY'] = info['SEVERITY'].fillna('N')

In [5]:
# Display the metadata table as this cell's output for a visual sanity check.
info

Unnamed: 0,REFNUM,BG,CLASS,SEVERITY,X,Y,RADIUS
0,mdb001,G,CIRC,B,535.0,425.0,197.0
1,mdb002,G,CIRC,B,522.0,280.0,69.0
2,mdb003,D,NORM,N,,,
3,mdb004,D,NORM,N,,,
4,mdb005,F,CIRC,B,477.0,133.0,30.0
...,...,...,...,...,...,...,...
325,mdb318,D,NORM,N,,,
326,mdb319,D,NORM,N,,,
327,mdb320,D,NORM,N,,,
328,mdb321,D,NORM,N,,,


In [5]:
# Replace the severity strings with integer class ids. The fitted encoder is
# kept in `lb` so the string-to-id mapping stays available (lb.classes_).
lb = LabelEncoder().fit(info["SEVERITY"])
info["SEVERITY"] = lb.transform(info["SEVERITY"])

In [6]:
# Build the target list row by row: encoded severities 0 and 1 are kept
# as-is, and every other value is assigned to class 2.
label = [
    0 if info.SEVERITY[row] == 0 else 1 if info.SEVERITY[row] == 1 else 2
    for row in range(len(info))
]

In [7]:
# One ".pgm" file path per label, in row order, built from the image
# directory and the REFNUM column, stored as a NumPy array.
img_name = np.array(
    [path + info.REFNUM[row] + ".pgm" for row in range(len(label))]
)

In [8]:
# Feature extractor: VGG16 with ImageNet weights and no classification head,
# accepting 128x128 three-channel inputs and exposing the 'block5_pool'
# activations as its output.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)
pooled_output = base_model.get_layer('block5_pool').output
model = Model(inputs=base_model.input, outputs=pooled_output)

In [9]:
def extract_features(image_paths, model):
    """Extract one flattened feature vector per image using ``model``.

    Each image is read with OpenCV, resized to 128x128, converted from BGR to
    RGB, passed through ``preprocess_input`` and given to ``model.predict`` as
    a batch of one. The prediction is flattened to a 1-D vector.

    Args:
        image_paths: Iterable of image file paths.
        model: Object exposing ``predict`` (the VGG16-based extractor in this
            notebook).

    Returns:
        np.ndarray holding one flattened feature vector per input path, in
        input order (an empty array when ``image_paths`` is empty).

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the files.
    """
    features = []
    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None instead of raising; fail here with the offending path
            # rather than letting cv2.resize abort with an opaque error.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.resize(img, (128, 128))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = preprocess_input(img)
        # The model expects a leading batch axis.
        img = np.expand_dims(img, axis=0)
        feature = model.predict(img)
        features.append(feature.flatten())
    return np.array(features)

In [10]:
# Run every image path through the VGG16 extractor to build the feature matrix.
x_features = extract_features(image_paths=img_name, model=model)



In [11]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_features,
    label,
    test_size=0.20,
    random_state=42,
)

In [None]:

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from exc_dist import exc_dist_dist
from functools import partial

# Make sure both splits are 2-D (one row per sample) before scaling.
x_train_flat = x_train.reshape(len(x_train), -1)
x_test_flat = x_test.reshape(len(x_test), -1)

# Divide every feature value by 255.
x_train_scaled, x_test_scaled = x_train_flat / 255, x_test_flat / 255

# Experiment settings: the built-in metric names to compare, the primes passed
# to the custom distance as `exc_dist_p`, and the neighbour count.
# NOTE(review): with scikit-learn's default p=2, 'minkowski' should coincide
# with 'euclidean' — confirm whether both are meant to be reported.
distance_metrics = ['euclidean', 'manhattan', 'minkowski', 'chebyshev']
prime_numbers = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
n_neighbors = 3

def create_knn_model(metric, x_train, y_train, n):
    """Fit and evaluate k-NN classifier(s) for ``metric`` and record the scores.

    For a built-in metric name a single classifier is fitted and its
    evaluation is stored in the module-level ``results`` dict under
    ``results[metric]``. For ``'exc_dist'`` one classifier is fitted per entry
    of the module-level ``prime_numbers``, using ``exc_dist_dist`` as a
    callable metric, and each evaluation is stored under
    ``results['exc_dist'][prime]``.

    Evaluation always runs on the module-level ``x_test_scaled`` (and, through
    ``evaluate_model``, the module-level ``y_test``).

    Args:
        metric: Metric name accepted by KNeighborsClassifier, or 'exc_dist'.
        x_train: Training feature matrix.
        y_train: Training labels.
        n: Number of neighbours for every classifier created here.

    Returns:
        None. Scores are written into ``results``.
    """
    if metric == 'exc_dist':
        # Decimal places for rounding in the p-adic distance (loop-invariant).
        exc_dist_decimal = 2
        for prime in prime_numbers:
            distance_func = partial(
                exc_dist_dist, exc_dist_p=prime, dist_dec=exc_dist_decimal)

            # Honour the caller's `n` and `x_train`, exactly like the built-in
            # branch below. Previously this branch hard-coded n_neighbors=5 and
            # fitted on the global training matrix, so these models did not
            # match the neighbour count the caller asked for.
            knn_model = KNeighborsClassifier(n_neighbors=n, metric=distance_func)
            knn_model.fit(x_train, y_train)
            results.setdefault('exc_dist', {})[prime] = evaluate_model(
                knn_model, x_test_scaled)
    else:
        knn_model = KNeighborsClassifier(n_neighbors=n, metric=metric)
        knn_model.fit(x_train, y_train)
        results[metric] = evaluate_model(knn_model, x_test_scaled)

def evaluate_model(model, x_test_scaled):
    """Score a fitted classifier on ``x_test_scaled``.

    Predictions are compared against the module-level ``y_test``.

    Returns:
        dict with the keys 'Accuracy', 'Confusion Matrix' and
        'Classification Report'; the report is produced as a dict with
        ``zero_division=1``.
    """
    predictions = model.predict(x_test_scaled)
    report = classification_report(
        y_test, predictions, output_dict=True, zero_division=1)
    return {
        'Accuracy': accuracy_score(y_test, predictions),
        'Confusion Matrix': confusion_matrix(y_test, predictions),
        'Classification Report': report,
    }


# Scores collected by create_knn_model: one entry per built-in metric, plus a
# nested dict keyed by prime for the custom 'exc_dist' metric.
results = {'exc_dist': {}}

for metric in [*distance_metrics, 'exc_dist']:
    create_knn_model(metric, x_train_scaled, y_train, n_neighbors)

# Report every evaluated model. The custom metric contributes one entry per
# prime, labelled "exc_dist_<prime>"; built-in metrics contribute one each.
for metric, metric_results in results.items():
    if metric == 'exc_dist':
        entries = [
            (f"exc_dist_{prime_number}", prime_results)
            for prime_number, prime_results in metric_results.items()
        ]
    else:
        entries = [(metric, metric_results)]

    for label_text, scores in entries:
        print(f"Metric: {label_text}")
        print(f"Accuracy: {scores['Accuracy']}")
        print("Confusion Matrix:")
        print(scores['Confusion Matrix'])
        print("Classification Report:")
        print(scores['Classification Report'])
        print("\n")


In [15]:
import pandas as pd
import os

def write_results_to_excel(results, excel_file_path, n_neighbors, sheet_name):
    """Append one summary row per evaluated model to a sheet of an Excel file.

    If ``excel_file_path`` already exists, all of its sheets are read first and
    written back, with the new rows appended to ``sheet_name`` (or stored as a
    new sheet when it does not exist yet). Otherwise a new workbook is created.

    Args:
        results: Mapping of metric name to an evaluation dict (keys 'Accuracy'
            and 'Classification Report', both optional). The special key
            'exc_dist' instead maps each prime to such an evaluation dict.
        excel_file_path: Path of the workbook to create or extend.
        n_neighbors: Neighbour count recorded in the 'n' column of every row.
        sheet_name: Sheet that receives the new rows.

    Returns:
        None. The workbook is written to ``excel_file_path``.
    """
    # The Mic_* columns are part of the sheet layout but no value is computed
    # for them here, so they stay empty.
    columns = ['Distance', 'n', 'p', 'Accuracy', 'Mic_Recall', 'Mic_Precision',
               'Mic_F1_Score', 'Mac_Recall', 'Mac_Precision', 'Mac_F1_Score']

    def _row(distance, p, model_results):
        """Flatten one evaluation dict into a record of the summary table."""
        report = model_results.get('Classification Report', None)
        if report:
            macro = report['macro avg']
            mac_recall = macro['recall']
            mac_precision = macro['precision']
            mac_f1 = macro['f1-score']
        else:
            mac_recall, mac_precision, mac_f1 = None, None, None
        return {
            'Distance': distance,
            'n': n_neighbors,
            'Accuracy': model_results.get('Accuracy', None),
            'p': p,
            'Mac_Recall': mac_recall,
            'Mac_Precision': mac_precision,
            'Mac_F1_Score': mac_f1,
        }

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row is quadratic.
    rows = []
    for metric, metric_results in results.items():
        if metric == 'exc_dist':
            rows.extend(
                _row('exc_dist', prime_number, prime_results)
                for prime_number, prime_results in metric_results.items()
            )
        else:
            # Built-in metrics have no prime parameter, so 'p' is left blank.
            rows.append(_row(metric, '', metric_results))
    df_results = pd.DataFrame(rows, columns=columns)

    # Load every existing sheet so that rewriting the workbook preserves them.
    if os.path.exists(excel_file_path):
        existing_data = pd.read_excel(excel_file_path, sheet_name=None)
    else:
        existing_data = {}

    # Merge before opening the writer, so a failure while merging cannot leave
    # a freshly truncated workbook behind.
    if sheet_name in existing_data:
        existing_data[sheet_name] = pd.concat(
            [existing_data[sheet_name], df_results], ignore_index=True)
    else:
        existing_data[sheet_name] = df_results

    with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
        for sheet, data in existing_data.items():
            data.to_excel(writer, sheet_name=sheet, index=False)


# Destination workbook and the sheet that collects this experiment's rows
# (change `sheet_name` to write to a different sheet).
excel_file_path = 'reports.xlsx'
sheet_name = 'knn_multi_extraction'

write_results_to_excel(
    results=results,
    excel_file_path=excel_file_path,
    n_neighbors=n_neighbors,
    sheet_name=sheet_name,
)
