# Import needed libraries

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as LR
import numpy as np

# Load sklearn breast cancer dataset

In [2]:
# Unpack the (features, target) tuple straight into X and y
X, y = load_breast_cancer(
    as_frame=True,
    return_X_y=True,
)

# Or

# Dataset = load_breast_cancer(return_X_y=True, as_frame=True)
# X=Dataset[0]
# y=Dataset[1]

# print(X)
# print(y)

**Note: When the `return_X_y` parameter of the dataset loader is set to True, a tuple `(data, target)` is returned, which can be unpacked into**
**the feature variables (X) and the target variable (y).**
**Setting `as_frame` to True returns the feature variables as a pandas DataFrame and the target variable as a pandas Series.**

In [3]:
# Same loader, but keep the whole returned container instead of unpacking (X, y)
dataset_as_dict = load_breast_cancer(
    as_frame=True,
    return_X_y=False,
)

# print(dataset_as_dict,"\n")
# print(dataset_as_dict.data)

**Note: When the `return_X_y` parameter of the dataset loader is set to False, a dictionary-like `Bunch` object is returned with the following keys: 'data', 'target',**
**'frame', 'target_names', 'DESCR', 'feature_names', 'filename' and 'data_module'. Each key can also be used as an attribute**
**to access the corresponding value (e.g. `dataset_as_dict.data`).**
**Setting `as_frame` to True returns the feature variables as a pandas DataFrame and the target variable as a pandas Series.**

# Data preprocessing

In [4]:
# print(f"X has {X.shape[0]} rows and {X.shape[1]} columns. I.e., There are {X.shape[1]} feature variable of {X.shape[0]} rows.\n")

# print(X.isnull().any(),"\n") # or X.isna().any() 
# print("The description of the dataset is given below")
# X.describe()

In [5]:
# len(X)

# Dataset splitting

In [6]:
# Split once with a fixed seed, then convert each piece to a NumPy array.
# NOTE(review): an integer test_size is an absolute row count, so 500 rows go to the
# test split - confirm this matches how the pickled model was trained.
X_train, X_test, y_train, y_test = (
    part.to_numpy()
    for part in train_test_split(X, y, test_size=500, random_state=123)
)

# Model deserialization (unpickling)

In [7]:
# model deserialization (unpickling)
def unpickle_model(path="pickled_models/sklearn_breast_cancer_classifier.pkl"):
    """Deserialize (unpickle) an object from a pickle file and return it.

    Parameters
    ----------
    path : str or os.PathLike, optional
        Location of the pickle file. Defaults to the classifier pickle this
        notebook has always loaded, so ``unpickle_model()`` with no arguments
        behaves exactly as before.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    # Imports stay local so the cell remains self-contained
    import pickle
    import pathlib

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load pickles that you created yourself or otherwise trust.
    with pathlib.Path(path).open(mode="rb") as in_file:
        return pickle.load(in_file)
        
# Load the pickled model once; presumably a fitted classifier (judging by the file name)
my_model = unpickle_model()
my_model  # bare last expression so the notebook displays the loaded object

# Mapping the target (y) variable

In [8]:
def map_y(targets=None):
    """Translate integer target codes into human-readable class names.

    scikit-learn's breast cancer dataset encodes the classes as
    0 = malignant and 1 = benign (``target_names == ['malignant', 'benign']``),
    so code 1 is labelled "Benign" and every other code "Malignant".

    Parameters
    ----------
    targets : iterable of int, optional
        Target codes to translate. When omitted, the notebook-level ``y_test``
        array is used, so ``map_y()`` keeps working without arguments.

    Returns
    -------
    numpy.ndarray
        Array of class-name strings, one per input code, in input order.
    """
    if targets is None:
        targets = y_test
    return np.array(["Benign" if code == 1 else "Malignant" for code in targets])

# Class-name labels for every row of the test split (map_y reads the global y_test)
y_test_label = map_y()

# print(y_test_label) 

# Function to predict the classes of cancer given the start and end indexes

In [13]:
print(f"Function to predict the classes of cancer given the start and end indexes. Only integers between 1 and {len(X_test)} are allowed\n")


def label_names(codes):
    """Translate integer target codes into class names.

    scikit-learn's breast cancer dataset encodes malignant as 0 and benign
    as 1, so code 1 maps to "Benign" and any other code to "Malignant".
    """
    return ["Benign" if code == 1 else "Malignant" for code in codes]


def predict():
    """Return predicted class names for rows start_index..end_index-1 of X_test."""
    # NOTE(review): assumes the pickled model was trained on the unmodified
    # sklearn target encoding (0 = malignant, 1 = benign) - confirm.
    return label_names(my_model.predict(X_test[start_index:end_index]))


def actual():
    """Return the true class names for rows start_index..end_index-1 of y_test."""
    # Mapped straight from y_test with the same helper as predict(), so the
    # predicted and actual labels always share one encoding.
    return label_names(y_test[start_index:end_index])


# Keep prompting until both values parse as integers
while True:
    try:
        start = int(input("Start Index of the classes you want to predict: "))
        end = int(input("End Index of the classes you want to predict:   "))
        break
    except ValueError:
        # Catch only parse failures: a bare except would also swallow
        # KeyboardInterrupt / stdin errors and make this loop impossible to stop.
        print("\nPlease indicate the start and end indexes. Empty values, and letters are not allowed. Only integers are allowed\n")

if start > end:
    print("\nPlease the start index CANNOT be higher than the end index. Click ctrl + ENTER to start again!")
else:
    # Subtract 1 to get the actual index since array indexes start from 0;
    # `end` is kept as-is because the slice end is exclusive.
    start_index, end_index = start - 1, end

    # Valid user input is 1..len(X_test). Rejecting start < 1 prevents a
    # negative slice start, which would silently select the wrong rows or
    # produce an empty slice.
    if start < 1 or end > len(X_test):
        print("\nThe index you entered is out of range. Click ctrl + ENTER to try again!")
    else:
        predicted_class = predict()
        actual_class = actual()

        print(f"\nPredicted classes: {predicted_class}")
        print(f"\nActual classes:    {actual_class}")

Function to predict the classes of cancer given the start and end indexes. Only integers between 1 and 500 are allowed

Start Index of the classes you want to predict: 5
End Index of the classes you want to predict:   4

Please the start index CANNOT be higher than the end index. Click ctrl + ENTER to start again!
