# Breast Cancer Recurrance Prediction Fuzzy System Implementation
First we install the scikit fuzzy library

In [1]:
%pip install -U scikit-fuzzy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Then we import pandas to manipulate dataframe, numpy to perform array calculations, scikit-fuzzy to implement th fuzzy system and control to determine the fuzzy variables

In [2]:
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

Then we load the dataset from the uciml repository

In [3]:
from ucimlrepo import fetch_ucirepo 
breast_cancer = fetch_ucirepo(id=14)
X = breast_cancer.data.features 
y = breast_cancer.data.targets 
data = X
data['Class'] = y  # Adding the target variable 'Class'
data

Unnamed: 0,age,menopause,tumor-size,inv-nodes,node-caps,deg-malig,breast,breast-quad,irradiat,Class
0,30-39,premeno,30-34,0-2,no,3,left,left_low,no,no-recurrence-events
1,40-49,premeno,20-24,0-2,no,2,right,right_up,no,no-recurrence-events
2,40-49,premeno,20-24,0-2,no,2,left,left_low,no,no-recurrence-events
3,60-69,ge40,15-19,0-2,no,2,right,left_up,no,no-recurrence-events
4,40-49,premeno,0-4,0-2,no,2,right,right_low,no,no-recurrence-events
...,...,...,...,...,...,...,...,...,...,...
281,30-39,premeno,30-34,0-2,no,2,left,left_up,no,recurrence-events
282,30-39,premeno,20-24,0-2,no,3,left,left_up,yes,recurrence-events
283,60-69,ge40,20-24,0-2,no,1,right,left_up,no,recurrence-events
284,40-49,ge40,30-34,5-Mar,no,3,left,left_low,no,recurrence-events


In [4]:
data_clean = data[["age","tumor-size","inv-nodes", "deg-malig", "Class"]]
from datetime import datetime

#Clean the ages by replacing range with average
for index, row in data_clean.iterrows():
    age_list = row["age"].split("-")
    age_average = (float(age_list[0]) + float(age_list[1])) / 2
    # Replace the value in the "age" column with the calculated average
    data_clean.at[index, "age"] = age_average

    tumor_size_list = row["tumor-size"].split("-")
    try:
        tumor_size_list[1] = datetime.strptime(tumor_size_list[1], '%b').month
    except:
        pass
    tumor_size_average = (float(tumor_size_list[0]) + float(tumor_size_list[1])) /2 
    data_clean.at[index, "tumor-size"] = tumor_size_average

    inv_nodes_list = row["inv-nodes"].split("-")
    try:
        inv_nodes_list[1] = datetime.strptime(inv_nodes_list[1], '%b').month
    except:
        pass
    inv_nodes_average = (float(inv_nodes_list[0]) + float(inv_nodes_list[1])) /2 
    data_clean.at[index, "inv-nodes"] = inv_nodes_average
    
    data_clean.at[index, "Class"] = 0 if row["Class"] == "no-recurrence-events" else 1

data_clean["age"] = data_clean["age"].astype(float)
data_clean["tumor-size"] = data_clean["tumor-size"].astype(float)
data_clean["inv-nodes"] = data_clean["inv-nodes"].astype(float)
data_clean["Class"] = data_clean["Class"].astype(float)
data_clean.to_csv('clean-breast-cancer.csv', index=False, header=True)
data_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_clean["age"] = data_clean["age"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_clean["tumor-size"] = data_clean["tumor-size"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_clean["inv-nodes"] = data_clean["inv-nodes"].astype(float)
A value is trying to be

Unnamed: 0,age,tumor-size,inv-nodes,deg-malig,Class
0,34.5,32.0,1.0,3,0.0
1,44.5,22.0,1.0,2,0.0
2,44.5,22.0,1.0,2,0.0
3,64.5,17.0,1.0,2,0.0
4,44.5,2.0,1.0,2,0.0
...,...,...,...,...,...
281,34.5,32.0,1.0,2,1.0
282,34.5,22.0,1.0,3,1.0
283,64.5,22.0,1.0,1,1.0
284,44.5,32.0,4.0,3,1.0


Then we define the fuzzy variables:
*  Placeholder

In [5]:
# Define fuzzy variables
age = ctrl.Antecedent(np.arange(10, 101, 1), 'age')
tumor_size = ctrl.Antecedent(np.arange(0, 61, 1), 'tumor_size')
inv_nodes = ctrl.Antecedent(np.arange(0, 41, 1), 'inv_nodes')
deg_malig = ctrl.Antecedent(np.arange(1, 4, 1), 'deg_malig')
recurrence = ctrl.Consequent(np.arange(0, 101, 1), 'recurrence')

Then we define the membership functions for each variable
* Placeholder


In [6]:
#Age membership functions
age['young'] = fuzz.trapmf(age.universe, [10, 10, 25, 50])
age['middle_aged'] = fuzz.trimf(age.universe, [25, 50, 70])
age['elderly'] = fuzz.trapmf(age.universe, [50, 70, 100, 100])

#Tumor size membership function
tumor_size['small'] = fuzz.trapmf(tumor_size.universe, [0, 0, 15, 30])
tumor_size['medium'] = fuzz.trimf(tumor_size.universe, [15, 30, 55])
tumor_size['large'] = fuzz.trapmf(tumor_size.universe, [30, 55, 60, 60])

#inv nodes membership function
inv_nodes['few'] = fuzz.trapmf(inv_nodes.universe, [0, 0, 2, 10])
inv_nodes['moderate'] = fuzz.trimf(inv_nodes.universe, [2, 10, 15])
inv_nodes['many'] = fuzz.trapmf(inv_nodes.universe, [10, 15, 40, 40])

#deg_malig membership function
deg_malig['low'] = fuzz.trimf(deg_malig.universe, [1, 1, 4])
deg_malig['high'] = fuzz.trimf(deg_malig.universe, [1, 4, 4])

# Define membership functions for the output (recurrence)
recurrence['low'] = fuzz.trimf(recurrence.universe, [0, 0, 50])
recurrence['medium'] = fuzz.trimf(recurrence.universe, [0, 50, 100])
recurrence['high'] = fuzz.trimf(recurrence.universe, [50, 100, 100])


Then we define the fuzzy rules
* Placeholder

In [7]:
# Define fuzzy rules based on the membership functions and reasoning

#age
rule1 = ctrl.Rule(age['young'] & inv_nodes['few'], recurrence['low'])
rule2 = ctrl.Rule(age['young'] & inv_nodes['moderate'], recurrence['medium'])
rule3 = ctrl.Rule(age['young'] & deg_malig['low'], recurrence['medium'])
rule4 = ctrl.Rule(age['young'] & deg_malig['high'], recurrence['high'])

rule5 = ctrl.Rule(age['middle_aged'] & inv_nodes['few'], recurrence['low'])
rule6 = ctrl.Rule(age['middle_aged'] & inv_nodes['moderate'], recurrence['medium'])
rule7 = ctrl.Rule(age['middle_aged'] & inv_nodes['many'], recurrence['medium'])
rule8 = ctrl.Rule(age['middle_aged'] & deg_malig['low'], recurrence['low'])
rule9 = ctrl.Rule(age['middle_aged'] & deg_malig['high'], recurrence['medium'])
rule10 = ctrl.Rule(age['middle_aged'] & tumor_size['small'], recurrence['low'])
rule11 = ctrl.Rule(age['middle_aged'] & tumor_size['medium'], recurrence['low'])
rule12 = ctrl.Rule(age['middle_aged'] & tumor_size['large'], recurrence['low'])

rule13 = ctrl.Rule(age['elderly'] & inv_nodes['few'], recurrence['low'])
rule14 = ctrl.Rule(age['elderly'] & inv_nodes['moderate'], recurrence['medium'])
rule15 = ctrl.Rule(age['elderly'] & inv_nodes['many'], recurrence['high'])
rule16 = ctrl.Rule(age['elderly'] & deg_malig['low'], recurrence['low'])
rule17 = ctrl.Rule(age['elderly'] & deg_malig['high'], recurrence['medium'])
rule18 = ctrl.Rule(age['elderly'] & tumor_size['small'], recurrence['low'])
rule19 = ctrl.Rule(age['elderly'] & tumor_size['medium'], recurrence['low'])
rule20 = ctrl.Rule(age['elderly'] & tumor_size['large'], recurrence['low'])

# # inv_nodes and degree_malignence
# rule29 = ctrl.Rule(inv_nodes['few'], recurrence['low'])
# rule30 = ctrl.Rule(inv_nodes['moderate'] & deg_malig['low'], recurrence['low'])
# rule31 = ctrl.Rule(inv_nodes['moderate'] & deg_malig['high'], recurrence['high'])
# rule32 = ctrl.Rule(inv_nodes['many'] & deg_malig['high'], recurrence['high'])

# # inv_nodes and tumor_size
# rule33 = ctrl.Rule(inv_nodes['few'], recurrence['low'])
# rule34 = ctrl.Rule(inv_nodes['moderate'] & tumor_size['large'], recurrence['high'])

# #degree_malignence and tumor_size
# rule35 = ctrl.Rule(deg_malig['high'] & tumor_size['large'] , recurrence['high'])
# rule36 = ctrl.Rule(deg_malig['low'] & tumor_size['small'] , recurrence['low'])
# rule37 = ctrl.Rule(deg_malig['low'] & tumor_size['medium'] , recurrence['low'])

recurrence_ctrl = ctrl.ControlSystem(rules=[rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9, rule10, rule11, rule12, rule13, rule14, rule15, rule16, rule17, rule18, rule19, rule20]) # Include all rules here

rules

Then we add the rules to the control system

recurrence_ctrl = ctrl.ControlSystem(rules)  # Include all rules here

Then we create simulution

In [8]:
# Create simulation
recurrence_sim = ctrl.ControlSystemSimulation(recurrence_ctrl)

In [9]:
# Define inputs
recurrence_sim.input['age'] = 64.5  # Replace with your input values
recurrence_sim.input['tumor_size'] = 22
recurrence_sim.input['inv_nodes'] = 1
recurrence_sim.input['deg_malig'] = 1

# Compute output
recurrence_sim.compute()

# Access output
print(f"Chance for recurrance is: {recurrence_sim.output['recurrence']}%")


Chance for recurrance is: 17.65522875816993%


In [12]:
count = 0 
correct = 0
for index, row in data_clean.iterrows():
    recurrence_sim.input['age'] = row["age"]
    recurrence_sim.input['tumor_size'] = row["tumor-size"]
    recurrence_sim.input['inv_nodes'] = row["inv-nodes"]
    recurrence_sim.input['deg_malig'] = row["deg-malig"]
    recurrence_sim.compute()
    crisp_output = 1 if recurrence_sim.output['recurrence'] >30 else 0
    correct = correct+1 if crisp_output == row["Class"] else correct
    count += 1
    string = "Correct" if crisp_output == row["Class"] else "Wrong"
    if (string == "Wrong"):
        print(f"Data number {count}: age={row['age']}, tumor_size={row['tumor-size']}, inv_nodes={row['inv-nodes']}, deg-malig={row['deg-malig']}")
        print(f"Percentage {recurrence_sim.output['recurrence']}%, Output {crisp_output}, Answer {row['Class']}, {string}")

print(f"Number of correct diagnoses {correct}") 
print(f"Number of wrong diagnoses {count-correct}") 
print(f"Total diagnoses {count}") 
print(f"Accuracy {correct/count*100}%") 

Data number 1: age=34.5, tumor_size=32.0, inv_nodes=1.0, deg-malig=3.0
Percentage 50.00000000000009%, Output 1, Answer 0.0, Wrong
Data number 2: age=44.5, tumor_size=22.0, inv_nodes=1.0, deg-malig=2.0
Percentage 39.35739130950054%, Output 1, Answer 0.0, Wrong
Data number 3: age=44.5, tumor_size=22.0, inv_nodes=1.0, deg-malig=2.0
Percentage 39.35739130950054%, Output 1, Answer 0.0, Wrong
Data number 4: age=64.5, tumor_size=17.0, inv_nodes=1.0, deg-malig=2.0
Percentage 38.19137854848532%, Output 1, Answer 0.0, Wrong
Data number 5: age=44.5, tumor_size=2.0, inv_nodes=1.0, deg-malig=2.0
Percentage 39.35739130950054%, Output 1, Answer 0.0, Wrong
Data number 6: age=64.5, tumor_size=17.0, inv_nodes=1.0, deg-malig=2.0
Percentage 38.19137854848532%, Output 1, Answer 0.0, Wrong
Data number 7: age=54.5, tumor_size=27.0, inv_nodes=1.0, deg-malig=2.0
Percentage 37.6975981248459%, Output 1, Answer 0.0, Wrong
Data number 9: age=44.5, tumor_size=52.0, inv_nodes=1.0, deg-malig=2.0
Percentage 39.3573913