# Breast cancer prediagnosis - **FURIA**

**Prepare environment**

In [None]:
import os
import sys
sys.path
sys.path.append("/usr/lib/jvm/java-11-openjdk-amd64/bin/")
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64/"
!apt-get install build-essential python3-dev
!apt-get install python3-pil python3-pygraphviz
!apt install openjdk-11-jdk
!pip3 install javabridge --no-cache-dir
!pip3 install python-weka-wrapper3 --no-cache-dir

import weka.core.jvm as jvm

**Start JVM**

In [None]:
# Start the JVM used by python-weka-wrapper3 for every Weka call in this notebook.
# packages=True presumably enables Weka package-manager support, which the
# FURIA installation cell relies on - confirm against the wrapper's docs.
# NOTE(review): no matching jvm.stop() appears in the cells shown here.
jvm.start(packages=True)

**Install Furia**

In [None]:
import weka.core.packages as packages

# FURIA is distributed as the Weka package "fuzzyUnorderedRuleInduction".
# NOTE(review): a freshly installed Weka package may only become usable after
# the JVM/runtime is restarted - confirm before relying on a single run.
packages.install_package("fuzzyUnorderedRuleInduction")

# List every installed package (name and URL) as a quick sanity check.
items = packages.installed_packages()
for installed in items:
    print(" ".join((installed.name, installed.url)))

**Prepare dataset**

In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data

In [None]:
import pandas as pd

# The raw file has no header row. After "id" and "diagnosis" it holds ten
# measurements, each repeated for three statistics in the order mean, se, worst.
# Names are generated so that every column follows the same
# "<feature>_<statistic>" pattern (the hand-written list mixed
# "concave_points_mean" with "concave points_se" / "concave points_worst").
feature_names = ["radius", "texture", "perimeter", "area", "smoothness",
                 "compactness", "concavity", "concave_points", "symmetry",
                 "fractal_dimension"]
statistic_names = ["mean", "se", "worst"]

column_names = ["id", "diagnosis"] + [
    feature + "_" + statistic
    for statistic in statistic_names
    for feature in feature_names
]

# "id" becomes the index so it is not treated as a feature column.
dataset = pd.read_csv("wdbc.data", names=column_names, header=None, index_col=[0])

In [None]:
# Two derived features: how far the "worst" value lies above the mean,
# for radius and for symmetry. Computed column-wise on the whole frame.
dataset['uniformity'] = dataset['radius_worst'] - dataset['radius_mean']
dataset['homogenity'] = dataset['symmetry_worst'] - dataset['symmetry_mean']

In [None]:
# Write the feature table for Weka. index=False leaves the "id" index out of
# the file, so "diagnosis" is the first column of dataset.csv.
dataset.to_csv("dataset.csv", index=False, header=True)

**Load dataset in Weka**

In [None]:
import weka.core.converters as converters
# Load the CSV written by the previous cell into a Weka dataset object.
data = converters.load_any_file("dataset.csv")
# Use the first attribute as the class; dataset.csv was written without the
# "id" index, so its first column is "diagnosis".
data.class_is_first()

# NOTE(review): this prints the loaded dataset in full, which is a lot of output.
print(data)

**Whole dataset Furia rule extraction**

In [None]:
from weka.classifiers import Classifier

# Fit FURIA on the complete dataset (no hold-out): the rules printed in the
# next cell and the predictions below are therefore training-set results.
cls = Classifier(classname="weka.classifiers.rules.FURIA")
cls.build_classifier(data)

# predictions[k] is the rounded probability of class index 0 for instance k.
# NOTE(review): class index 0 appears to be "M" (it is listed first in the
# model and evaluation output) - confirm, since the Evaluate cell depends on it.
predictions = []

for position, inst in enumerate(data, start=1):
    pred = cls.classify_instance(inst)
    dist = cls.distribution_for_instance(inst)
    predictions.append(round(dist[0]))
    print("".join([str(position), ": label index=", str(pred), ", class distribution=", str(dist)]))

In [None]:
print(cls)

FURIA rules:

 => diagnosis=M (CF = 0.0)
(perimeter_worst in [96.05, 101.7, inf, inf]) and (uniformity in [2.26, 2.3, inf, inf]) => diagnosis=M (CF = 0.97)
 => diagnosis=M (CF = 0.0)
 => diagnosis=M (CF = 0.0)
(perimeter_worst in [-inf, -inf, 105, 127.3]) => diagnosis=B (CF = 0.88)
 => diagnosis=B (CF = 0.0)
 => diagnosis=B (CF = 0.0)
(area_worst in [-inf, -inf, 880.8, 989.5]) => diagnosis=B (CF = 0.88)
 => diagnosis=B (CF = 0.0)
(concave_points_mean in [-inf, -inf, 0.0389, 0.04562]) => diagnosis=B (CF = 0.95)

Number of Rules : 10



**Evaluate**

In [None]:
# Compare the stored predictions with the true diagnosis of every row and
# accumulate accuracy plus confusion-matrix counts.
# Malignant ('M') is the positive class, benign ('B') the negative class.

#Save predictions
output_values = []
output_labels = []

#Accuracy variables
accuracy_sum = 0
all_predicted = 0

#Recall / precision / true negative rate
TP = 0  # predicted M, actual M
TN = 0  # predicted B, actual B
FN = 0  # predicted B, actual M (a malignant case that was missed)
FP = 0  # predicted M, actual B (a benign case flagged as malignant)

# `predictions` is rebound by the later hold-out cell; fail loudly if this
# cell is re-run against a list that does not line up with `dataset`.
assert len(predictions) == len(dataset.index), \
    "predictions does not match dataset; re-run the whole-dataset prediction cell first"

for predicted_index, actual in zip(predictions, dataset['diagnosis']):

  # A stored 0 maps to benign, anything else to malignant.
  label = 'B' if predicted_index == 0 else 'M'
  output_labels.append(label)

  #Get accuracy
  # The mapping above only yields 'B' or 'M', so this guard currently always
  # passes; it is kept so the "Undefined count" report stays meaningful.
  if label != "Undef":
    all_predicted += 1
    if label == actual: #Predicted good
      accuracy_sum += 1
      if label == 'M': #Predicted Malignant good
        TP += 1
      if label == 'B':  #Predicted Benign good
        TN += 1
    else: #Predicted bad
      if label == 'M': #Predicted Malignant, actually Benign
        FP += 1
      if label == 'B': #Predicted Benign, actually Malignant
        FN += 1

In [None]:
# Derive the summary rates from the counters filled in by the Evaluate loop.
recall = TP / (TP + FN)
precision = TP / (TP + FP)
specificity = TN / (TN + FP)

# F-beta scores: F1 weights precision and recall equally, F2 favours recall.
f1 = 2 * (precision * recall) / (precision + recall)
f2 = 5 * (precision * recall) / (4 * precision + recall)

sample_count = len(dataset.index)
separator = "---------------------------"

# Coverage: how many rows received a prediction at all.
print("Dataset size: %d" % sample_count)
print("Predicted proportions: %f" % (all_predicted / sample_count))
print("Undefined count: %d" % (sample_count - all_predicted))
print(separator)

print("Accuracy: %f" % (accuracy_sum / all_predicted))
for template, value in (("Recall: %f", recall),
                        ("Precision: %f", precision),
                        ("Specificity: %f", specificity)):
    print(template % value)
print(separator)

for template, value in (("F1.0: %f", f1), ("F2.0: %f", f2)):
    print(template % value)

Dataset size: 569
Predicted proportions: 1.000000
Undefined count: 0
---------------------------
Accuracy: 0.994728
Recall: 1.000000
Precision: 0.985849
Specificity: 0.991667
---------------------------
F1.0: 0.992874
F2.0: 0.997137


**Crossvalidation**

In [None]:
from weka.classifiers import Evaluation
from weka.core.classes import Random

# 10-fold cross-validation of the FURIA classifier over the full dataset,
# with a fixed seed (2020) for the fold assignment.
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(2020))

# Overall accuracy, Weka's summary block, then the per-class table.
for report in (evl.percent_correct, evl.summary(), evl.class_details()):
    print(report)

# These rebind `precision` / `recall` from the Evaluate section; here they are
# the class-weighted averages reported by Weka, not the malignant-class values.
precision = evl.weighted_precision
recall = evl.weighted_recall

# F2 computed from the weighted-average precision and recall.
print("F2.0: ", 5 * (precision * recall) / (4 * precision + recall))

96.66080843585237

Correctly Classified Instances         550               96.6608 %
Incorrectly Classified Instances        19                3.3392 %
Kappa statistic                          0.9284
Mean absolute error                      0.0469
Root mean squared error                  0.1823
Relative absolute error                 10.0318 %
Root relative squared error             37.7068 %
Total Number of Instances              569     

=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0.948    0.022    0.962      0.948    0.955      0.928    0.976     0.958     M
                 0.978    0.052    0.969      0.978    0.974      0.928    0.976     0.975     B
Weighted Avg.    0.967    0.041    0.967      0.967    0.967      0.928    0.976     0.969     

F2.0:  0.9665999414236072


**Train/test split (80/20 hold-out)**

In [None]:
from weka.core.classes import Random

# Hold-out experiment: train a fresh FURIA model on 80% of the data and
# collect its predictions for the remaining 20%.
cls = Classifier(classname="weka.classifiers.rules.FURIA")

# Pass a seeded random generator so the data is shuffled reproducibly before
# it is split; without one the hold-out set is simply the tail of the file.
# Arguments are positional: training percentage, then the random generator.
train, test = data.train_test_split(80.0, Random(2020))
cls.build_classifier(train)

# NOTE: this rebinds `predictions` to the hold-out predictions (one entry per
# test instance), so it no longer lines up with the full `dataset` frame.
predictions = []
good = []  # not used in the cells shown here - kept in case later cells read it

for inst in test:
    dist = cls.distribution_for_instance(inst)
    # Same encoding as the whole-dataset cell: rounded probability of class index 0.
    predictions.append(round(dist[0]))

print(cls)

FURIA rules:

 => diagnosis=M (CF = 0.0)
 => diagnosis=M (CF = 0.0)
 => diagnosis=M (CF = 0.0)
(perimeter_worst in [-inf, -inf, 105, 106.4]) => diagnosis=B (CF = 0.94)
 => diagnosis=B (CF = 0.0)
(perimeter_worst in [-inf, -inf, 117.2, 126.9]) and (texture_mean in [-inf, -inf, 19.6, 21.46]) => diagnosis=B (CF = 0.91)
 => diagnosis=B (CF = 0.0)

Number of Rules : 7

