In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_excel('Data/CTG.xls', sheet_name = 'Raw Data')

In [3]:
df.drop(index = 0, axis = 0, inplace = True)
df.drop(df.tail(3).index, inplace = True)

In [4]:
df.columns

Index(['FileName', 'Date', 'SegFile', 'b', 'e', 'LBE', 'LB', 'AC', 'FM', 'UC',
       'ASTV', 'MSTV', 'ALTV', 'MLTV', 'DL', 'DS', 'DP', 'DR', 'Width', 'Min',
       'Max', 'Nmax', 'Nzeros', 'Mode', 'Mean', 'Median', 'Variance',
       'Tendency', 'A', 'B', 'C', 'D', 'E', 'AD', 'DE', 'LD', 'FS', 'SUSP',
       'CLASS', 'NSP'],
      dtype='object')

In [5]:
df.drop(['FileName', 'Date', 'SegFile', 'b', 'e', 'LB','Min', 'Max', 'Nmax', 'Nzeros', 'Mode', 'Mean', 'Median', 'Variance', 'A', 'B', 'C', 'D', 'E', 'AD', 'DE', 'LD', 'FS', 'SUSP', 'CLASS'],axis =1, inplace = True)

In [6]:
df.columns

Index(['LBE', 'AC', 'FM', 'UC', 'ASTV', 'MSTV', 'ALTV', 'MLTV', 'DL', 'DS',
       'DP', 'DR', 'Width', 'Tendency', 'NSP'],
      dtype='object')

In [7]:
import skfuzzy as fuzz

In [8]:
df['ASTV'] = df['ASTV']*10

In [9]:
df['ALTV'] = df['ALTV']*10

In [10]:
df['Tendency'] += 1

In [11]:
df.drop(['DR'], axis=1 ,inplace = True)

In [12]:
len(df.columns)

14

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['NSP'],axis = 1), df['NSP'], test_size=0.20, random_state=7777)

In [15]:
import random

In [16]:
def createLinguisticVariables(df, column):
    columnMin = df[column].min()
    columnMax = df[column].max()
    rangeDistance = columnMax - columnMin
    columnRange = np.array(range(0,(int)(columnMax + 1)))
    columnLow = fuzz.membership.zmf(columnRange,columnMin + rangeDistance*0.1,columnMin + rangeDistance*0.5)
    columnMid = fuzz.membership.trimf(columnRange, [columnMin + rangeDistance*0.25, columnMin + rangeDistance*0.5, columnMin + rangeDistance*0.75])
    columnHigh = fuzz.membership.trapmf(columnRange, [columnMin + rangeDistance*0.66,columnMin + rangeDistance*0.9, columnMax+1, columnMax+2])
    return [columnLow, columnMid, columnHigh]

In [17]:
def createMembershipDictionary(df):
    dictionary = {}
    for column in df.columns[:-1]:
        dictionary[column] = createLinguisticVariables(df,column)
    return dictionary

In [18]:
def ruleFiringStrength(rule, membershipDict, x):
    ruleLength = len(rule)
    columns = df.columns
    currentMin = 1
    isValid = False
    for index in range(0,len(rule) - 1):
        if rule[index] == -1:
            continue
        isValid = True
        currentMembership = membershipDict[columns[index]][rule[index]][int(x[columns[index]])]
        if currentMin > currentMembership:
            currentMin = currentMembership
    if isValid == False:
        return (0,rule[len(rule) - 1])
    return (currentMin, rule[len(rule) - 1])

In [19]:
def fuzzyClassifyFromRules(rules, classesNumber, membershipDict, x):
    result = [0 for i in range(classesNumber)]
    for rule in rules:
        membership, classOutput = ruleFiringStrength(rule, membershipDict, x)
        if result[classOutput] < membership:
            result[classOutput] = membership
    return result

In [20]:
def defuzzyfication(membership):
    maxValue = np.array(membership).max()
    if maxValue == 0:
        return [0]
    result = []
    for index in range(0,len(membership)):
        if membership[index] == maxValue:
            result.append(index)
    return result

In [21]:
def predictionScore(correctClass, membership):
    return correctClass in membership

In [22]:
def generateAllRules(linguisticVariables, predictingClass, features):
    return [list(x) + [predictingClass] for x in list(itertools.product(range(-1,linguisticVariables), repeat=features))]

In [23]:
def fitnessFunction(rule, X_train, y_train, membershipDict):
    error = 0
    predictingClass = rule[len(rule)-1]
    for i in range(len(X_train)):
        membership, _ = ruleFiringStrength(rule, membershipDict, X_train.iloc[i])
        if y_train.iloc[i] == predictingClass + 1:
            error = error + 1 - membership
        else:
            error = error + 1 + membership
    return error

In [24]:
def calculateRulesAccuracy(rules, X_train, y_train, membershipDict):
    return [(rule,fitnessFunction(rule,X_train, y_train, membershipDict)) for rule in rules]

In [25]:
dict = createMembershipDictionary(df)

In [28]:
import itertools
allRules = generateAllRules(3,0,7)

In [None]:
rulesForClass0 = calculateRulesAccuracy(generateAllRules(3,0,7), X_train, y_train, dict)

In [None]:
rulesForClass0.sort(key=lambda x:x[1])

In [41]:
rulesForClass1 = calculateRulesAccuracy(generateAllRules(3,1,7), X_train, y_train, dict)

In [42]:
rulesForClass2 = calculateRulesAccuracy(generateAllRules(3,2,7), X_train, y_train, dict)

In [45]:
rulesForClass1.sort(key=lambda x:x[1])

In [47]:
rulesForClass2.sort(key=lambda x:x[1])

In [49]:
rulesForClass2

[([-1, -1, -1, -1, -1, -1, 2, 2], 127.24358974358971),
 ([-1, -1, -1, -1, -1, 0, 2, 2], 127.24358974358971),
 ([-1, -1, 0, -1, -1, -1, 2, 2], 127.24358974358971),
 ([-1, -1, 0, -1, -1, 0, 2, 2], 127.24358974358971),
 ([-1, 0, -1, -1, -1, -1, 2, 2], 127.24358974358971),
 ([-1, 0, -1, -1, -1, 0, 2, 2], 127.24358974358971),
 ([-1, 0, 0, -1, -1, -1, 2, 2], 127.24358974358971),
 ([-1, 0, 0, -1, -1, 0, 2, 2], 127.24358974358971),
 ([-1, -1, -1, 0, -1, -1, 2, 2], 127.52586866504633),
 ([-1, -1, -1, 0, -1, 0, 2, 2], 127.52586866504633),
 ([-1, -1, 0, 0, -1, -1, 2, 2], 127.52586866504633),
 ([-1, -1, 0, 0, -1, 0, 2, 2], 127.52586866504633),
 ([-1, 0, -1, 0, -1, -1, 2, 2], 127.52586866504633),
 ([-1, 0, -1, 0, -1, 0, 2, 2], 127.52586866504633),
 ([-1, 0, 0, 0, -1, -1, 2, 2], 127.52586866504633),
 ([-1, 0, 0, 0, -1, 0, 2, 2], 127.52586866504633),
 ([-1, -1, -1, -1, 2, -1, 2, 2], 132.05372405372404),
 ([-1, -1, -1, -1, 2, 0, 2, 2], 132.05372405372404),
 ([-1, -1, -1, 0, 2, -1, 2, 2], 132.053724053

In [54]:
model = rulesForClass1[:3] + rulesForClass0[:3] + rulesForClass2[:3]

In [55]:
model = [rule for (rule,fitness) in model]

In [56]:
model

[[-1, -1, -1, 0, 2, -1, 1, 1],
 [-1, -1, -1, 0, 2, 0, 1, 1],
 [-1, -1, 0, 0, 2, -1, 1, 1],
 [-1, -1, -1, -1, -1, -1, 0, 0],
 [-1, -1, 0, -1, -1, -1, 0, 0],
 [-1, -1, 0, -1, -1, -1, -1, 0],
 [-1, -1, -1, -1, -1, -1, 2, 2],
 [-1, -1, -1, -1, -1, 0, 2, 2],
 [-1, -1, 0, -1, -1, -1, 2, 2]]

In [81]:
y_train.value_counts()

1.0    1321
2.0     231
3.0     148
Name: NSP, dtype: int64

In [28]:
def modelAccuracy(model, numberClasses, membershipDict, X_test, y_test):
    correctPredicted = 0
    for i in range(len(X_test)):
        fuzzyResult = fuzzyClassifyFromRules(model, numberClasses, membershipDict,X_test.iloc[i])
        classOutputList = defuzzyfication(fuzzyResult)
        correctPredicted += predictionScore(y_test.iloc[i] - 1, classOutputList)
    return correctPredicted

In [60]:
accuracy = modelAccuracy(model, 3, dict, X_train, y_train)

In [65]:
print(accuracy/len(y_train))

0.7841176470588235


In [66]:
testAccuracy = modelAccuracy(model, 3, dict, X_test, y_test)

In [67]:
print(testAccuracy/len(y_test))

0.7934272300469484


In [29]:
def classAccuracy(model, predictClass, membershipDict, X_test, y_test):
    correctPredicted = 0
    allClass = 0
    for i in range(len(X_test)):
        if y_test.iloc[i] != predictClass:
            continue
        allClass += 1
        fuzzyResult = fuzzyClassifyFromRules(model,3,membershipDict,X_test.iloc[i])
        classOutputList = defuzzyfication(fuzzyResult)
        correctPredicted += predictionScore(y_test.iloc[i] - 1,classOutputList)
    return correctPredicted

In [72]:
model

[[-1, -1, -1, 0, 2, -1, 1, 1],
 [-1, -1, -1, 0, 2, 0, 1, 1],
 [-1, -1, 0, 0, 2, -1, 1, 1],
 [-1, -1, -1, -1, -1, -1, 0, 0],
 [-1, -1, 0, -1, -1, -1, 0, 0],
 [-1, -1, 0, -1, -1, -1, -1, 0],
 [-1, -1, -1, -1, -1, -1, 2, 2],
 [-1, -1, -1, -1, -1, 0, 2, 2],
 [-1, -1, 0, -1, -1, -1, 2, 2]]

In [76]:
import pickle

In [90]:
def findBestModel(rules0, rules1, rules2, numberClasses, membershipDict, X_test, y_test):
    maxAccuracy = 0
    for i in range(1,10):
        for j in range(1, 10):
            for k in range(1, 10):
                model = [rule for (rule, fitness) in rules0[:i]] + [rule for (rule, fitness) in rules1[:j]] + [rule for (rule, fitness) in rules2[:k]]
                accuracy = classAccuracy(model, 3, membershipDict, X_test, y_test)
                if (accuracy > maxAccuracy):
                    maxAccuracy = accuracy
                    print(i,j,k)
                    print(accuracy)
    print(maxAccuracy)

In [91]:
findBestModel(rulesForClass0, rulesForClass1, rulesForClass2, 3, dict, X_train, y_train)

1 1 1
36
36


In [95]:
model = [rule for (rule,fitness) in rulesForClass2[:10]]

In [96]:
model

[[-1, -1, -1, -1, -1, -1, 2, 2],
 [-1, -1, -1, -1, -1, 0, 2, 2],
 [-1, -1, 0, -1, -1, -1, 2, 2],
 [-1, -1, 0, -1, -1, 0, 2, 2],
 [-1, 0, -1, -1, -1, -1, 2, 2],
 [-1, 0, -1, -1, -1, 0, 2, 2],
 [-1, 0, 0, -1, -1, -1, 2, 2],
 [-1, 0, 0, -1, -1, 0, 2, 2],
 [-1, -1, -1, 0, -1, -1, 2, 2],
 [-1, -1, -1, 0, -1, 0, 2, 2]]

In [97]:
classAccuracy(model, 3, dict, X_train, y_train)

37

In [108]:
fuzzyResult = fuzzyClassifyFromRules(model,3,dict,X_test.iloc[15])

In [109]:
fuzzyResult

[0, 0, 0]

In [110]:
X_test.iloc[15]

LBE         133.0
AC            0.0
FM            1.0
UC            5.0
ASTV        600.0
MSTV          2.7
ALTV          0.0
MLTV          8.2
DL            2.0
DS            0.0
DP            2.0
Width       105.0
Tendency      1.0
Name: 1949, dtype: float64

In [115]:
ruleFiringStrength(model[0],dict, X_test.iloc[15])

ALTV 0.0


(0.0, 2)

In [113]:
dict['MSTV']

[array([1.        , 0.99610727, 0.66089965, 0.09731834, 0.        ,
        0.        , 0.        , 0.        ]),
 array([0.        , 0.        , 0.05882353, 0.64705882, 0.76470588,
        0.17647059, 0.        , 0.        ]),
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.19117647, 0.80392157, 1.        ])]

In [117]:
df.columns

Index(['LBE', 'AC', 'FM', 'UC', 'ASTV', 'MSTV', 'ALTV', 'MLTV', 'DL', 'DS',
       'DP', 'Width', 'Tendency', 'NSP'],
      dtype='object')

In [30]:
df.drop(['AC', 'FM', 'UC', 'MSTV', 'MLTV', 'Width', 'Tendency'], axis = 1 , inplace= True)

In [31]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['NSP'],axis = 1), df['NSP'], test_size=0.20, random_state=7777)

In [32]:
dict = createMembershipDictionary(df)

In [131]:
rulesForClass0 = calculateRulesAccuracy(generateAllRules(3,0,6), X_train, y_train, dict)

In [132]:
rulesForClass1 = calculateRulesAccuracy(generateAllRules(3,1,6), X_train, y_train, dict)

In [133]:
rulesForClass2 = calculateRulesAccuracy(generateAllRules(3,2,6), X_train, y_train, dict)

In [135]:
rulesForClass0.

[([-1, -1, -1, -1, -1, -1, 0], 1700),
 ([-1, -1, -1, -1, -1, 0, 0], 707.1875),
 ([-1, -1, -1, -1, -1, 1, 0], 1740.0),
 ([-1, -1, -1, -1, -1, 2, 0], 1707.0),
 ([-1, -1, -1, -1, 0, -1, 0], 753.0),
 ([-1, -1, -1, -1, 0, 0, 0], 702.1875),
 ([-1, -1, -1, -1, 0, 1, 0], 1740.0),
 ([-1, -1, -1, -1, 0, 2, 0], 1707.0),
 ([-1, -1, -1, -1, 1, -1, 0], 1700.0),
 ([-1, -1, -1, -1, 1, 0, 0], 1700.0)]

In [34]:
import pickle

In [35]:
with open("rules.txt", 'rb') as f:
    list1, list2, list3 = pickle.load(f)

In [92]:
currentMax = -1
for i in range(0,100):
    model = [rule for (rule,fitness) in list3[:i]]
    newMax = classAccuracy(model, 3, dict, X_train, y_train)
    print(newMax, i)
    if (currentMax < newMax):
        currentMax = newMax
        print(i)

0 0
0
39 1
1
39 2
39 3
39 4
39 5
39 6
39 7
39 8
76 9
9
76 10
76 11
76 12
76 13
76 14
76 15
76 16
76 17
76 18
76 19
76 20
76 21
76 22
76 23
76 24
76 25
76 26
76 27
76 28
76 29
76 30
76 31
76 32
76 33
76 34
76 35
76 36
76 37
76 38
76 39
76 40
76 41
76 42
76 43
76 44
76 45
76 46
76 47
76 48
76 49
76 50
76 51
76 52
76 53
76 54
76 55
76 56
76 57
76 58
76 59
76 60
76 61
76 62
76 63
76 64
76 65
76 66
76 67
76 68
76 69
76 70
76 71
76 72
92 73
73
92 74
92 75
92 76
92 77
92 78
92 79
92 80
92 81
92 82
92 83
92 84
92 85
92 86
92 87
92 88
92 89
92 90
92 91
92 92
98 93
93
98 94


KeyboardInterrupt: 

In [48]:
tempModel = [rule for (rule, fitness) in list3[:93]]

In [49]:
classAccuracy(tempModel, 3, dict, X_train, y_train)

98

In [56]:
modelForClass3 = [list3[0][0], list3[1][0], list3[9][0], list3[73][0], list3[93][0]]

In [57]:
classAccuracy(modelForClass3, 3, dict, X_train, y_train)

98

In [104]:
def findBestModelForClass(rulesList, predictedClass, X_train, y_train):
    result = []
    model = []
    currentMax = -1
    for i in range(len(rulesList)):
        model.append(rulesList[i][0])
        newMax = classAccuracy(model, predictedClass, dict, X_train, y_train)
        if (currentMax < newMax):
            currentMax = newMax
            result.append(i)
        else:
            model = model[:-1]
    return result

In [101]:
bestRulesClass1 = findBestModelForClass(list3, 3, X_train, y_train)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
291.09905648231506


In [103]:
bestRulesClass2 = findBestModelForClass(list2, 2, X_train, y_train)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000


In [105]:
bestRulesClass3 = bestRulesClass1

In [106]:
bestRulesClass1 = findBestModelForClass(list1, 1, X_train, y_train)

In [163]:
bestRulesClass2

[0,
 24,
 32,
 2967,
 3307,
 3667,
 3669,
 3697,
 3789,
 3804,
 3840,
 3939,
 3969,
 3987,
 3995]

In [119]:
model = [list1[index][0] for index in bestRulesClass1] + [list2[index][0] for index in bestRulesClass2] + [list3[index][0] for index in bestRulesClass3]

In [120]:
model

[[-1, -1, 0, -1, 0, 0, 0],
 [-1, 2, 1, -1, -1, -1, 1],
 [-1, -1, 1, 0, -1, -1, 1],
 [2, 2, -1, 0, -1, -1, 1],
 [0, 0, -1, -1, -1, 2, 1],
 [2, 0, -1, 1, -1, -1, 1],
 [0, 2, 0, 0, 0, 0, 1],
 [1, 1, -1, 0, -1, 1, 1],
 [0, 0, -1, 1, -1, -1, 1],
 [1, 2, 0, 0, 0, 0, 1],
 [2, 0, 0, -1, -1, -1, 1],
 [2, 1, 0, 0, -1, -1, 1],
 [0, 1, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 1],
 [1, 1, 0, 0, 0, 0, 1],
 [1, 0, 0, 0, 0, 0, 1],
 [-1, -1, 0, -1, -1, 1, 2],
 [-1, -1, 2, -1, -1, -1, 2],
 [-1, -1, -1, -1, -1, 2, 2],
 [-1, -1, -1, -1, 2, -1, 2],
 [-1, 2, -1, 1, -1, -1, 2],
 [0, -1, -1, 2, -1, -1, 2],
 [0, 2, -1, -1, -1, -1, 2],
 [0, -1, -1, 1, -1, -1, 2],
 [2, 2, 0, 0, -1, -1, 2],
 [1, 2, 1, -1, -1, -1, 2],
 [1, 2, 0, 0, -1, -1, 2],
 [1, 1, -1, 1, -1, -1, 2],
 [0, 1, 0, 0, -1, -1, 2],
 [0, 0, 0, 0, -1, 0, 2],
 [1, 0, 0, 0, 0, -1, 2],
 [1, 1, 0, 0, -1, -1, 2]]

In [127]:
modelAccuracy(model, 3, dict, X_test, y_test) / len(X_test)

0.8568075117370892

In [130]:
list1[0][0]

[-1, -1, 0, -1, 0, 0, 0]

In [131]:
df.columns

Index(['LBE', 'ASTV', 'ALTV', 'DL', 'DS', 'DP', 'NSP'], dtype='object')

In [148]:
def cleanRules(model, X_train, y_train, membershipDict):
    result = []
    startAccuracy = modelAccuracy(model, 3, membershipDict, X_train, y_train)
    for i in range(0,len(model)):
        newAccuracy = modelAccuracy(model[:i] + model[i+1:], 3, membershipDict, X_train, y_train)
        if (newAccuracy == startAccuracy):
            result.append(i)
            model = model[:i] + model[i+1:]
    return model

In [149]:
cleanedModel = cleanRules(model, X_train, y_train,dict)

In [150]:
len(cleanedModel)

20

In [151]:
len(model)

32

In [152]:
cleanedModel

[[-1, -1, 0, -1, 0, 0, 0],
 [-1, -1, 1, 0, -1, -1, 1],
 [2, 2, -1, 0, -1, -1, 1],
 [2, 0, -1, 1, -1, -1, 1],
 [1, 1, -1, 0, -1, 1, 1],
 [1, 2, 0, 0, 0, 0, 1],
 [2, 1, 0, 0, -1, -1, 1],
 [0, 0, 0, 0, 0, 0, 1],
 [1, 1, 0, 0, 0, 0, 1],
 [1, 0, 0, 0, 0, 0, 1],
 [-1, -1, 2, -1, -1, -1, 2],
 [-1, -1, -1, -1, 2, -1, 2],
 [0, -1, -1, 2, -1, -1, 2],
 [0, 2, -1, -1, -1, -1, 2],
 [0, -1, -1, 1, -1, -1, 2],
 [1, 2, 1, -1, -1, -1, 2],
 [1, 2, 0, 0, -1, -1, 2],
 [0, 1, 0, 0, -1, -1, 2],
 [1, 0, 0, 0, 0, -1, 2],
 [1, 1, 0, 0, -1, -1, 2]]

In [160]:
list1

[([-1, -1, 0, -1, 0, 0, 0], 636.9539231373025),
 ([-1, -1, 0, -1, -1, 0, 0], 641.9539231373025),
 ([-1, -1, 0, -1, 0, -1, 0], 687.6957342108445),
 ([-1, -1, 0, -1, -1, -1, 0], 692.6957342108445),
 ([-1, -1, -1, -1, 0, 0, 0], 702.1875),
 ([-1, -1, -1, -1, -1, 0, 0], 707.1875),
 ([-1, -1, -1, -1, 0, -1, 0], 753.0),
 ([-1, -1, 0, 0, 0, 0, 0], 786.5034470380238),
 ([-1, -1, 0, 0, -1, 0, 0], 786.5522751630238),
 ([-1, -1, 0, 0, 0, -1, 0], 818.1612737365656),
 ([-1, -1, 0, 0, -1, -1, 0], 818.2101018615656),
 ([-1, -1, -1, 0, 0, 0, 0], 856.572265625),
 ([-1, -1, -1, 0, -1, 0, 0], 856.62109375),
 ([-1, -1, -1, 0, 0, -1, 0], 888.30078125),
 ([-1, -1, -1, 0, -1, -1, 0], 888.349609375),
 ([1, -1, 0, -1, 0, 0, 0], 1173.0407115015587),
 ([1, -1, 0, -1, -1, 0, 0], 1173.5962670571141),
 ([1, -1, -1, -1, 0, 0, 0], 1185.081018518513),
 ([1, -1, -1, -1, -1, 0, 0], 1185.6365740740684),
 ([1, -1, 0, -1, 0, -1, 0], 1211.5893226126698),
 ([1, -1, 0, -1, -1, -1, 0], 1212.1448781682252),
 ([1, -1, -1, -1, 0, 