In [1]:
import pandas as pd
import numpy as np

I decided to use the data on Heart disease UCI as the first dataset. This database contains 76 attributes, but all published experiments refer to using a subset of 14 of them. The "goal" field refers to the presence of heart disease in the patient.<br /> <br />
#### Attribute information:<br />
age<br />
sex<br />
chest pain type (4 values)<br />
resting blood pressure<br />
serum cholestoral in mg/dl<br />
fasting blood sugar > 120 mg/dl<br />
resting electrocardiographic results (values 0,1,2)<br />
maximum heart rate achieved<br />
exercise induced angina<br />
oldpeak = ST depression induced by exercise relative to rest<br />
the slope of the peak exercise ST segment<br />
number of major vessels (0-3) colored by flourosopy<br />
thal: 3 = normal; 6 = fixed defect; 7 = reversable defect<br />


In [2]:
data = pd.read_csv('heart.csv')

In [3]:
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


### First of all we need to preprocess our data

In [4]:
data = data.dropna()

In [5]:
data['age_cat'] = pd.qcut(data['age'], 5)
data['trestbps_cat'] = pd.qcut(data['trestbps'], 5)
data['chol_cat'] = pd.cut(data['chol'], 5)
data['thalach_cat'] = pd.cut(data['thalach'], 5)
data['oldpeak_cat'] = pd.cut(data['oldpeak'], 5)

In [6]:
data = pd.get_dummies(data, columns = ['slope', 'ca', 'thal', 'cp'])
data = data.drop(['age','trestbps','chol','thalach','oldpeak'], 1)
data = pd.get_dummies(data, columns=['age_cat', 'trestbps_cat', 'chol_cat', 'thalach_cat', 'oldpeak_cat'])

In [7]:
data.to_csv('heart_binary.csv',index=False)

In [8]:
databin = pd.read_csv('heart_binary.csv')
features = list(databin)
databin1 = databin.drop(['target'],1)
col_name="target"
first_col = databin.pop(col_name)
databin1.insert(45,col_name, first_col)
databin = databin1

### Splitting data and creating plus and minus contexts

In [9]:
def contexts(train):
    return train[train[:, -1] == 1][:, :-1], train[train[:, -1] == 0][:, :-1]
def split(data, seed=1):
    splits = []
    if seed:
        np.random.seed(seed)
    sp = 4
    split_size = data.shape[0] // sp
    samples = data.to_numpy()
    np.random.shuffle(samples)
    for i in range(sp):
        train = np.concatenate([samples[:i*split_size], samples[(i+1)*split_size:]], axis=0)
        test = samples[i*split_size:(i+1)*split_size]
        splits.append([train, test])
    return splits

## FCA algorithm

It is a simple implementation of FCA logic based on plus and minus contexts. For any object, if the number of intersections with our object with plus context is more than with minus one, then the object is classified as positive, otherwise as negative.

In [10]:
def FCA_1(databin):
    splits = split(databin, seed=1)
    TPl = []
    FPl = []
    TNl = []
    FNl = []
    def Intersect(s, context1, context2):
        intersections = 0
        for i in context1:
            intersections += ((s * i).sum())
        return intersections
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        tp = 0
        fp = 0
        for s in plus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if plus > minus:
                tp += 1
            else:
                fp += 1
        TPl.append(tp)
        FPl.append(fp)
        tn = 0
        fn = 0
        for s in minus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if minus > plus:
                tn += 1
            else:
                fn += 1
        TNl.append(tn)
        FNl.append(fn)
    TP = sum(TPl)
    FP = sum(FPl)
    TN = sum(TNl)
    FN = sum(FNl)
    total = TP + FP + TN + FN
    print ("True positive:", TP ,"\n" 
        "True Negative:", TN,"\n"
        "False Positive:", FP,"\n"
        "False Negative:", FN,"\n"
        "True Positive Rate:", float(TP) / (TP + FN),"\n"
        "True Negative Rate:", float(TN) / (TN + FP),"\n"
        "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
        "False Positive Rate:",float(FP) / (FP + TN),"\n"
        "False Discovery Rate:",float(FP) / (TP + FP),"\n"
        "Accuracy:" ,(TP+TN)/total,"\n"
        "Precision:" ,TP/(TP + FP),"\n"
        "Recall:" ,TP/(TP + FN))

In [11]:
FCA_1(databin)

True positive: 157 
True Negative: 83 
False Positive: 6 
False Negative: 54 
True Positive Rate: 0.7440758293838863 
True Negative Rate: 0.9325842696629213 
Negative Predictive Value: 0.6058394160583942 
False Positive Rate: 0.06741573033707865 
False Discovery Rate: 0.03680981595092025 
Accuracy: 0.8 
Precision: 0.9631901840490797 
Recall: 0.7440758293838863


Another implementation of FCA follows the logic of dividing the number of intersections with one or another context by the the number of all elements in the context. Thus, we can evade the problem of differences of contexts' sizes and get more correct results.

In [12]:
def FCA_2(databin):
    splits = split(databin, seed=1)
    TPl = []
    FPl = []
    TNl = []
    FNl = []
    def Intersect(s, context1, context2):
        intersections = 0
        for i in context1:
            intersections += ((s * i).sum() / context1.shape[0])
        return intersections
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        tp = 0
        fp = 0
        for s in plus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if plus > minus:
                tp += 1
            else:
                fp += 1
        TPl.append(tp)
        FPl.append(fp)
        tn = 0
        fn = 0
        for s in minus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if minus > plus:
                tn += 1
            else:
                fn += 1
        TNl.append(tn)
        FNl.append(fn)
    TP = sum(TPl)
    FP = sum(FPl)
    TN = sum(TNl)
    FN = sum(FNl)
    total = TP + FP + TN + FN
    print ("True positive:", TP ,"\n" 
        "True Negative:", TN,"\n"
        "False Positive:", FP,"\n"
        "False Negative:", FN,"\n"
        "True Positive Rate:", float(TP) / (TP + FN),"\n"
        "True Negative Rate:", float(TN) / (TN + FP),"\n"
        "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
        "False Positive Rate:",float(FP) / (FP + TN),"\n"
        "False Discovery Rate:",float(FP) / (TP + FP),"\n"
        "Accuracy:" ,(TP+TN)/total,"\n"
        "Precision:" ,TP/(TP + FP),"\n"
        "Recall:" ,TP/(TP + FN))

In [13]:
FCA_2(databin)

True positive: 143 
True Negative: 113 
False Positive: 22 
False Negative: 22 
True Positive Rate: 0.8666666666666667 
True Negative Rate: 0.837037037037037 
Negative Predictive Value: 0.837037037037037 
False Positive Rate: 0.16296296296296298 
False Discovery Rate: 0.13333333333333333 
Accuracy: 0.8533333333333334 
Precision: 0.8666666666666667 
Recall: 0.8666666666666667


### State of art algorithms

In [14]:
def RandomForest(databin):
    splits = split(databin, seed=1)
    from sklearn.ensemble import RandomForestClassifier
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        X = np.concatenate([plus_train, minus_train])
        Y = np.concatenate([np.ones(shape=(plus_train.shape[0],)), np.zeros(shape=(minus_train.shape[0],))])
        clf = RandomForestClassifier(n_estimators=100, max_depth=15)
        clf = clf.fit(X, Y)
    TP= int(clf.predict(plus_test).sum())
    FN=(1 - clf.predict(plus_test)).sum()
    TN=(1 - clf.predict(minus_test)).sum()
    FP= int(clf.predict(minus_test).sum())
    total = TP + FN + TN + FP
    print ("True positive:", TP ,"\n" 
    "True Negative:", TN,"\n"
    "False Positive:", FP,"\n"
    "False Negative:", FN,"\n"
    "True Positive Rate:", float(TP) / (TP + FN),"\n"
    "True Negative Rate:", float(TN) / (TN + FP),"\n"
    "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
    "False Positive Rate:",float(FP) / (FP + TN),"\n"
    "False Discovery Rate:",float(FP) / (TP + FP),"\n"
    "Accuracy:" ,(TP+TN)/total,"\n"
    "Precision:" ,TP/(TP + FP),"\n"
    "Recall:" ,TP/(TP + FN))

In [15]:
RandomForest(databin)

True positive: 37 
True Negative: 25.0 
False Positive: 7 
False Negative: 6.0 
True Positive Rate: 0.8604651162790697 
True Negative Rate: 0.78125 
Negative Predictive Value: 0.8064516129032258 
False Positive Rate: 0.21875 
False Discovery Rate: 0.1590909090909091 
Accuracy: 0.8266666666666667 
Precision: 0.8409090909090909 
Recall: 0.8604651162790697


In [16]:
def Knn(databin):
    splits = split(databin, seed=1)
    from sklearn.neighbors import KNeighborsClassifier
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        X = np.concatenate([plus_train, minus_train])
        Y = np.concatenate([np.ones(shape=(plus_train.shape[0],)), np.zeros(shape=(minus_train.shape[0],))])
        clf = KNeighborsClassifier(n_neighbors=5)
        clf = clf.fit(X, Y)
    TP= int(clf.predict(plus_test).sum())
    FN=(1 - clf.predict(plus_test)).sum()
    TN=(1 - clf.predict(minus_test)).sum()
    FP= int(clf.predict(minus_test).sum())
    total = TP + FN + TN + FP
    print ("True positive:", TP ,"\n" 
    "True Negative:", TN,"\n"
    "False Positive:", FP,"\n"
    "False Negative:", FN,"\n"
    "True Positive Rate:", float(TP) / (TP + FN),"\n"
    "True Negative Rate:", float(TN) / (TN + FP),"\n"
    "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
    "False Positive Rate:",float(FP) / (FP + TN),"\n"
    "False Discovery Rate:",float(FP) / (TP + FP),"\n"
    "Accuracy:" ,(TP+TN)/total,"\n"
    "Precision:" ,TP/(TP + FP),"\n"
    "Recall:" ,TP/(TP + FN))

In [32]:
Knn(databin)

True positive: 36 
True Negative: 25.0 
False Positive: 11 
False Negative: 3.0 
True Positive Rate: 0.9230769230769231 
True Negative Rate: 0.6944444444444444 
Negative Predictive Value: 0.8928571428571429 
False Positive Rate: 0.3055555555555556 
False Discovery Rate: 0.23404255319148937 
Accuracy: 0.8133333333333334 
Precision: 0.7659574468085106 
Recall: 0.9230769230769231


In [17]:
def LogisticRegression(databin):   
    splits = split(databin, seed=1)
    from sklearn.linear_model import LogisticRegression
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        X = np.concatenate([plus_train, minus_train])
        Y = np.concatenate([np.ones(shape=(plus_train.shape[0],)), np.zeros(shape=(minus_train.shape[0],))])
        clf = LogisticRegression(random_state=2)
        clf = clf.fit(X, Y)
    TP= int(clf.predict(plus_test).sum())
    FN=(1 - clf.predict(plus_test)).sum()
    TN=(1 - clf.predict(plus_test)).sum()
    FP= int(clf.predict(minus_test).sum())
    total = TP + FN + TN + FP
    print ("True positive:", TP ,"\n" 
    "True Negative:", TN,"\n"
    "False Positive:", FP,"\n"
    "False Negative:", FN,"\n"
    "True Positive Rate:", float(TP) / (TP + FN),"\n"
    "True Negative Rate:", float(TN) / (TN + FP),"\n"
    "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
    "False Positive Rate:",float(FP) / (FP + TN),"\n"
    "False Discovery Rate:",float(FP) / (TP + FP),"\n"
    "Accuracy:" ,(TP+TN)/total,"\n"
    "Precision:" ,TP/(TP + FP),"\n"
    "Recall:" ,TP/(TP + FN))

In [18]:
LogisticRegression(databin)

True positive: 25 
True Negative: 12.0 
False Positive: 9 
False Negative: 12.0 
True Positive Rate: 0.6756756756756757 
True Negative Rate: 0.5714285714285714 
Negative Predictive Value: 0.5 
False Positive Rate: 0.42857142857142855 
False Discovery Rate: 0.2647058823529412 
Accuracy: 0.6379310344827587 
Precision: 0.7352941176470589 
Recall: 0.6756756756756757


### Trying my own dataset on coronavirus behavioral patterns

Independent variables are: <br >  <br > 
Believe in covid-19 <br >
Frequency of meetings with friends <br >
Frequency of hands washing <br >
Believe that covid-19 will affect the person <br >
Probability that friend will be offended by lack of physical contact <br >
Frequency of making an official pass <br >
Frequency if joking about covid <br >
Frequency of going to public places <br > <br > 
Target is the frequency of going out, I recoded this variable into a dichotomic one, 1-5 frequency was transformed into 0 and 6-10 - into 1. 

It will be also necessary to compare these models with non-binary raw data taken as independent variables.



In [19]:
df=pd.read_spss('Corona.sav', convert_categoricals=False)


In [20]:
df_initial = pd.read_spss('Corona.sav', convert_categoricals=False)

In [21]:
df_numeric = pd.read_spss('Corona.sav', convert_categoricals=False)

In [22]:
#Let's recode our variables, all of them excpet for gender are scale
positive = 'gndr_1'
negative = 'gndr_0'
df[positive] = 0
df[positive][df['gndr'] == 1] = 1
df[negative] = 0
df[negative][df['gndr'] == 0] = 1
df = df.drop(['gndr'], axis='columns')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [23]:
target = 'target'
df[target] = 0
df[target][df['chastoulitsa'] == 6] = 1
df[target][df['chastoulitsa'] == 7] = 1
df[target][df['chastoulitsa'] == 8] = 1
df[target][df['chastoulitsa'] == 9] = 1
df[target][df['chastoulitsa'] == 10] = 1
df[target][df['chastoulitsa'] == 1] = 0
df[target][df['chastoulitsa'] == 2] = 0
df[target][df['chastoulitsa'] == 3] = 0
df[target][df['chastoulitsa'] == 4] = 0
df[target][df['chastoulitsa'] == 5] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a sl

In [24]:
interval_parameters = ['age', 'vera', 'vstrechdruz', 'mitruki', 'coronakosnetsya', 'drugobid',
                     'propusk', 'shutka', 'obshestvmesta']
for feature in interval_parameters:
    bins = sorted({x for x in pd.cut(df[feature], 5)})
    for bucket in range(len(bins)):
        column_name = '{}_{}'.format(feature, bucket+1)
        df[column_name] = 0
        df[column_name][(df[feature] > bins[bucket].left) & (df[feature] <= bins[bucket].right)] = 1
        
df = df.drop(interval_parameters, axis='columns')
#df = df[[x for x in data.columns if x != 'target'] + ['target']]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/ind

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/ind

In [25]:
features = list(df)
df1 = df.drop(['target'],1)
col_name="target"
first_col = df.pop(col_name)
df1.insert(48,col_name, first_col)
df = df1

In [26]:
df = df.drop('chastoulitsa', axis='columns')

In [27]:
FCA_1(df)

True positive: 226 
True Negative: 130 
False Positive: 62 
False Negative: 122 
True Positive Rate: 0.6494252873563219 
True Negative Rate: 0.6770833333333334 
Negative Predictive Value: 0.5158730158730159 
False Positive Rate: 0.3229166666666667 
False Discovery Rate: 0.2152777777777778 
Accuracy: 0.6592592592592592 
Precision: 0.7847222222222222 
Recall: 0.6494252873563219


In [28]:
FCA_2(df)

True positive: 136 
True Negative: 218 
False Positive: 152 
False Negative: 34 
True Positive Rate: 0.8 
True Negative Rate: 0.5891891891891892 
Negative Predictive Value: 0.8650793650793651 
False Positive Rate: 0.41081081081081083 
False Discovery Rate: 0.5277777777777778 
Accuracy: 0.6555555555555556 
Precision: 0.4722222222222222 
Recall: 0.8


In [29]:
RandomForest(df)

True positive: 51 
True Negative: 45.0 
False Positive: 23 
False Negative: 16.0 
True Positive Rate: 0.7611940298507462 
True Negative Rate: 0.6617647058823529 
Negative Predictive Value: 0.7377049180327869 
False Positive Rate: 0.3382352941176471 
False Discovery Rate: 0.3108108108108108 
Accuracy: 0.7111111111111111 
Precision: 0.6891891891891891 
Recall: 0.7611940298507462


In [30]:
Knn(df)

True positive: 47 
True Negative: 48.0 
False Positive: 22 
False Negative: 18.0 
True Positive Rate: 0.7230769230769231 
True Negative Rate: 0.6857142857142857 
Negative Predictive Value: 0.7272727272727273 
False Positive Rate: 0.3142857142857143 
False Discovery Rate: 0.3188405797101449 
Accuracy: 0.7037037037037037 
Precision: 0.6811594202898551 
Recall: 0.7230769230769231


In [31]:
LogisticRegression(df)

True positive: 48 
True Negative: 27.0 
False Positive: 19 
False Negative: 27.0 
True Positive Rate: 0.64 
True Negative Rate: 0.5869565217391305 
Negative Predictive Value: 0.5 
False Positive Rate: 0.41304347826086957 
False Discovery Rate: 0.2835820895522388 
Accuracy: 0.6198347107438017 
Precision: 0.7164179104477612 
Recall: 0.64


## FCA №3

Next, let's make the algorithm more strict introducing the rule for determining the positive or negative class. If the proportion of plus intersections over minus intersections exceeds some value then it will be classified as positive. Now being just less or more is not enough. The same logic will be applied to the minus context.

In [32]:
def FCA_3(databin, P, M):
    splits = split(databin, seed=1)
    TPl = []
    FPl = []
    TNl = []
    FNl = []
    def Intersect(s, context1, context2):
        intersections = 0
        for i in context1:
            intersections += ((s * i).sum() / context1.shape[0])
        return intersections
    for train, test in splits:
        plus_train, minus_train = contexts(train)
        plus_test, minus_test = contexts(test)
        tp = 0
        fp = 0
        for s in plus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if plus/minus > P:
                tp += 1
            else:
                fp += 1
        TPl.append(tp)
        FPl.append(fp)
        tn = 0
        fn = 0
        for s in minus_test:
            plus = Intersect(s, plus_train, minus_train)
            minus = Intersect(s, minus_train, plus_train)
            if minus/plus > M:
                tn += 1
            else:
                fn += 1
        TNl.append(tn)
        FNl.append(fn)
    TP = sum(TPl)
    FP = sum(FPl)
    TN = sum(TNl)
    FN = sum(FNl)
    total = TP + FP + TN + FN
    print ("True positive:", TP ,"\n" 
        "True Negative:", TN,"\n"
        "False Positive:", FP,"\n"
        "False Negative:", FN,"\n"
        "True Positive Rate:", float(TP) / (TP + FN),"\n"
        "True Negative Rate:", float(TN) / (TN + FP),"\n"
        "Negative Predictive Value:", float(TN) / (TN + FN),"\n"
        "False Positive Rate:",float(FP) / (FP + TN),"\n"
        "False Discovery Rate:",float(FP) / (TP + FP),"\n"
        "Accuracy:" ,(TP+TN)/total,"\n"
        "Precision:" ,TP/(TP + FP),"\n"
        "Recall:" ,TP/(TP + FN))

In [33]:
FCA_3(databin, 1.1 , 1.1)

True positive: 129 
True Negative: 98 
False Positive: 35 
False Negative: 38 
True Positive Rate: 0.7724550898203593 
True Negative Rate: 0.7368421052631579 
Negative Predictive Value: 0.7205882352941176 
False Positive Rate: 0.2631578947368421 
False Discovery Rate: 0.21341463414634146 
Accuracy: 0.7566666666666667 
Precision: 0.7865853658536586 
Recall: 0.7724550898203593


In [34]:
FCA_3(databin, 1.25, 1.25)

True positive: 106 
True Negative: 76 
False Positive: 58 
False Negative: 60 
True Positive Rate: 0.6385542168674698 
True Negative Rate: 0.5671641791044776 
Negative Predictive Value: 0.5588235294117647 
False Positive Rate: 0.43283582089552236 
False Discovery Rate: 0.35365853658536583 
Accuracy: 0.6066666666666667 
Precision: 0.6463414634146342 
Recall: 0.6385542168674698


Less strict requirements for minus/plus, to have more correct negative classifications

In [35]:
FCA_3(databin, 1.25 , 1.1)

True positive: 102 
True Negative: 97 
False Positive: 61 
False Negative: 40 
True Positive Rate: 0.7183098591549296 
True Negative Rate: 0.6139240506329114 
Negative Predictive Value: 0.708029197080292 
False Positive Rate: 0.3860759493670886 
False Discovery Rate: 0.37423312883435583 
Accuracy: 0.6633333333333333 
Precision: 0.6257668711656442 
Recall: 0.7183098591549296


In [36]:
FCA_3(databin, 0.8, 1)

True positive: 158 
True Negative: 113 
False Positive: 6 
False Negative: 23 
True Positive Rate: 0.8729281767955801 
True Negative Rate: 0.9495798319327731 
Negative Predictive Value: 0.8308823529411765 
False Positive Rate: 0.05042016806722689 
False Discovery Rate: 0.036585365853658534 
Accuracy: 0.9033333333333333 
Precision: 0.9634146341463414 
Recall: 0.8729281767955801


Coronavirus data

In [37]:
FCA_3(df, 0.7, 1)

True positive: 288 
True Negative: 221 
False Positive: 0 
False Negative: 31 
True Positive Rate: 0.9028213166144201 
True Negative Rate: 1.0 
Negative Predictive Value: 0.876984126984127 
False Positive Rate: 0.0 
False Discovery Rate: 0.0 
Accuracy: 0.9425925925925925 
Precision: 1.0 
Recall: 0.9028213166144201
