In [1]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score,confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
import pandas as pd
from sklearn.utils import resample
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PoissonRegressor
from sklearn.neural_network import MLPClassifier
import numpy as np

import matplotlib.pyplot as plt

In [2]:
from sklearn import datasets

iris = datasets.load_iris(return_X_y=True,as_frame=True)

In [3]:
X = iris[0]
y = iris[1]

In [4]:
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: target, Length: 150, dtype: int32

In [7]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.2,random_state=42,shuffle=True)

model = Pipeline(
    steps=[
        ('scaler',MinMaxScaler()),
        ('learner',
         MLPClassifier(hidden_layer_sizes=(15,15,15),activation='relu',
                       alpha=.0001,
                       batch_size=len(Xtrain),
                       learning_rate_init=1e-2,
                       learning_rate='constant',
                       max_iter=100000,
                       tol=1e-12,
                       early_stopping=False,
                       verbose=False,
                       n_iter_no_change=150
                       )
         )
    ]
).fit(Xtrain,ytrain)

In [8]:
ypred = pd.Series(model.predict(Xtest),index=ytest.index)

In [9]:
confusion_matrix(ytest,ypred)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

In [10]:
Xtrain,ytrain

(     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 22                 4.6               3.6                1.0               0.2
 15                 5.7               4.4                1.5               0.4
 65                 6.7               3.1                4.4               1.4
 11                 4.8               3.4                1.6               0.2
 42                 4.4               3.2                1.3               0.2
 ..                 ...               ...                ...               ...
 71                 6.1               2.8                4.0               1.3
 106                4.9               2.5                4.5               1.7
 14                 5.8               4.0                1.2               0.2
 92                 5.8               2.6                4.0               1.2
 102                7.1               3.0                5.9               2.1
 
 [120 rows x 4 columns],
 22     0
 15     0
 65  

In [34]:
#Explicit negation
x_sets = []
targets = []

for c in ytrain.unique():

    subset = Xtrain[ytrain!=c].copy()

    subset_target = pd.Series(np.repeat(c,len(subset)),index=subset.index)

    x_sets.append(subset)
    targets.append(subset_target)

neg_Xtrain = pd.concat(x_sets)
neg_Xtrain['neg'] = 1

Xtrain2 = Xtrain.copy()
Xtrain2['neg'] = 0

Xtrain_new = pd.concat([Xtrain2,neg_Xtrain])

neg_ytrain = pd.concat(targets)

ytrain_new = pd.concat([ytrain,neg_ytrain])

Xtest_new = Xtest.copy()
Xtest_new['neg'] = 0
model = Pipeline(
    steps=[
        ('scaler',MinMaxScaler()),
        ('learner',
         MLPClassifier(hidden_layer_sizes=(15,15,15),activation='relu',
                       alpha=.0001,
                       batch_size=len(Xtest_new),
                       learning_rate_init=1e-2,
                       learning_rate='constant',
                       max_iter=100000,
                       tol=1e-12,
                       early_stopping=False,
                       verbose=False,
                       n_iter_no_change=150
                       )
         )
    ]
).fit(Xtrain_new,ytrain_new)

ypred = pd.Series(model.predict(Xtest_new),index=Xtest_new.index)

predicted_probabilities = model.predict_proba(
    np.array([
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,0],#Negation is off, the arg max class should be 0
        [4.6,3.6,1.0,0.2,1],#Negation on true 0
        [6.7,3.1,4.4,1.4,1],#Negation on true 1
        [7.1,3.0,5.9,2.1,1]#Negation on true 2
    ])
)


print(
    "Confusion Matrix on testing holdout:\n",
    confusion_matrix(ytest,ypred),"\n\n",
    "Prediction Proba:\n",
    "True 0 records, negated:\n",
      np.round(predicted_probabilities,3)[:5],"\n\n",
    "True 0 record, not negated:\n",
      np.round(predicted_probabilities,3)[5],"\n\n",
    "Negated predictions for classes 0, 1 and 2:\n",
      np.round(predicted_probabilities,3)[6:],"\n\n",
      "Prediction (sampled) from the probabilities above:\n",
      np.apply_along_axis(axis=1, arr=predicted_probabilities,
        func1d = lambda x: np.random.choice(len(x), size=1, p=x)
    )
)

Confusion Matrix on testing holdout:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]] 

 Prediction Proba:
 True 0 records, negated:
 [[0.    0.509 0.491]
 [0.    0.509 0.491]
 [0.    0.509 0.491]
 [0.    0.509 0.491]
 [0.    0.509 0.491]] 

 True 0 record, not negated:
 [1. 0. 0.] 

 Negated predictions for classes 0, 1 and 2:
 [[0.    0.509 0.491]
 [0.507 0.001 0.492]
 [0.484 0.516 0.001]] 

 Prediction (sampled) from the probabilities above:
 [[2]
 [1]
 [1]
 [2]
 [1]
 [0]
 [2]
 [0]
 [0]]




In [36]:
#Implicit negation
x_sets = []
targets = []

for c in [0,1]:

    subset = Xtrain[ytrain!=c].copy()

    subset_target = pd.Series(np.repeat(c,len(subset)),index=subset.index)

    x_sets.append(subset)
    targets.append(subset_target)

neg_Xtrain = pd.concat(x_sets)
neg_Xtrain['neg'] = 1

Xtrain2 = Xtrain.copy()
Xtrain2['neg'] = 0

Xtrain_new = pd.concat([Xtrain2,neg_Xtrain])

neg_ytrain = pd.concat(targets)

ytrain_new = pd.concat([ytrain,neg_ytrain])

Xtest_new = Xtest.copy()
Xtest_new['neg'] = 0


model = Pipeline(
    steps=[
        ('scaler',MinMaxScaler()),
        ('learner',
         MLPClassifier(hidden_layer_sizes=(15,15,15),activation='relu',
                       alpha=.0001,
                       batch_size=len(Xtest_new),
                       learning_rate_init=1e-2,
                       learning_rate='constant',
                       max_iter=100000,
                       tol=1e-12,
                       early_stopping=False,
                       verbose=False,
                       n_iter_no_change=150
                       )
         )
    ]
).fit(Xtrain_new,ytrain_new)

ypred = pd.Series(model.predict(Xtest_new),index=Xtest_new.index)

predicted_probabilities = model.predict_proba(
    np.array([
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,0],#Negation is off, the arg max class should be 0
        [4.6,3.6,1.0,0.2,1],#Negation on true 0
        [6.7,3.1,4.4,1.4,1],#Negation on true 1
        [7.1,3.0,5.9,2.1,1]#Negation on true 2
    ])
)


print(
    "Confusion Matrix on testing holdout:\n",
    confusion_matrix(ytest,ypred),"\n\n",
    "Prediction Proba:\n",
    "True 0 records, negated:\n",
      np.round(predicted_probabilities,3)[:5],"\n\n",
    "True 0 record, not negated:\n",
      np.round(predicted_probabilities,3)[5],"\n\n",
    "Negated predictions for classes 0, 1 and 2:\n",
      np.round(predicted_probabilities,3)[6:],"\n\n",
      "Prediction (sampled) from the probabilities above:\n",
      np.apply_along_axis(axis=1, arr=predicted_probabilities,
        func1d = lambda x: np.random.choice(len(x), size=1, p=x)
    )
)

Confusion Matrix on testing holdout:
 [[ 0 10  0]
 [ 0  9  0]
 [ 0 11  0]] 

 Prediction Proba:
 True 0 records, negated:
 [[0.429 0.436 0.135]
 [0.429 0.436 0.135]
 [0.429 0.436 0.135]
 [0.429 0.436 0.135]
 [0.429 0.436 0.135]] 

 True 0 record, not negated:
 [0.429 0.436 0.135] 

 Negated predictions for classes 0, 1 and 2:
 [[0.429 0.436 0.135]
 [0.429 0.436 0.135]
 [0.429 0.436 0.135]] 

 Prediction (sampled) from the probabilities above:
 [[1]
 [2]
 [1]
 [0]
 [0]
 [2]
 [1]
 [1]
 [1]]




In [43]:
#Implicit negation
x_sets = []
targets = []

for c in [0,1]:

    subset = Xtrain[ytrain!=c].copy()

    subset_target = pd.Series(np.repeat(c,len(subset)),index=subset.index)

    x_sets.append(subset)
    targets.append(subset_target)

neg_Xtrain = pd.concat(x_sets)
neg_Xtrain['neg'] = 1

Xtrain2 = Xtrain.copy()
Xtrain2['neg'] = 0

Xtrain_new = pd.concat([Xtrain2,neg_Xtrain])

neg_ytrain = pd.concat(targets)

ytrain_new = pd.concat([ytrain,neg_ytrain])

Xtest_new = Xtest.copy()
Xtest_new['neg'] = 0


model = Pipeline(
    steps=[
        ('scaler',MinMaxScaler()),
        ('learner',
         MLPClassifier(hidden_layer_sizes=(10,10),activation='logistic',
                       alpha=.0001,
                       batch_size=len(Xtest_new),
                       learning_rate_init=1e-2,
                       learning_rate='constant',
                       max_iter=100000,
                       tol=1e-12,
                       early_stopping=False,
                       verbose=False,
                       n_iter_no_change=150
                       )
         )
    ]
).fit(Xtrain_new,ytrain_new)

ypred = pd.Series(model.predict(Xtest_new),index=Xtest_new.index)

predicted_probabilities = model.predict_proba(
    np.array([
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,1],#Negation is on, the arg max class should either be 1 or 2
        [4.6,3.6,1.0,0.2,0],#Negation is off, the arg max class should be 0
        [4.6,3.6,1.0,0.2,1],#Negation on true 0
        [6.7,3.1,4.4,1.4,1],#Negation on true 1
        [7.1,3.0,5.9,2.1,1]#Negation on true 2
    ])
)


print(
    "Confusion Matrix on testing holdout:\n",
    confusion_matrix(ytest,ypred),"\n\n",
    "Prediction Proba:\n",
    "True 0 records, negated:\n",
      np.round(predicted_probabilities,3)[:5],"\n\n",
    "True 0 record, not negated:\n",
      np.round(predicted_probabilities,3)[5],"\n\n",
    "Negated predictions for classes 0, 1 and 2:\n",
      np.round(predicted_probabilities,3)[6:],"\n\n",
      "Prediction (sampled) from the probabilities above:\n",
      np.apply_along_axis(axis=1, arr=predicted_probabilities,
        func1d = lambda x: np.random.choice(len(x), size=1, p=x)
    )
)

Confusion Matrix on testing holdout:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]] 

 Prediction Proba:
 True 0 records, negated:
 [[0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]] 

 True 0 record, not negated:
 [0.999 0.001 0.   ] 

 Negated predictions for classes 0, 1 and 2:
 [[0.    1.    0.   ]
 [0.999 0.001 0.   ]
 [0.538 0.462 0.   ]] 

 Prediction (sampled) from the probabilities above:
 [[1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]]


