In [74]:
import pyswarms as ps
from pyswarms.utils.plotters import (plot_cost_history)



In [75]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [76]:
from keras import Sequential
from keras.layers import Dense, Dropout
from sklearn import preprocessing 
from scipy.io import arff
#from sklearn.metrics import mean_squared_error
#from sklearn.metrics import mean_absolute_error


In [77]:
data = arff.loadarff("./dataset/Medicaldataset.arff")

In [78]:
df = pd.DataFrame(data[0])

df.head()

Unnamed: 0,age,gender,impluse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64.0,1.0,66.0,160.0,83.0,160.0,1.8,0.012,b'negative'
1,21.0,1.0,94.0,98.0,46.0,296.0,6.75,1.06,b'positive'
2,55.0,1.0,64.0,160.0,77.0,270.0,1.99,0.003,b'negative'
3,64.0,1.0,70.0,120.0,55.0,270.0,13.87,0.122,b'positive'
4,55.0,1.0,64.0,112.0,65.0,300.0,1.08,0.003,b'negative'


In [79]:
X = df.iloc[:,df.columns !='class'] #select all features except for class
X_1 = preprocessing.normalize(X, axis=1) #normalizing the features
X_1 = pd.DataFrame(X_1, columns=df.columns[:-1])
y = df['class'].str.decode('utf-8') #transform target to 0 and 1
mapping = {'negative': 0, 'positive': 1}
y = y.replace(mapping).to_numpy()


In [80]:
X_train, X_test, y_train, y_test = train_test_split(X_1, y, test_size=0.33, random_state=42) 

In [81]:
#perform feature extraction
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import cross_val_score

# Loop over different values of n_estimators
for n_trees in [50, 100, 200, 500]:
    # Create RandomForestClassifier with current n_estimators
    forest = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    
    # Use cross_val_score for evaluation
    scores = cross_val_score(forest, X, y, cv=5)  # 5-fold cross-validation
    mean_score = scores.mean()
    
    print(f"n_estimators={n_trees}, Mean Cross-Validation Score: {mean_score}")



n_estimators=50, Mean Cross-Validation Score: 0.9863607558474479
n_estimators=100, Mean Cross-Validation Score: 0.9863607558474479
n_estimators=200, Mean Cross-Validation Score: 0.9863607558474479
n_estimators=500, Mean Cross-Validation Score: 0.9856002995736837


In [82]:
#create and train a random forest
forest= RandomForestClassifier(n_estimators= 200, random_state= 42)
forest.fit(X_train, y_train)

#get the most important features
forest_feats= SelectFromModel(forest, threshold= 'median')
forest_feats.fit(X_train, y_train)

#get training and development sets that have only the most important features
x_train_forest= forest_feats.transform(X_train)
x_devel_forest= forest_feats.transform(X_test)

#see which features were retained
for i in forest_feats.get_support(indices= True):
    print(X_train.columns[i])

age
glucose
kcm
troponin


In [83]:
X_2 = X_1[["age", "glucose", "kcm", "troponin"]]
print(X_2)

           age   glucose       kcm  troponin
0     0.248097  0.620242  0.006978  0.000047
1     0.063706  0.897957  0.020477  0.003216
2     0.164671  0.808387  0.005958  0.000009
3     0.202879  0.855896  0.043968  0.000387
4     0.162973  0.888944  0.003200  0.000009
...        ...       ...       ...       ...
1314  0.164247  0.761509  0.006085  0.000022
1315  0.288692  0.651745  0.005818  0.000752
1316  0.187620  0.400255  0.005170  0.017720
1317  0.114900  0.942603  0.012341  0.000764
1318  0.203388  0.534391  0.202949  0.007059

[1319 rows x 4 columns]


In [84]:
X_train, X_test, y_train, y_test = train_test_split(X_2, y, test_size=0.33, random_state=42) 

In [85]:
y_train = np.array(y_train) #make it numpy array for math operations
type(y_train)

numpy.ndarray

In [86]:
#set up NN architecture
n_inputs = 4 #input layer
n_hidden = 20 #hidden layer
n_classes = 2 #output layer

num_samples = 1319 #number of samples

In [87]:
def logits_function(p):
    """ Calculate roll-back the weights and biases

    Inputs
    ------
    p: np.ndarray
        The dimensions should include an unrolled version of the
        weights and biases.

    Returns
    -------
    numpy.ndarray of logits for layer 2

    """
    # Roll-back the weights and biases
    W1 = p[0:80].reshape((n_inputs,n_hidden))
    b1 = p[80:100].reshape((n_hidden,))
    W2 = p[100:160].reshape((n_hidden,n_classes))
    b2 = p[160:163].reshape((n_classes,))

    # Perform forward propagation
    z1 = X_train.dot(W1) + b1  # Pre-activation in Layer 1
    a1 = np.tanh(z1)           # Activation in Layer 1
    logits = a1.dot(W2) + b2   # Pre-activation in Layer 2
    return logits              # Logits for Layer 2

In [88]:
# Forward propagation
def forward_prop(params):
    """Forward propagation as objective function

    This computes for the forward propagation of the neural network, as
    well as the loss.

    Inputs
    ------
    params: np.ndarray
        The dimensions should include an unrolled version of the
        weights and biases.

    Returns
    -------
    float
        The computed negative log-likelihood loss given the parameters
    """
    logits = logits_function(params)
   
    # Compute for the softmax of the logits
    exp_scores = np.exp(logits)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    # Compute for the negative log likelihood

    corect_logprobs = -np.log(probs[range(num_samples), y_train])
    
    loss = np.sum(corect_logprobs) / num_samples
    return loss


In [89]:
def f(x):
    """Higher-level method to do forward_prop in the
    whole swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle
    """
    n_particles = x.shape[0]
    j = [forward_prop(x[i]) for i in range(n_particles)] #for loop to calculate loss for each particle
    return np.array(j)

In [90]:
%%time
# Initialize swarm
options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}

# Call instance of PSO
dimensions = (n_inputs * n_hidden) + (n_hidden * n_classes) + n_hidden + n_classes
optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=dimensions, options=options)

# Perform optimization
cost, pos = optimizer.optimize(f, iters=2000)

2023-11-22 23:32:16,053 - pyswarms.single.global_best - INFO - Optimize for 2000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best:   0%|          |0/2000


ValueError: cannot reshape array of size 42 into shape (20,2)

In [None]:
def predict(pos):
    """
    Use the trained weights to perform class predictions.

    Inputs
    ------
    pos: numpy.ndarray
        Position matrix found by the swarm. Will be rolled
        into weights and biases.
    """
    logits = logits_function(pos)
    y_pred = np.argmax(logits, axis=1)
    return y_pred

In [None]:
(predict(pos) == y_train).mean()

In [None]:
#visualization
#import modules

#import additional libraries
from pyswarms.utils.plotters import (plot_cost_history, plot_contour, plot_surface)


In [None]:
plot_cost_history(cost_history= optimizer.cost_history)
plt.show()

In [None]:
#animated swarm
from pyswarms.utils.plotters.formatters import Mesher
from pyswarms.utils.functions import single_obj as fx


In [None]:
# Initialize mesher with sphere function
m = Mesher(func=fx.sphere)
                     
#set the position of the particles swarm in one of the dimension 
position = np.array(optimizer.pos_history)
position = position[:,:, 220:222]

In [None]:
#Plot 2D Contour
#Make animation
anim = plot_contour(pos_history=position,
                            mesher=m,
                            mark=(0,0)) #red cross in the middle  

anim.save('2d.gif')                          

In [None]:
#plot 2D contour with optimizer based on velocity history
position = np.array(optimizer.velocity_history)
position = position[:,:, 220:222]
anim = plot_contour(pos_history=position,
                            mesher=m,
                            mark=(0,0)) #red cross in the middle  

anim.save('2d_velocity.gif')  