In [1]:
import arff
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the ARFF file
file_path = 'HIGGS.arff'
with open(file_path, 'r') as f:
    dataset = arff.load(f)


In [3]:
# Extract data and labels
data = np.array(dataset['data'])
data = data[~np.isnan(data.astype('float')).any(axis=1)] # Remove rows with NaN values

X = data[:, 1:]
y = data[:, 0]

In [4]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Scale the features
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
print(X_test.shape)
print(np.isnan(X_test).any())

(19610, 28)
False


In [7]:
import numpy as np

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

def objective_function(X, y, solution):
    # Extract the weights from the solution
    weights = solution[:-1]
    bias = solution[-1]

    # Fit the logistic regression model
    clf = LogisticRegression(C=1e-4, solver='lbfgs', max_iter=1000)
    clf.coef_ = weights.reshape(1, -1)
    clf.intercept_ = np.array([bias])
    clf.fit(X, y)

    # Compute the predicted probabilities
    y_pred_proba = clf.predict_proba(X)[:, 1]

    # Compute the accuracy score
    y_pred = (y_pred_proba > 0.5).astype(int).astype(str)
    accuracy = accuracy_score(y, y_pred)

    # Return the negative accuracy to maximize the objective function
    return -accuracy


In [11]:

def crow_search_algorithm(X, y, n_crows=20, n_iterations=50, fl=2, ap=0.1):

    # Initialize the crow positions and memory
    n_features = X.shape[1]
    crows_pos = np.random.uniform(-1, 1, (n_crows, n_features))
    memory = np.copy(crows_pos)

    # Evaluate the initial crow positions
    fitness = np.array([objective_function(X, y, crow) for crow in crows_pos])
    memory_fitness = np.copy(fitness)

    for iteration in range(n_iterations):
        print('Iteration {}/{}'.format(iteration + 1, n_iterations))
        # Calculate the center position of the crows
        center = np.mean(memory, axis=0)

        for i, crow in enumerate(crows_pos):

            # Generate a random crow index different from the current one
            random_crow_idx = i
            while random_crow_idx == i:
                random_crow_idx = np.random.randint(0, n_crows)

            # Calculate the new position of the crow
            if np.random.uniform(0, 1) < ap:
                new_pos = crow + fl * (memory[random_crow_idx] - crow)
            else:
                new_pos = crow + np.random.uniform(-1, 1) * (center - crow)

            # Evaluate the new position
            new_fitness = objective_function(X, y, new_pos)

            # Update the crow position and memory if necessary
            if new_fitness < fitness[i]:
                crows_pos[i] = new_pos
                fitness[i] = new_fitness

                if new_fitness < memory_fitness[i]:
                    memory[i] = new_pos
                    memory_fitness[i] = new_fitness

    # Return the best solution found
    best_solution_idx = np.argmin(memory_fitness)
    return memory[best_solution_idx]



In [12]:
# Call the crow search algorithm
best_solution = crow_search_algorithm(X_train, y_train)

Iteration 1/50
Iteration 2/50
Iteration 3/50
Iteration 4/50
Iteration 5/50
Iteration 6/50
Iteration 7/50
Iteration 8/50
Iteration 9/50
Iteration 10/50
Iteration 11/50
Iteration 12/50
Iteration 13/50
Iteration 14/50
Iteration 15/50
Iteration 16/50
Iteration 17/50
Iteration 18/50
Iteration 19/50
Iteration 20/50
Iteration 21/50
Iteration 22/50
Iteration 23/50
Iteration 24/50
Iteration 25/50
Iteration 26/50
Iteration 27/50
Iteration 28/50
Iteration 29/50
Iteration 30/50
Iteration 31/50
Iteration 32/50
Iteration 33/50
Iteration 34/50
Iteration 35/50
Iteration 36/50
Iteration 37/50
Iteration 38/50
Iteration 39/50
Iteration 40/50
Iteration 41/50
Iteration 42/50
Iteration 43/50
Iteration 44/50
Iteration 45/50
Iteration 46/50
Iteration 47/50
Iteration 48/50
Iteration 49/50
Iteration 50/50


In [13]:
print('Best solution: {}'.format(best_solution))

Best solution: [ 0.36944354  0.41155158  0.75470761  0.352415   -0.9722137   0.16761389
  0.05485435  0.48887086  0.71799335 -0.10894728 -0.40152416 -0.91269988
  0.52146914  0.62835077 -0.58086433 -0.64373499  0.40755629  0.87712117
  0.54574711 -0.74930399  0.55780576  0.28983667  0.34892579 -0.90776447
 -0.69027327  0.68612954 -0.78480384  0.45884059]


In [14]:
print(X_test.shape)
print(np.isnan(X_test).any())

(19610, 28)
False


In [15]:
# Create a logistic regression model using the best solution
# model = LogisticRegression(fit_intercept=False,solver='lbfgs')
model = LogisticRegression(C=1e-4, solver='lbfgs', max_iter=1000,fit_intercept=False)
model.coef_ = best_solution[:-1].reshape(1, -1)
model.intercept_ = best_solution[-1]

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the test data
# y_pred = model.predict(X_test)

# Evaluate the performance of the model
# accuracy = accuracy_score(y_test, y_pred)
# precision = precision_score(y_test, y_pred)
# recall = recall_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred)

LogisticRegression(C=0.0001, fit_intercept=False, max_iter=1000)

In [16]:
y_pred = model.predict(X_test)

In [17]:
print('Accuracy: {}'.format(accuracy_score(y_test, y_pred)))

Accuracy: 0.6057113717491076


In [18]:
#logistic regression model
# Create a logistic regression model
model2 = LogisticRegression(solver='lbfgs',max_iter=100)
model2.fit(X_train, y_train)
y2_pred = model2.predict(X_test)
print('Accuracy: {}'.format(accuracy_score(y_test, y2_pred)))

Accuracy: 0.5895461499235084


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
