In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
class Ant:
    def __init__(self):
        self._tour = []
        self._cost = []
        self._out = []

    def __str__(self):
        return f"""
        tours: {self.tour}\n
        costs: {self.cost}\n
        outs: {self.out}\n
        """
        
    @property
    def tour(self):
        return self._tour
        
    def append_tour(self, val):
        self._tour.append(val)

    @property
    def cost(self):
        return self._cost
    
    def append_cost(self, val):
        self._cost.append(val)

    @property
    def out(self):
        return self._out
    
    def append_out(self, val):
        self._out.append(val)
        

class Colony:
    def __init__(self, X, Y, epochs=15, num_sampled_features=15, num_ants=3, Q=1, tau_0=1, alpha=1, beta=1, rho=0.05):
        # must be a 1024 x 100 matrix
        self.X = X
        
        # must be 1 x 100 matrix
        self.Y = Y
        
        # 1024 features
        self.num_features = X.shape[0]
        
        # 100 instances
        self.num_instances = X.shape[1]
        
        # desired number of selected feaures
        self.num_sampled_features = num_sampled_features
        
        # ACO algorithm hyper parameters
        self.epochs = epochs
        self.num_ants = num_ants
        self.Q = Q
        
        # initial intensity of pheromone values in pheromone matrix 'tau'
        self.tau_0 = tau_0
        self.alpha = alpha
        self.beta = beta
        self.rho = rho
        
        # initialize heuristic info matrix to be 1024 x 1024
        self.eta = np.ones((X.shape[0], X.shape[0]))
        
        # init pheromone matrix to be 1024 x 1024
        # multiplied by initialized tau_0 value
        self.tau = tau_0 * np.ones((X.shape[0], X.shape[0]))
        
        # list to hold best cost values out of all ants in each iteration
        # e.g. ant 1 out of all ants holds best path/cost of iteration/epoch 1
        self.best_cost = []
        self.ants = np.empty(shape=(num_ants, 1), dtype=np.dtype(Ant))
        
        # initially best ant cost is an infinite value
        self.best_ant_cost = np.inf
        
    def run(self):
        # loop from 0 to 14
        for epoch in range(self.epochs):
            
            # move ants
            print(f'epoch {epoch}')
            
            # loop from 0 to 2
            for k in range(self.num_ants):
                
                # instantiate an Ant object
                temp_ant = Ant()
                
                # since we have 1024 features for ex, generate a random
                # number from 0 to 1023 inclusively, 1024 is excluded
                temp_ant.tour = np.random.randint(0, self.num_features)
                self.ants[k, 1] = temp_ant
                
                # loop from 1 to 1023, instead of 0 to 1023, but stop at 1024
                for l in range(1, self.num_features):
                    
                    # since we are accessing last element of tour
                    # attribute of ant make sure, .tour is never 
                    # empty or statemetn will raise error
                    i = self.ants[k, 1].tour[-1]
                    
                    # P when calculated is a 1 x 1024 row vector
                    # or will always be a 1 x num_features row vector
                    P = np.pow(self.tau[i, :], self.alpha) * np.pow(self.eta[i, :], self.beta)
                    
                    # sets the visited spots of the ants in the P matrix to 0
                    # e.g. [1000] accesses P[[1000]], or element at 1000th index
                    # [1000, 241] accesses elements at 1000th and 241st index and
                    # sets them to 0
                    P[self.ants[k, 1].tour] = 0
                    
                    # sum all elements in P row vector and use as denominator
                    P = P / np.sum(P)
                    
                    j = self.roulette(P)
                    self.ants[k, 1].append_tour(j)
                    print(self.ants[k, 1])
                    
    def roulette(self, P):
        # generate random float between (0, 1) exclusively
        r_num = np.random.uniform()
        
        # since P is a 1 x num_features matrix
        # np.cumsum(P) will be same shape as P
        p_cum_sum = np.cumsum(P)
        
        bools = (r_num <= p_cum_sum).astype(int)
        
        # return the index of the first occurence of 
        # a true/1 value in the bools array 
        return np.where(bools == 1)[0][0]
        
        
                
        
        

In [3]:
df = pd.read_csv('./data.csv')

In [8]:
# delete id diagnosis
Y = df['diagnosis']
X = df.drop(['id', 'Unnamed: 32', 'diagnosis'], axis=1, inplace=False)

In [9]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [10]:
Y = Y.to_numpy().reshape(Y.shape[0], -1)
print(Y[:5])
print(Y.shape)

[['M']
 ['M']
 ['M']
 ['M']
 ['M']]
(569, 1)


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

In [12]:
X_trains, X_cross, Y_trains, Y_cross = train_test_split(X, Y, test_size=0.3, random_state=0)
print(X_trains.shape)
print(Y_trains.shape)
print(X_cross.shape)
print(Y_cross.shape)
print(X_trains)
print(Y_trains)

(398, 30)
(398, 1)
(171, 30)
(171, 1)
     radius_mean  texture_mean  perimeter_mean  area_mean  smoothness_mean  \
478       11.490         14.59           73.99      404.9          0.10460   
303       10.490         18.61           66.86      334.3          0.10680   
155       12.250         17.94           78.27      460.3          0.08654   
186       18.310         18.58          118.60     1041.0          0.08588   
101        6.981         13.43           43.79      143.5          0.11700   
..           ...           ...             ...        ...              ...   
277       18.810         19.98          120.90     1102.0          0.08923   
9         12.460         24.04           83.97      475.9          0.11860   
359        9.436         18.32           59.82      278.6          0.10090   
192        9.720         18.22           60.73      288.1          0.06950   
559       11.510         23.93           74.52      403.5          0.09261   

     compactness_mean  co

In [13]:
colony = Colony(X_trains.T, Y_trains)
colony.run()

AttributeError: 'numpy.ndarray' object has no attribute 'trains'