## Preparation
### Install
```bash
$ sudo -H pip3 install numpy pyswarm
```

## Objective function to be *minimized*
The first argument is the weight vector; any extra positional arguments are supplied through the optimizer's `args` parameter.

In [None]:
import numpy as np

def accuracy_error(weights, *args):
    """Return the squared distance between ``weights`` and the target vector.

    In this example [1, 2, 3, 4] is assumed to be the optimal weight vector,
    so the returned error is zero exactly at that point.

    Args:
        weights: Candidate weight vector, compared element-wise with the
            target.
        *args: Extra data forwarded by the caller. This demo expects the
            first extra argument to be the string 'hi'.

    Returns:
        The sum of squared differences between ``weights`` and the target.
    """
    target = np.array([1, 2, 3, 4])
    candidate = np.array(weights)
    offset = candidate - target
    squared_error = np.sum(offset ** 2)
    # Shows that additional data can be passed through ``args``.
    assert args[0] == 'hi'
    return squared_error

# Sanity check: the squared differences are 0 + 1 + 1 + 4, so this prints 6.
print(accuracy_error([1, 1, 4, 2], 'hi'))

## Searching parameters

In [None]:
# Search space: one entry per weight.
lower_bound = [1] * 4  # Min of each weight
upper_bound = [10] * 4  # Max of each weight

# Swarm size: this many particles search the space together.
num_particles = 100

# Each particle is pulled towards the best position it has seen itself (pb)
# and the best position seen by any particle (pg).
# In each step, a particle moves by v from its current position x, where:
#   v = omega * v + phip * (pb - x) + phig * (pg - x)
# NOTE(review): pyswarm presumably also scales the two attraction terms by
# random factors -- confirm against the pyswarm documentation.
#   omega: particle velocity scaling factor
#   phip:  factor to search away from the particle's best known position
#   phig:  factor to search away from the global best known position
omega = phip = phig = 0.5

# Termination and reporting settings.
# Max number of iterations to search.
maxiter = 100
# Min step of the best position before the search terminates.
minstep = 1e-8
# Min change of the objective value before the search terminates.
mindiff = 1e-8
# Whether to show progress for every iteration.
debug = True

In [None]:
from pyswarm import pso

# Run the particle swarm search over the bounded weight space.
# 'hi' is handed to accuracy_error as its extra positional argument via args.
weights, error = pso(
    accuracy_error,
    lower_bound,
    upper_bound,
    args=['hi'],
    swarmsize=num_particles,
    omega=omega,
    phip=phip,
    phig=phig,
    maxiter=maxiter,
    minstep=minstep,
    minfunc=mindiff,
    debug=debug,
)

# Show the two values returned by the search.
print(weights, error)

## Persper-specific processing

### Load JIRA issue data sets

In [None]:
import pickle

# Download the test pickle file from https://share.weiyun.com/9ecbf5799502577312baa6b17a584b17
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if you trust where it came from.
# The with-block closes the file handle as soon as loading finishes.
with open('../data/jira-issues-test.pickle', 'rb') as pickle_file:
    datasets = pickle.load(pickle_file)

### Convert type string to index in the weight vector
This facilitates using the weight vector.

In [None]:
import sys
sys.path.append('../lib')
import labeler
from labeler import apache_type

def weight_index(commit_type):
    """Return the position of ``commit_type`` in the weight vector.

    The order is BUG -> 0, IMPROVEMENT -> 1, FEATURE -> 2, MAINTENANCE -> 3,
    using the constants from ``labeler``. Any other value fails the
    assertion.
    """
    leading_types = (labeler.BUG, labeler.IMPROVEMENT, labeler.FEATURE)
    for position, known_type in enumerate(leading_types):
        if commit_type == known_type:
            return position
    # Only MAINTENANCE may remain once the first three types are ruled out.
    assert commit_type == labeler.MAINTENANCE
    return 3

# Print the index of each type, in the order bug, feature, maintenance,
# improvement.
# NOTE(review): assumes the labeler constants equal these lowercase strings
# -- confirm against labeler.
for type_name in ('bug', 'feature', 'maintenance', 'improvement'):
    print(weight_index(type_name))

### Get commit-to-type mappings
**NOTE:** The type here refers to the index of a type in the weight vector.
Run the cells above first, since this cell uses `datasets`, `apache_type`, and `weight_index` defined there.

In [None]:
def get_commit_type(datasets, repo_name):
    """Map each commit of a repository to the weight-vector index of its issue type.

    Args:
        datasets: Mapping from repository name to a mapping of issue id to
            issue record. Each issue record is read through its 'type' and
            'commits' keys.
        repo_name: Key of the repository to process in ``datasets``.

    Returns:
        A dict mapping every commit found in ``issue['commits']`` to the
        index returned by ``weight_index`` for that issue's type. Commits of
        issues whose type maps to ``labeler.SKIP`` are left out. If a commit
        is listed under several issues, the issue iterated last determines
        its entry.
    """
    commit_type = {}
    # Only the issue records are needed here, not their ids.
    for issue in datasets[repo_name].values():
        commits = issue['commits']
        if not commits:
            # Nothing to label. Skipping here also keeps the type of a
            # commit-less issue from ever being looked up.
            continue
        # The type belongs to the issue, so resolve it once rather than once
        # per commit.
        std_type = apache_type[issue['type']]
        if std_type == labeler.SKIP:
            continue
        type_index = weight_index(std_type)
        for commit in commits:
            commit_type[commit] = type_index
    return commit_type

# Example: build the commit-to-index mapping for the 'zookeeper' repository.
get_commit_type(datasets, 'zookeeper')