# Guide sequence prediction and aggregate score

### Step 0

If you did not adjust `PYTHONPATH` on the command line when starting Jupyter Notebook, use a hack so Python can find Elevation modules.

In [None]:
import sys

# Prepend the parent directory to the module search path so that the
# `elevation` package can be imported from this notebook.
sys.path[:0] = ['../']

### Step 1
Import Elevation's data loader and predictor. _sklearn (scikit-learn) will emit a `DeprecationWarning`; it can be safely ignored._

In [None]:
import elevation.load_data
from elevation.cmds.predict import Predict

### Step 2

Load the Haeussler data from `CRISPR/data/offtarget/Haeussler/fig2-crisporData_withReadFraction.tab`.

In [None]:
# Cap on how many guide/off-target pairs are taken from the data set.
num_x = 100

# Only the first element of the loader's result is used here.
roc_data = elevation.load_data.load_HauesslerFig2(1)[0]

# Take the first `num_x` entries of each sequence column:
# '30mer' holds the wildtype guides, '30mer_mut' the matching off-targets.
wildtype, offtarget = (
    list(roc_data[column])[:num_x] for column in ('30mer', '30mer_mut')
)

### Step 3

Compute the predictions. Depending on your system, this may take a while.

_sklearn (scikit-learn) will emit `DataConversionWarning` and `ConvergenceWarning`; these can be safely ignored._

In [None]:
# Run Elevation's predictor over the wildtype/off-target sequence lists.
# NOTE(review): the other cells in this notebook use the result as a mapping
# keyed by model name (e.g. 'linear-raw-stacker', 'CFD') whose values can be
# indexed by pair position -- confirm against Predict.execute.
predictions = Predict().execute(wildtype, offtarget)

### Step 4: Optional

Pretty print the predictions.

In [None]:
# Print one line per guide/off-target pair, followed by every model's score
# for that pair as "name=value".
#
# The loop walks the sequences themselves instead of range(num_x), so it
# cannot raise IndexError when fewer than num_x pairs were loaded.
# dict.items() and a single pre-formatted string give the same output on
# Python 2 and Python 3 (iteritems() and a bare map() are Python-2-only, and
# print(a, b, c) prints a tuple repr on Python 2).
for i, (guide, target) in enumerate(zip(wildtype, offtarget)):
    scores = ", ".join(
        "{0}={1}".format(name, values[i])
        for name, values in predictions.items()
    )
    print("{0} {1} {2}".format(guide, target, scores))

### Step 5

Import settings and load the aggregation model. _sklearn (scikit-learn) will emit a `DeprecationWarning`; it can be safely ignored._

In [None]:
from elevation import settings
import pickle

# Load the pickled aggregation model from the path configured in Elevation's
# settings. Pickle data is a byte stream, so the file must be opened in
# binary mode: text mode fails on Python 3 and can corrupt the stream through
# newline translation on Windows.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# settings.agg_model_file should only ever point at a trusted model file.
with open(settings.agg_model_file, 'rb') as fh:
    # The pickle unpacks into two objects; this notebook only uses
    # `final_model` afterwards.
    final_model, other = pickle.load(fh)

### Step 6

Compute the aggregate score.

In [None]:
from elevation import aggregation
import numpy

# Boolean flag array with `num_x` entries, all False.
# NOTE(review): presumably one flag per off-target marking whether it is
# genic -- confirm against aggregation.get_aggregated_score.
# The builtin `bool` is used as the dtype because the `numpy.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError; `bool` yields the same dtype on every NumPy version.
isgenic = numpy.zeros(num_x, dtype=bool)

# Combine the per-pair scores of the two models with the loaded aggregation
# model into the aggregate score.
aggregate_score = aggregation.get_aggregated_score(predictions['linear-raw-stacker'],
                                                   predictions['CFD'],
                                                   isgenic,
                                                   final_model)

### Step 7: Optional

Pretty print the aggregate score.

In [None]:
# Call form of print: for a single argument the output is identical on
# Python 2, and unlike the print statement it is also valid on Python 3.
print(aggregate_score)