Merge pull request #18 from Gravity-Spy/develop
Merge develop into master
scottcoughlin2014 committed Apr 19, 2018
2 parents 2a35a6f + f87c2d7 commit c85c105
Showing 21 changed files with 9,876 additions and 312 deletions.
60 changes: 22 additions & 38 deletions GravitySpy-init
@@ -1,45 +1,29 @@
#!/bin/bash -e
#
# Set up a new python2.7 virtualenv for the GWpy software stack

# get name
target=$1
[[ -z ${target} ]] && target=${HOME}/opt/gwpysoft
packagefile=$2
rm -rf ~/opt/GravitySpy-py27
virtualenv-2.7 ~/opt/GravitySpy-py27
. ~/opt/GravitySpy-py27/bin/activate
python -m pip install --upgrade --quiet pip setuptools
pip install .
pip install cython

# -- install dependencies for virtualenv itself
# get python2.7 version
if [[ -z ${PYTHON_VERSION} ]]; then
PYTHON_VERSION=`python2.7 -c '
import sys;
print(".".join(map(str, sys.version_info[:2])))'`
fi
if [[ -z ${PYTHON_USER_BASE} ]]; then
PYTHON_USER_BASE=`python2.7 -c 'import site; print(site.USER_BASE)'`
PYTHON_USER_BASE=${HOME}/python
fi
if [[ -z ${PYTHON_USER_SITE} ]]; then
PYTHON_USER_SITE=`python2.7 -c 'import site; print(site.USER_SITE)'`
PYTHON_USER_SITE=${HOME}/python/lib/python2.7/site-packages
fi
# create local directories
mkdir -p ${PYTHON_USER_SITE} 1>/dev/null
export PATH=$PATH:/usr/local/cuda/bin/

echo ${PYTHON_USER_BASE}
echo ${PYTHON_USER_SITE}
git clone https://github.com/Theano/libgpuarray.git
cd libgpuarray
rm -rf build Build
mkdir Build
cd Build
cmake .. -DCMAKE_INSTALL_PREFIX=~/opt/GravitySpy-py27 -DCMAKE_BUILD_TYPE=Release
make
make install

# install pip
which pip &>/dev/null || easy_install -U --prefix=${PYTHON_USER_BASE} pip
export PATH=${PATH}:${PYTHON_USER_BASE}/bin
cd ..

# install virtualenv
pip install "virtualenv" --prefix=${PYTHON_USER_BASE}
echo "Virtualenv is now installed"
# Run the following exports and add them to your ~/.bashrc file
export CPATH=$CPATH:~/opt/GravitySpy-py27/include
export LIBRARY_PATH=$LIBRARY_PATH:~/opt/GravitySpy-py27/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/opt/GravitySpy-py27/lib

# -- create virtualenv
virtualenv $target --system-site-packages --clear
. $target/bin/activate

# Upgrading pip
pip install pip --upgrade
pip install -r $packagefile
python setup.py build
python setup.py install --prefix=~/opt/GravitySpy-py27/
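
As a quick sanity check of the environment this script builds, something like the following minimal sketch could confirm that Theano imports and compiles inside the new virtualenv (the file name smoke_test.py is hypothetical, and it assumes Theano is pulled in as a dependency of `pip install .`):

# smoke_test.py -- hypothetical check, not part of this commit.
# Verifies that Theano imports and can compile a trivial function in the
# ~/opt/GravitySpy-py27 virtualenv built by GravitySpy-init.
import theano
import theano.tensor as T

x = T.dvector('x')
square = theano.function([x], x ** 2)  # compile a tiny symbolic graph
print(square([1.0, 2.0, 3.0]))         # expect [ 1.  4.  9.]
print('device: {0}'.format(theano.config.device))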
14 changes: 2 additions & 12 deletions bin/gravityspyDBs
@@ -63,10 +63,6 @@ For LIGO users please see `Gravity Spy Authentication <https://secrets.ligo.org/
"""


O1GlitchClassificationUpdateTableSubHeader = """
This table contains information concerning the O1 glitch classification paper
"""

TrainingSetTableSubHeader = """
In order to obtain the training set described here: https://dcc.ligo.org/LIGO-P1700227
@@ -93,10 +89,6 @@ There are three ways.
* `/home/scott.coughlin/public_html/GravitySpy/TrainingSet2/H1L1`. I recommend this only in the short term because it will be deprecated very soon; the preferred way is the software above, which is robust, has correctly labelled samples, and will be automatically updated when the training set changes/improves. This folder contains 22 subfolders, each holding the images for one class.
"""

classificationsTableSubHeader = """
This table contains information about classification performed by users before the addition of the two new classes. Therefore, columns such as `annotations_value_choiceINT` = 1 being a `Blip` is true in this DB but not in classificationsdev
"""

classificationsdevTableSubHeader = """
This table contains information about classifications performed by users after the addition of the two new classes. Therefore, mappings such as `annotations_value_choiceINT` = 3 meaning a `Blip` hold in this DB but not in classifications. Versioning controls for this type of thing will be implemented in the near future.
"""
@@ -113,9 +105,7 @@ userStatusTableSubHeader = """
This table contains information about what level a given user should be on.
"""

TableSubHeaderDict = {'trainingset' : TrainingSetTableSubHeader,
'O1GlitchClassificationUpdate' : O1GlitchClassificationUpdateTableSubHeader,
'classifications' : classificationsTableSubHeader,
TableSubHeaderDict = {'trainingsetv1d1' : TrainingSetTableSubHeader,
'classificationsdev' : classificationsdevTableSubHeader,
'glitches' : glitchesTableSubHeader,
'goldenimages' : goldenimagesTableSubHeader,
@@ -134,7 +124,7 @@ engine = create_engine('postgresql://{0}:{1}@gravityspy.ciera.northwestern.edu:5
with open('DBs/index.rst', 'w') as f:
f.write(BeginningText)
for table in sorted(engine.table_names()):
if table not in ['goldenimages', 'classifications', 'O1GlitchClassificationUpdate', 'classificationsdev', 'userStatus', 'glitches', 'trainingset']:
if table not in ['goldenimages', 'classificationsdev', 'userStatus', 'glitches', 'trainingsetv1d1']:
continue

TableHeader = """
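
Note that the `trainingset` table is renamed to `trainingsetv1d1` above, so queries against the old name need updating. A minimal sketch of pulling a few rows with pandas, reusing the SQLAlchemy `engine` this script creates (hypothetical usage, not part of this commit):

# Hypothetical usage sketch; `engine` is the SQLAlchemy engine created above.
import pandas as pd

sample = pd.read_sql('SELECT * FROM trainingsetv1d1 LIMIT 5', engine)
print(sample.columns.tolist())  # inspect the table's column names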
89 changes: 65 additions & 24 deletions bin/trainmodel
@@ -1,37 +1,78 @@
#!/usr/bin/env python

import ConfigParser
import optparse
import argparse
import os

from matplotlib import use
use('agg')
from matplotlib import (pyplot as plt, cm)

import gravityspy.ML.make_pickle_for_linux as make_pickle
import gravityspy.ML.train_classifier as train_classifier

# Define command line arguments here

def parse_commandline():
"""Parse the options given on the command-line.
"""
parser = optparse.OptionParser()
parser.add_option("--path-to-golden", help="folder where labeled images live")
parser.add_option("--path-to-pickle", help="folder where pickled files will live")
parser.add_option("--path-to-trained-model", help="folder where the training model will live.")
parser.add_option("--batch-size", type=int, default=30,help="defines the batch size, 30 is a reasonable size")
parser.add_option("--nb-epoch", type=int, default=130,help="defines the number of iterations, 130 is reasonable. You can set it to 100 or below, if you have time concern for training.")
parser.add_option("--train-flag", type=int, default=0,help="a flag that shows all the golden set should be used for training the ML classifier, if 1, use training, validation and test set from golden set (ML experiments)")
parser.add_option("--number-of-classes", type=int, help="")
parser.add_option("--verbose", action="store_true", default=False,help="Run in Verbose Mode")
opts, args = parser.parse_args()


return opts

# example script

#THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 trainmodel --path-to-golden /home/scoughlin/public_html/GravitySpy/TrainingSet2/H1L1/ --path-to-pickle ./pickleddata/ --path-to-trained-model ./model --number-of-classes 22 --batch-size 500
opts = parse_commandline()
make_pickle.main(opts.path_to_golden,opts.path_to_pickle,0,opts.verbose)
train_classifier.main(opts.batch_size,opts.nb_epoch,opts.train_flag,opts.path_to_pickle,opts.path_to_trained_model,opts.number_of_classes,opts.verbose)
parser = argparse.ArgumentParser(description=
"An examples commandline of how to obtain a model is given below: "
"THEANO_FLAGS=mode=FAST_RUN,device=cuda,floatX=float32 trainmodel "
"--path-to-trainingset='somedir' --number-of-classes='somenum'")
parser.add_argument("--path-to-trainingset",
help="folder where labeled images live", required=True)
parser.add_argument("--number-of-classes", type=int,
help="How many classes do you have", required=True)
parser.add_argument("--path-to-pickle",
help="folder where the entire pickled training set "
"will live. This pickle file should be read in "
"by pandas", default='pickeleddata')
parser.add_argument("--path-to-trained-model",
help="folder where the final trained model will go.",
default='model')
parser.add_argument("--batch-size", type=int, default=30,
help="defines the batch size, 30 is a reasonable size")
parser.add_argument("--nb-epoch", type=int, default=20,
help="defines the number of iterations, "
"130 is reasonable. You can set it to 100 or below, "
"if you have time concern for training.")
parser.add_argument("--fraction-validation", type=float, default=0.125,
help="Perentage of trianing set to save for validation")
parser.add_argument("--fraction-testing", type=float, default=0,
help="Percentage of training set to save for testing")
parser.add_argument("--randomseed", type=int, default=1986,
help="Set random seed")
parser.add_argument("--verbose", action="store_true", default=False,
help="Run in Verbose Mode")
args = parser.parse_args()

return args

# Parse commandline
args = parse_commandline()

# Pixelate and pickle the training set images
train_classifier.pickle_trainingset(
path_to_trainingset=args.path_to_trainingset,
save_address=args.path_to_pickle,
verbose=args.verbose
)

# Check if testing percentage is 0; if so, set to None
if not args.fraction_testing:
fraction_testing = None
else:
fraction_testing = args.fraction_testing

# Train model
train_classifier.make_model(
data=os.path.join(args.path_to_pickle, 'trainingset.pkl'),
model_folder=args.path_to_trained_model,
batch_size=args.batch_size,
nb_epoch=args.nb_epoch,
nb_classes=args.number_of_classes,
fraction_validation=args.fraction_validation,
fraction_testing=fraction_testing,
best_model_based_validset=0,
image_size=[140, 170],
random_seed=args.randomseed,
verbose=True
)
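
The new `--fraction-validation` and `--fraction-testing` options replace the old `--train-flag`. A rough sketch of the split semantics they suggest, assuming a seeded shuffle of the pickled DataFrame (an illustration only; `train_classifier.make_model` performs the real split internally):

# Hypothetical illustration of the fraction-based split options.
import pandas as pd

def split_trainingset(pickle_path, fraction_validation=0.125,
                      fraction_testing=None, random_seed=1986):
    # Shuffle reproducibly, then carve off validation and (optional) testing.
    data = pd.read_pickle(pickle_path).sample(frac=1, random_state=random_seed)
    n_valid = int(len(data) * fraction_validation)
    n_test = int(len(data) * fraction_testing) if fraction_testing else 0
    valid = data.iloc[:n_valid]
    test = data.iloc[n_valid:n_valid + n_test]
    train = data.iloc[n_valid + n_test:]
    return train, valid, test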
66 changes: 48 additions & 18 deletions bin/wscan
@@ -113,22 +113,27 @@ def main(inifile, eventTime, ID, outDir,
alwaysPlotFlag = cp.getint('parameters', 'alwaysPlotFlag')
sampleFrequency = cp.getint('parameters', 'sampleFrequency')
blockTime = cp.getint('parameters', 'blockTime')
searchFrequencyRange = json.loads(cp.get('parameters', 'searchFrequencyRange'))
searchFrequencyRange = json.loads(cp.get('parameters',
'searchFrequencyRange'))
searchQRange = json.loads(cp.get('parameters', 'searchQRange'))
searchMaximumEnergyLoss = cp.getfloat('parameters', 'searchMaximumEnergyLoss')
searchMaximumEnergyLoss = cp.getfloat('parameters',
'searchMaximumEnergyLoss')
searchWindowDuration = cp.getfloat('parameters', 'searchWindowDuration')
whiteNoiseFalseRate = cp.getfloat('parameters', 'whiteNoiseFalseRate')
plotTimeRanges = json.loads(cp.get('parameters', 'plotTimeRanges'))
plotFrequencyRange = json.loads(cp.get('parameters', 'plotFrequencyRange'))
plotNormalizedERange = json.loads(cp.get('parameters', 'plotNormalizedERange'))
plotNormalizedERange = json.loads(cp.get('parameters',
'plotNormalizedERange'))
frameCacheFile = cp.get('channels', 'frameCacheFile')
frameType = cp.get('channels', 'frameType')
channelName = cp.get('channels', 'channelName')
detectorName = channelName.split(':')[0]
det = detectorName.split('1')[0]

logger.info('You have chosen the following Q range: {0}'.format(searchQRange))
logger.info('You have chosen the following search range: {0}'.format(searchFrequencyRange))
logger.info('You have chosen the following Q range: '
'{0}'.format(searchQRange))
logger.info('You have chosen the following search range: '
'{0}'.format(searchFrequencyRange))

###########################################################################
# create output directory #
@@ -138,7 +143,7 @@
if outDir is None:
outDirtmp = './scans'
else:
outDirtmp = outDir + '/' + ID + '/' + ID
outDirtmp = os.path.join(outDir, ID, ID)
outDirtmp += '/'

# report status
@@ -190,32 +195,53 @@
durForPlot = iTimeWindow/2
try:
outseg = Segment(centerTime - durForPlot, centerTime + durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange), frange=tuple(searchFrequencyRange),
qScan = data.q_transform(qrange=tuple(searchQRange),
frange=tuple(searchFrequencyRange),
gps=centerTime, search=0.5, tres=0.002,
fres=0.5, outseg=outseg, whiten=True)
qValue = qScan.q
qScan = qScan.crop(centerTime-iTimeWindow/2, centerTime+iTimeWindow/2)
qScan = qScan.crop(centerTime-iTimeWindow/2,
centerTime+iTimeWindow/2)
except:
outseg = Segment(centerTime - 2*durForPlot, centerTime + 2*durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange), frange=tuple(searchFrequencyRange),
outseg = Segment(centerTime - 2*durForPlot,
centerTime + 2*durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange),
frange=tuple(searchFrequencyRange),
gps=centerTime, search=0.5, tres=0.002,
fres=0.5, outseg=outseg, whiten=True)
qValue = qScan.q
qScan = qScan.crop(centerTime-iTimeWindow/2, centerTime+iTimeWindow/2)
qScan = qScan.crop(centerTime-iTimeWindow/2,
centerTime+iTimeWindow/2)
specsgrams.append(qScan)

logger.info('The most significant q value is {0}'.format(qValue))

# Plot q_scans
logger.info('Plotting q scans...')
plot_qtransform(specsgrams, plotNormalizedERange, plotTimeRanges, detectorName, startTime, outDirtmp, IDstring)
indFigAll, superFig = plot_qtransform(specsgrams,
plotNormalizedERange, plotTimeRanges,
detectorName, startTime)

for idx, indFig in enumerate(indFigAll):
dur = float(plotTimeRanges[idx])
indFig.save(os.path.join(
outDirtmp,
detectorName + '_' + IDstring
+ '_spectrogram_' + str(dur) +'.png'
)
)

superFig.save(os.path.join(outDirtmp, IDstring + '.png'),
bbox_inches='tight')

if runML:
# load the API gravityspy project cached class
gspyproject = GravitySpyProject.load_project_from_cache('1104.pkl')
# Since we created the images in a special temporary directory we can run os.listdir to get their full
# Since we created the images in a
# special temporary directory we can run os.listdir to get their full
# names so we can convert the images into ML readable format.
list_of_images = [ifile for ifile in os.listdir(outDirtmp) if 'spectrogram' in ifile]
list_of_images = [ifile for ifile in os.listdir(outDirtmp)
if 'spectrogram' in ifile]

logger.info('Converting image to ML readable...')
image_dataDF = pd.DataFrame()
@@ -274,11 +300,14 @@ def main(inifile, eventTime, ID, outDir,
classes = sorted(workflowDictSubjectSets['2117'].keys())

# Add on columns that are Gravity Spy specific
classes.extend(["uniqueID","Label","workflow","subjectset","Filename1","Filename2","Filename3","Filename4","UploadFlag", "qvalue"])
classes.extend(["uniqueID", "Label", "workflow", "subjectset",
"Filename1", "Filename2", "Filename3", "Filename4",
"UploadFlag", "qvalue"])

# Determine label
Label = classes[MLlabel]
logger.info('This image has received the following label: {0} with {1} percent confidence'.format(Label, confidence))
logger.info('This image has received the following label: '
'{0} with {1} percent confidence'.format(Label, confidence))

# determine confidence values from ML
scores = scores[0].tolist()
@@ -335,8 +364,9 @@

system_call = "mv {0}*.png {1}".format(outDirtmp, finalPath)
os.system(system_call)
shutil.rmtree(outDir + ID)
shutil.rmtree(os.path.join(outDir, ID))

if __name__ == '__main__':
args = parse_commandline()
main(args.inifile, args.eventTime, args.ID, args.outDir, args.pathToModel, args.uniqueID, args.runML, args.HDF5, args.PostgreSQL, args.verbose)
main(args.inifile, args.eventTime, args.ID, args.outDir, args.pathToModel,
args.uniqueID, args.runML, args.HDF5, args.PostgreSQL, args.verbose)
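
For context, the `q_transform` call pattern used above can be reproduced standalone with gwpy on public data; a minimal sketch, using GW150914 purely as an example event time and example Q/frequency ranges:

# Minimal standalone sketch of the gwpy q_transform pattern wscan uses.
from gwpy.timeseries import TimeSeries
from gwpy.segments import Segment

gps = 1126259462.4  # GW150914, chosen only as an example event time
data = TimeSeries.fetch_open_data('L1', gps - 16, gps + 16)
qscan = data.q_transform(qrange=(4, 64), frange=(10, 2048), gps=gps,
                         search=0.5, outseg=Segment(gps - 0.5, gps + 0.5),
                         whiten=True)
print('most significant Q: {0}'.format(qscan.q))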
