Merge pull request #18 from Gravity-Spy/develop
Merge develop into master
scottcoughlin2014 committed Apr 19, 2018
2 parents 2a35a6f + f87c2d7 commit c85c105
Showing 21 changed files with 9,876 additions and 312 deletions.
60 changes: 22 additions & 38 deletions GravitySpy-init
@@ -1,45 +1,29 @@
#!/bin/bash -e
#
# Set up a new python2.7 virtualenv for the GWpy software stack

# get name
target=$1
[[ -z ${target} ]] && target=${HOME}/opt/gwpysoft
packagefile=$2
rm -rf ~/opt/GravitySpy-py27
virtualenv-2.7 ~/opt/GravitySpy-py27
. ~/opt/GravitySpy-py27/bin/activate
python -m pip install --upgrade --quiet pip setuptools
pip install .
pip install cython

# -- install dependencies for virtualenv itself
# get python2.7 version
if [[ -z ${PYTHON_VERSION} ]]; then
PYTHON_VERSION=`python2.7 -c '
import sys;
print(".".join(map(str, sys.version_info[:2])))'`
fi
if [[ -z ${PYTHON_USER_BASE} ]]; then
PYTHON_USER_BASE=`python2.7 -c 'import site; print(site.USER_BASE)'`
PYTHON_USER_BASE=${HOME}/python
fi
if [[ -z ${PYTHON_USER_SITE} ]]; then
PYTHON_USER_SITE=`python2.7 -c 'import site; print(site.USER_SITE)'`
PYTHON_USER_SITE=${HOME}/python/lib/python2.7/site-packages
fi
# create local directories
mkdir -p ${PYTHON_USER_SITE} 1>/dev/null
export PATH=$PATH:/usr/local/cuda/bin/

echo ${PYTHON_USER_BASE}
echo ${PYTHON_USER_SITE}
git clone https://github.com/Theano/libgpuarray.git
cd libgpuarray
rm -rf build Build
mkdir Build
cd Build
cmake .. -DCMAKE_INSTALL_PREFIX=~/opt/GravitySpy-py27 -DCMAKE_BUILD_TYPE=Release
make
make install

# install pip
which pip &>/dev/null || easy_install -U --prefix=${PYTHON_USER_BASE} pip
export PATH=${PATH}:${PYTHON_USER_BASE}/bin
cd ..

# install virtualenv
pip install "virtualenv" --prefix=${PYTHON_USER_BASE}
echo "Virtualenv is now installed"
# Run the following exports and add them to your ~/.bashrc file
export CPATH=$CPATH:~/opt/GravitySpy-py27/include
export LIBRARY_PATH=$LIBRARY_PATH:~/opt/GravitySpy-py27/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/opt/GravitySpy-py27/lib

# -- create virtualenv
virtualenv $target --system-site-packages --clear
. $target/bin/activate

# Upgrading pip
pip install pip --upgrade
pip install -r $packagefile
python setup.py build
python setup.py install --prefix=~/opt/GravitySpy-py27/
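
As a quick sanity check of the environment this script builds, something like the following minimal sketch could confirm that Theano imports and compiles inside the new virtualenv (the file name smoke_test.py is hypothetical, and it assumes Theano is pulled in as a dependency of `pip install .`):

# smoke_test.py -- hypothetical check, not part of this commit.
# Verifies that Theano imports and can compile a trivial function in the
# ~/opt/GravitySpy-py27 virtualenv built by GravitySpy-init.
import theano
import theano.tensor as T

x = T.dvector('x')
square = theano.function([x], x ** 2)  # compile a tiny symbolic graph
print(square([1.0, 2.0, 3.0]))         # expect [ 1.  4.  9.]
print('device: {0}'.format(theano.config.device))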
14 changes: 2 additions & 12 deletions bin/gravityspyDBs
@@ -63,10 +63,6 @@ For LIGO users please see `Gravity Spy Authentication <https://secrets.ligo.org/
"""


O1GlitchClassificationUpdateTableSubHeader = """
This table contains information concerning the O1 glitch classification paper
"""

TrainingSetTableSubHeader = """
In order to obtain the training set described here: https://dcc.ligo.org/LIGO-P1700227
@@ -93,10 +89,6 @@ There are three ways.
* `/home/scott.coughlin/public_html/GravitySpy/TrainingSet2/H1L1`. I recommend this only in the short term because it will be deprecated very soon; the preferred way is the software above, which is robust, has correctly labelled samples, and will be automatically updated when the training set changes/improves. This folder contains 22 subfolders, each holding the images for one class.
"""

classificationsTableSubHeader = """
This table contains information about classification performed by users before the addition of the two new classes. Therefore, columns such as `annotations_value_choiceINT` = 1 being a `Blip` is true in this DB but not in classificationsdev
"""

classificationsdevTableSubHeader = """
This table contains information about classifications performed by users after the addition of the two new classes. Therefore, mappings such as `annotations_value_choiceINT` = 3 meaning a `Blip` hold in this DB but not in classifications. Versioning controls for this type of thing will be implemented in the near future.
"""
@@ -113,9 +105,7 @@ userStatusTableSubHeader = """
This table contains information about what level a given user should be on.
"""

TableSubHeaderDict = {'trainingset' : TrainingSetTableSubHeader,
'O1GlitchClassificationUpdate' : O1GlitchClassificationUpdateTableSubHeader,
'classifications' : classificationsTableSubHeader,
TableSubHeaderDict = {'trainingsetv1d1' : TrainingSetTableSubHeader,
'classificationsdev' : classificationsdevTableSubHeader,
'glitches' : glitchesTableSubHeader,
'goldenimages' : goldenimagesTableSubHeader,
@@ -134,7 +124,7 @@ engine = create_engine('postgresql://{0}:{1}@gravityspy.ciera.northwestern.edu:5
with open('DBs/index.rst', 'w') as f:
f.write(BeginningText)
for table in sorted(engine.table_names()):
if table not in ['goldenimages', 'classifications', 'O1GlitchClassificationUpdate', 'classificationsdev', 'userStatus', 'glitches', 'trainingset']:
if table not in ['goldenimages', 'classificationsdev', 'userStatus', 'glitches', 'trainingsetv1d1']:
continue

TableHeader = """
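
Note that the `trainingset` table is renamed to `trainingsetv1d1` above, so queries against the old name need updating. A minimal sketch of pulling a few rows with pandas, reusing the SQLAlchemy `engine` this script creates (hypothetical usage, not part of this commit):

# Hypothetical usage sketch; `engine` is the SQLAlchemy engine created above.
import pandas as pd

sample = pd.read_sql('SELECT * FROM trainingsetv1d1 LIMIT 5', engine)
print(sample.columns.tolist())  # inspect the table's column names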
89 changes: 65 additions & 24 deletions bin/trainmodel
@@ -1,37 +1,78 @@
#!/usr/bin/env python

import ConfigParser
import optparse
import argparse
import os

from matplotlib import use
use('agg')
from matplotlib import (pyplot as plt, cm)

import gravityspy.ML.make_pickle_for_linux as make_pickle
import gravityspy.ML.train_classifier as train_classifier

# Define command line arguments here

def parse_commandline():
"""Parse the options given on the command-line.
"""
parser = optparse.OptionParser()
parser.add_option("--path-to-golden", help="folder where labeled images live")
parser.add_option("--path-to-pickle", help="folder where pickled files will live")
parser.add_option("--path-to-trained-model", help="folder where the training model will live.")
parser.add_option("--batch-size", type=int, default=30,help="defines the batch size, 30 is a reasonable size")
parser.add_option("--nb-epoch", type=int, default=130,help="defines the number of iterations, 130 is reasonable. You can set it to 100 or below, if you have time concern for training.")
parser.add_option("--train-flag", type=int, default=0,help="a flag that shows all the golden set should be used for training the ML classifier, if 1, use training, validation and test set from golden set (ML experiments)")
parser.add_option("--number-of-classes", type=int, help="")
parser.add_option("--verbose", action="store_true", default=False,help="Run in Verbose Mode")
opts, args = parser.parse_args()


return opts

# example script

#THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 trainmodel --path-to-golden /home/scoughlin/public_html/GravitySpy/TrainingSet2/H1L1/ --path-to-pickle ./pickleddata/ --path-to-trained-model ./model --number-of-classes 22 --batch-size 500
opts = parse_commandline()
make_pickle.main(opts.path_to_golden,opts.path_to_pickle,0,opts.verbose)
train_classifier.main(opts.batch_size,opts.nb_epoch,opts.train_flag,opts.path_to_pickle,opts.path_to_trained_model,opts.number_of_classes,opts.verbose)
parser = argparse.ArgumentParser(description=
"An examples commandline of how to obtain a model is given below: "
"THEANO_FLAGS=mode=FAST_RUN,device=cuda,floatX=float32 trainmodel "
"--path-to-trainingset='somedir' --number-of-classes='somenum'")
parser.add_argument("--path-to-trainingset",
help="folder where labeled images live", required=True)
parser.add_argument("--number-of-classes", type=int,
help="How many classes do you have", required=True)
parser.add_argument("--path-to-pickle",
help="folder where the entire pickled training set "
"will live. This pickle file should be read in "
"by pandas", default='pickeleddata')
parser.add_argument("--path-to-trained-model",
help="folder where the final trained model will go.",
default='model')
parser.add_argument("--batch-size", type=int, default=30,
help="defines the batch size, 30 is a reasonable size")
parser.add_argument("--nb-epoch", type=int, default=20,
help="defines the number of iterations, "
"130 is reasonable. You can set it to 100 or below, "
"if you have time concern for training.")
parser.add_argument("--fraction-validation", type=float, default=0.125,
help="Perentage of trianing set to save for validation")
parser.add_argument("--fraction-testing", type=float, default=0,
help="Percentage of training set to save for testing")
parser.add_argument("--randomseed", type=int, default=1986,
help="Set random seed")
parser.add_argument("--verbose", action="store_true", default=False,
help="Run in Verbose Mode")
args = parser.parse_args()

return args

# Parse commandline
args = parse_commandline()

# Pixelate and pickle the training set images
train_classifier.pickle_trainingset(
path_to_trainingset=args.path_to_trainingset,
save_address=args.path_to_pickle,
verbose=args.verbose
)

# Check if testing percentage is 0; if so, set to None
if not args.fraction_testing:
fraction_testing = None
else:
fraction_testing = args.fraction_testing

# Train model
train_classifier.make_model(
data=os.path.join(args.path_to_pickle, 'trainingset.pkl'),
model_folder=args.path_to_trained_model,
batch_size=args.batch_size,
nb_epoch=args.nb_epoch,
nb_classes=args.number_of_classes,
fraction_validation=args.fraction_validation,
fraction_testing=fraction_testing,
best_model_based_validset=0,
image_size=[140, 170],
random_seed=args.randomseed,
verbose=True
)
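
The new `--fraction-validation` and `--fraction-testing` options replace the old `--train-flag`. A rough sketch of the split semantics they suggest, assuming a seeded shuffle of the pickled DataFrame (an illustration only; `train_classifier.make_model` performs the real split internally):

# Hypothetical illustration of the fraction-based split options.
import pandas as pd

def split_trainingset(pickle_path, fraction_validation=0.125,
                      fraction_testing=None, random_seed=1986):
    # Shuffle reproducibly, then carve off validation and (optional) testing.
    data = pd.read_pickle(pickle_path).sample(frac=1, random_state=random_seed)
    n_valid = int(len(data) * fraction_validation)
    n_test = int(len(data) * fraction_testing) if fraction_testing else 0
    valid = data.iloc[:n_valid]
    test = data.iloc[n_valid:n_valid + n_test]
    train = data.iloc[n_valid + n_test:]
    return train, valid, test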
66 changes: 48 additions & 18 deletions bin/wscan
@@ -113,22 +113,27 @@ def main(inifile, eventTime, ID, outDir,
alwaysPlotFlag = cp.getint('parameters', 'alwaysPlotFlag')
sampleFrequency = cp.getint('parameters', 'sampleFrequency')
blockTime = cp.getint('parameters', 'blockTime')
searchFrequencyRange = json.loads(cp.get('parameters', 'searchFrequencyRange'))
searchFrequencyRange = json.loads(cp.get('parameters',
'searchFrequencyRange'))
searchQRange = json.loads(cp.get('parameters', 'searchQRange'))
searchMaximumEnergyLoss = cp.getfloat('parameters', 'searchMaximumEnergyLoss')
searchMaximumEnergyLoss = cp.getfloat('parameters',
'searchMaximumEnergyLoss')
searchWindowDuration = cp.getfloat('parameters', 'searchWindowDuration')
whiteNoiseFalseRate = cp.getfloat('parameters', 'whiteNoiseFalseRate')
plotTimeRanges = json.loads(cp.get('parameters', 'plotTimeRanges'))
plotFrequencyRange = json.loads(cp.get('parameters', 'plotFrequencyRange'))
plotNormalizedERange = json.loads(cp.get('parameters', 'plotNormalizedERange'))
plotNormalizedERange = json.loads(cp.get('parameters',
'plotNormalizedERange'))
frameCacheFile = cp.get('channels', 'frameCacheFile')
frameType = cp.get('channels', 'frameType')
channelName = cp.get('channels', 'channelName')
detectorName = channelName.split(':')[0]
det = detectorName.split('1')[0]

logger.info('You have chosen the following Q range: {0}'.format(searchQRange))
logger.info('You have chosen the following search range: {0}'.format(searchFrequencyRange))
logger.info('You have chosen the following Q range: '
'{0}'.format(searchQRange))
logger.info('You have chosen the following search range: '
'{0}'.format(searchFrequencyRange))

###########################################################################
# create output directory #
@@ -138,7 +143,7 @@
if outDir is None:
outDirtmp = './scans'
else:
outDirtmp = outDir + '/' + ID + '/' + ID
outDirtmp = os.path.join(outDir, ID, ID)
outDirtmp += '/'

# report status
@@ -190,32 +195,53 @@
durForPlot = iTimeWindow/2
try:
outseg = Segment(centerTime - durForPlot, centerTime + durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange), frange=tuple(searchFrequencyRange),
qScan = data.q_transform(qrange=tuple(searchQRange),
frange=tuple(searchFrequencyRange),
gps=centerTime, search=0.5, tres=0.002,
fres=0.5, outseg=outseg, whiten=True)
qValue = qScan.q
qScan = qScan.crop(centerTime-iTimeWindow/2, centerTime+iTimeWindow/2)
qScan = qScan.crop(centerTime-iTimeWindow/2,
centerTime+iTimeWindow/2)
except:
outseg = Segment(centerTime - 2*durForPlot, centerTime + 2*durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange), frange=tuple(searchFrequencyRange),
outseg = Segment(centerTime - 2*durForPlot,
centerTime + 2*durForPlot)
qScan = data.q_transform(qrange=tuple(searchQRange),
frange=tuple(searchFrequencyRange),
gps=centerTime, search=0.5, tres=0.002,
fres=0.5, outseg=outseg, whiten=True)
qValue = qScan.q
qScan = qScan.crop(centerTime-iTimeWindow/2, centerTime+iTimeWindow/2)
qScan = qScan.crop(centerTime-iTimeWindow/2,
centerTime+iTimeWindow/2)
specsgrams.append(qScan)

logger.info('The most significant q value is {0}'.format(qValue))

# Plot q_scans
logger.info('Plotting q scans...')
plot_qtransform(specsgrams, plotNormalizedERange, plotTimeRanges, detectorName, startTime, outDirtmp, IDstring)
indFigAll, superFig = plot_qtransform(specsgrams,
plotNormalizedERange, plotTimeRanges,
detectorName, startTime)

for idx, indFig in enumerate(indFigAll):
dur = float(plotTimeRanges[idx])
indFig.save(os.path.join(
outDirtmp,
detectorName + '_' + IDstring
+ '_spectrogram_' + str(dur) +'.png'
)
)

superFig.save(os.path.join(outDirtmp, IDstring + '.png'),
bbox_inches='tight')

if runML:
# load the API gravityspy project cached class
gspyproject = GravitySpyProject.load_project_from_cache('1104.pkl')
# Since we created the images in a special temporary directory we can run os.listdir to get their full
# Since we created the images in a
# special temporary directory we can run os.listdir to get their full
# names so we can convert the images into ML readable format.
list_of_images = [ifile for ifile in os.listdir(outDirtmp) if 'spectrogram' in ifile]
list_of_images = [ifile for ifile in os.listdir(outDirtmp)
if 'spectrogram' in ifile]

logger.info('Converting image to ML readable...')
image_dataDF = pd.DataFrame()
@@ -274,11 +300,14 @@ def main(inifile, eventTime, ID, outDir,
classes = sorted(workflowDictSubjectSets['2117'].keys())

# Add on columns that are Gravity Spy specific
classes.extend(["uniqueID","Label","workflow","subjectset","Filename1","Filename2","Filename3","Filename4","UploadFlag", "qvalue"])
classes.extend(["uniqueID", "Label", "workflow", "subjectset",
"Filename1", "Filename2", "Filename3", "Filename4",
"UploadFlag", "qvalue"])

# Determine label
Label = classes[MLlabel]
logger.info('This image has received the following label: {0} with {1} percent confidence'.format(Label, confidence))
logger.info('This image has received the following label: '
'{0} with {1} percent confidence'.format(Label, confidence))

# determine confidence values from ML
scores = scores[0].tolist()
@@ -335,8 +364,9 @@

system_call = "mv {0}*.png {1}".format(outDirtmp, finalPath)
os.system(system_call)
shutil.rmtree(outDir + ID)
shutil.rmtree(os.path.join(outDir, ID))

if __name__ == '__main__':
args = parse_commandline()
main(args.inifile, args.eventTime, args.ID, args.outDir, args.pathToModel, args.uniqueID, args.runML, args.HDF5, args.PostgreSQL, args.verbose)
main(args.inifile, args.eventTime, args.ID, args.outDir, args.pathToModel,
args.uniqueID, args.runML, args.HDF5, args.PostgreSQL, args.verbose)
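
For context, the `q_transform` call pattern used above can be reproduced standalone with gwpy on public data; a minimal sketch, using GW150914 purely as an example event time and example Q/frequency ranges:

# Minimal standalone sketch of the gwpy q_transform pattern wscan uses.
from gwpy.timeseries import TimeSeries
from gwpy.segments import Segment

gps = 1126259462.4  # GW150914, chosen only as an example event time
data = TimeSeries.fetch_open_data('L1', gps - 16, gps + 16)
qscan = data.q_transform(qrange=(4, 64), frange=(10, 2048), gps=gps,
                         search=0.5, outseg=Segment(gps - 0.5, gps + 0.5),
                         whiten=True)
print('most significant Q: {0}'.format(qscan.q))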
