Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

We’re showing branches in this repository, but you can also compare across forks.

base fork: SciLifeLab/bcbb
base: 55d5d632ed
...
head fork: SciLifeLab/bcbb
compare: 31a8bb6d97
  • 3 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
nextgen/bcbio/google/__init__.py (1 line changed)
View
@@ -17,6 +17,7 @@ def _to_unicode(str, encoding='utf-8'):
str = unicode(str, encoding)
return str
+
def get_credentials(config):
"""Get the encoded credentials specified in the post process configuration file"""
nextgen/bcbio/google/bc_metrics.py (29 lines changed)
View
@@ -55,7 +55,7 @@ def _create_header(header, columns):
return names
-def get_spreadsheet(ssheet_title,encoded_credentials):
+def get_spreadsheet(ssheet_title, encoded_credentials):
"""Connect to Google docs and get a spreadsheet"""
# Convert the spreadsheet title to unicode
@@ -71,17 +71,17 @@ def get_spreadsheet(ssheet_title,encoded_credentials):
# Check that we got a result back
if not ssheet:
logger2.warn("No document with specified title '%s' found in GoogleDocs repository" % ssheet_title)
- return (None,None)
-
- return (client,ssheet)
-
+ return (None, None)
+
+ return (client, ssheet)
+
def _write_project_report_to_gdocs(client, ssheet, flowcell):
# Get the spreadsheet if it exists
# Otherwise, create it
- wsheet_title = "%s_%s" % (flowcell.get_fc_date(),flowcell.get_fc_name())
-
+ wsheet_title = "%s_%s" % (flowcell.get_fc_date(), flowcell.get_fc_name())
+
# Flatten the project_data structure into a list
samples = {}
for sample in flowcell.get_samples():
@@ -89,23 +89,23 @@ def _write_project_report_to_gdocs(client, ssheet, flowcell):
samples[sample.get_name()].add_sample(sample)
else:
samples[sample.get_name()] = sample
-
+
rows = []
for sample in samples.values():
row = (sample.get_name(),wsheet_title,sample.get_lane(),sample.get_read_count(),sample.get_rounded_read_count(),sample.get_comment(),"")
rows.append(row)
-
+
# Write the data to the worksheet
return _write_to_worksheet(client,ssheet,wsheet_title,rows,[col_header[0] for col_header in SEQUENCING_RESULT_HEADER],False)
def _write_project_report_summary_to_gdocs(client, ssheet):
"""Summarize the data from the worksheets and write them to a "Summary" worksheet"""
-
+
# Summary data
flowcells = {}
samples = {}
# Get the list of worksheets in the spreadsheet
- wsheet_feed = bcbio.google.spreadsheet.get_worksheets_feed(client,ssheet)
+ wsheet_feed = bcbio.google.spreadsheet.get_worksheets_feed(client, ssheet)
# Loop over the worksheets and parse the data from the ones that contain flowcell data
for wsheet in wsheet_feed.entry:
wsheet_title = wsheet.title.text
@@ -143,14 +143,15 @@ def _write_project_report_summary_to_gdocs(client, ssheet):
# Write the data to the worksheet
return _write_to_worksheet(client,ssheet,wsheet_title,rows,[col_header[0] for col_header in SEQUENCING_RESULT_HEADER],False)
+
def write_run_report_to_gdocs(fc, fc_date, fc_name, ssheet_title, encoded_credentials, wsheet_title=None, append=False, split_project=False):
"""Upload the barcode read distribution for a run to google docs"""
-
+
# Connect to google and get the spreadsheet
- client, ssheet = get_spreadsheet(ssheet_title,encoded_credentials)
+ client, ssheet = get_spreadsheet(ssheet_title, encoded_credentials)
if not client or not ssheet:
return False
-
+
# Get the projects in the run
projects = fc.get_project_names()
logger2.info("Will write data from the run %s_%s for projects: '%s'" % (fc_date,fc_name,"', '".join(projects)))
nextgen/bcbio/google/connection.py (23 lines changed)
View
@@ -1,26 +1,29 @@
#!/usr/bin/env python
-"""Wrapper functions around the python gdata api for connecting and authenticating with the Google Docs service"""
+"""Wrapper functions around the python gdata api for
+connecting and authenticating with the Google Docs service.
+"""
import base64
-def authenticate(client,credentials):
-
- login,pwd = _decode_credentials(credentials)
+
+def authenticate(client, credentials):
+
+ login, pwd = _decode_credentials(credentials)
if not login or not pwd:
return False
-
+
client.email = login
client.password = pwd
client.source = 'bcbb_nextgen_pipeline'
client.ProgrammaticLogin()
-
+
return True
+
def _decode_credentials(credentials):
-
+
if not credentials:
return None
-
+
# Split the username and password
- return base64.b64decode(credentials).split(':',1);
-
+ return base64.b64decode(credentials).split(':', 1)
nextgen/bcbio/google/document.py (9 lines changed)
View
@@ -46,15 +46,18 @@ def get_client(encoded_credentials=None):
authenticate(client,encoded_credentials)
return client
-
-def get_folder(client,folder_name):
+
+
+def get_folder(client, folder_name):
"""Get a folder if it exists"""
q = gdata.docs.service.DocumentQuery(categories=['folder'], params={'showfolders': 'true'})
for entry in (client.Query(q.ToUri()).entry or []):
if entry.title.text == folder_name:
return entry
+
return None
-
+
+
def move_to_folder(client,doc,folder):
"""Move a document into the supplied folder"""
moved_doc = client.MoveIntoFolder(doc,folder)
nextgen/bcbio/google/sequencing_report.py (80 lines changed)
View
@@ -1,12 +1,11 @@
-"""
-Create reports on google docs
+"""Create reports on google docs
"""
import copy
import logbook
import time
import yaml
-from bcbio.google import (_from_unicode,_to_unicode,get_credentials)
+from bcbio.google import (_from_unicode, _to_unicode, get_credentials)
import bcbio.google.bc_metrics
import bcbio.google.qc_metrics
from bcbio.pipeline.qcsummary import RTAQCMetrics
@@ -14,7 +13,8 @@
from bcbio.log import create_log_handler
from bcbio.log import logger2 as log
-def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
+
+def create_report_on_gdocs(fc_date, fc_name, run_info_yaml, dirs, config):
"""
Create reports on gdocs containing both demultiplexed read counts and QC data
"""
@@ -22,9 +22,9 @@ def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
success = True
try:
# Parse the run_info.yaml file
- with open(run_info_yaml,"r") as fh:
+ with open(run_info_yaml, "r") as fh:
run_info = yaml.load(fh)
-
+
# Get the gdocs account credentials
encoded_credentials = get_credentials(config)
if not encoded_credentials:
@@ -32,8 +32,8 @@ def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
return False
# Get the required parameters from the post_process.yaml configuration file
- gdocs = config.get("gdocs_upload",None)
-
+ gdocs = config.get("gdocs_upload", None)
+
# Add email notification
email = gdocs.get("gdocs_email_notification",None)
smtp_host = config.get("smtp_host","")
@@ -44,22 +44,22 @@ def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
# Inject the fc_date and fc_name in the email subject
with logbook.Processor(lambda record: record.extra.__setitem__('run', "%s_%s" % (fc_date,fc_name))):
-
+
try:
- log.info("Started creating sequencing report on Google docs for %s_%s on %s" % (fc_date,fc_name,time.strftime("%x @ %X")))
-
- # Get a flowcell object
- fc = Flowcell(fc_name,fc_date,run_info,dirs.get("work",None))
+ log.info("Started creating sequencing report on Google docs for %s_%s on %s" % (fc_date, fc_name, time.strftime("%x @ %X")))
+
+ # Get a flowcell object
+ fc = Flowcell(fc_name, fc_date, run_info, dirs.get("work", None))
# Get the GDocs demultiplex result file title
- gdocs_dmplx_spreadsheet = gdocs.get("gdocs_dmplx_file",None)
+ gdocs_dmplx_spreadsheet = gdocs.get("gdocs_dmplx_file", None)
# Get the GDocs QC file title
- gdocs_qc_spreadsheet = gdocs.get("gdocs_qc_file",None)
-
+ gdocs_qc_spreadsheet = gdocs.get("gdocs_qc_file", None)
+
# FIXME: Make the bc stuff use the Flowcell module
- if gdocs_dmplx_spreadsheet is not None:
+ if gdocs_dmplx_spreadsheet is not None:
# Upload the data
- success &= bcbio.google.bc_metrics.write_run_report_to_gdocs(fc, fc_date, fc_name, gdocs_dmplx_spreadsheet, encoded_credentials)
+ success &= bcbio.google.bc_metrics.write_run_report_to_gdocs(fc, fc_date, fc_name, gdocs_dmplx_spreadsheet, encoded_credentials)
else:
log.warn("Could not find Google Docs demultiplex results file title in configuration. No demultiplex counts were written to Google Docs for %s_%s" % (fc_date,fc_name))
@@ -76,11 +76,12 @@ def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
# Get the projects parent folder
projects_folder = gdocs.get("gdocs_projects_folder",None)
-
+
# Write the bc project summary report
if projects_folder is not None:
- success &= create_project_report_on_gdocs(fc,qc,encoded_credentials,projects_folder)
-
+ success &= create_project_report_on_gdocs(fc, qc, \
+ encoded_credentials, projects_folder)
+
except Exception as e:
success = False
raise
@@ -93,38 +94,43 @@ def create_report_on_gdocs(fc_date,fc_name,run_info_yaml,dirs,config):
except Exception as e:
success = False
log.warn("Encountered exception when writing sequencing report to Google Docs: %s" % e)
-
+
return success
-
-def create_project_report_on_gdocs(fc,qc,encoded_credentials,gdocs_folder):
+
+def create_project_report_on_gdocs(fc, qc, encoded_credentials, gdocs_folder):
"""Upload the sample read distribution for a project to google docs"""
-
+
success = True
-
+
# Create a client class which will make HTTP requests with Google Docs server.
client = bcbio.google.spreadsheet.get_client(encoded_credentials)
doc_client = bcbio.google.document.get_client(encoded_credentials)
-
+
# Get a reference to the parent folder
- parent_folder = bcbio.google.document.get_folder(doc_client,gdocs_folder)
-
+ parent_folder = bcbio.google.document.get_folder(doc_client, gdocs_folder)
+
+ if not parent_folder:
+ parent_folder_title = "root directory"
+ else:
+ parent_folder_title = _from_unicode(parent_folder.title.text)
+
# Loop over the projects
for project_name in fc.get_project_names():
-
+
# Get a flowcell object containing just the data for the project
project_fc = fc.prune_to_project(project_name)
-
+
folder_name = project_name
- folder = bcbio.google.document.get_folder(doc_client,folder_name)
+ folder = bcbio.google.document.get_folder(doc_client, folder_name)
if not folder:
- folder = bcbio.google.document.add_folder(doc_client,folder_name,parent_folder)
- log.info("Folder '%s' created under '%s'" % (_from_unicode(folder_name),_from_unicode(parent_folder.title.text)))
-
+ folder = bcbio.google.document.add_folder(doc_client, folder_name, parent_folder)
+ log.info("Folder '%s' created under '%s'" % (_from_unicode(folder_name), parent_folder_title))
+
ssheet_title = project_name + "_sequencing_results"
- ssheet = bcbio.google.spreadsheet.get_spreadsheet(client,ssheet_title)
+ ssheet = bcbio.google.spreadsheet.get_spreadsheet(client, ssheet_title)
if not ssheet:
- bcbio.google.document.add_spreadsheet(doc_client,ssheet_title)
+ bcbio.google.document.add_spreadsheet(doc_client, ssheet_title)
ssheet = bcbio.google.spreadsheet.get_spreadsheet(client,ssheet_title)
ssheet = bcbio.google.document.move_to_folder(doc_client,ssheet,folder)
ssheet = bcbio.google.spreadsheet.get_spreadsheet(client,ssheet_title)
nextgen/bcbio/google/spreadsheet.py (71 lines changed)
View
@@ -4,32 +4,37 @@
import gdata.spreadsheet.service
import gdata.docs.service
from bcbio.google.connection import authenticate
-from bcbio.google import (_from_unicode,_to_unicode)
+from bcbio.google import (_from_unicode, _to_unicode)
-def add_worksheet(client,ssheet,title,rows=0,cols=0,append=False):
- """Add a new worksheet with the specified title to the specified spreadsheet.
+
+def add_worksheet(client, ssheet, title, rows=0, cols=0, append=False):
+ """Add a new worksheet with the specified title to the specified spreadsheet.
Will overwrite an existing worksheet with the same title unless append is True
"""
# Check if a worksheet with the same title exists
- ws = get_worksheet(client,ssheet,title)
+ ws = get_worksheet(client, ssheet, title)
if ws:
# If we're appending, just return the first object in the feed
if append:
return ws
-
+
# Otherwise, drop the existing worksheet
client.DeleteWorksheet(ws)
-
+
# Add the desired worksheet
- return client.AddWorksheet(_to_unicode(title),rows,cols,get_key(ssheet))
+ return client.AddWorksheet(_to_unicode(title), rows, cols, get_key(ssheet))
+
def column_count(wsheet):
"""Get the number of columns in the worksheet"""
return int(wsheet.col_count.text)
+
def get_cell_content(client, ssheet, wsheet, row_start=0, col_start=0, row_end=0, col_end=0):
- """Get the text contents of the cells from the supplied spreadsheet and worksheet and from the specified cell range as a two-dimensional list"""
-
+ """Get the text contents of the cells from the supplied spreadsheet and
+ worksheet and from the specified cell range as a two-dimensional list.
+ """
+
if str(row_start) == '0':
row_start = '1'
if str(col_start) == '0':
@@ -38,27 +43,27 @@ def get_cell_content(client, ssheet, wsheet, row_start=0, col_start=0, row_end=0
row_end = str(row_count(wsheet))
if str(col_end) == '0':
col_end = str(column_count(wsheet))
-
- feed = (get_cell_feed(client,ssheet,wsheet,row_start,col_start,row_end,col_end) or [])
-
+
+ feed = (get_cell_feed(client, ssheet, wsheet, row_start, col_start, row_end, col_end) or [])
+
# Get the dimensions of the 2D-list
rows = int(row_end) - int(row_start) + 1
cols = int(col_end) - int(col_start) + 1
content = []
for i, cell in enumerate(feed.entry):
- r = i//cols
- c = i - r*cols
+ r = i // cols
+ c = i - r * cols
if c == 0:
row = []
content.append(row)
row.append(_to_unicode((cell.content.text or "")))
-
+
return content
-
+
def get_cell_feed(client, ssheet, wsheet, row_start=0, col_start=0, row_end=0, col_end=0):
"""Get a cell feed from the supplied spreadsheet and worksheet and from the specified cell range"""
-
+
if str(row_start) == '0':
row_start = '1'
if str(col_start) == '0':
@@ -67,7 +72,7 @@ def get_cell_feed(client, ssheet, wsheet, row_start=0, col_start=0, row_end=0, c
row_end = str(row_count(wsheet))
if str(col_end) == '0':
col_end = str(column_count(wsheet))
-
+
p = {'min-row': str(row_start),
'min-col': str(col_start),
'max-row': str(row_end),
@@ -76,7 +81,8 @@ def get_cell_feed(client, ssheet, wsheet, row_start=0, col_start=0, row_end=0, c
}
query = gdata.spreadsheet.service.CellQuery(params=p)
return client.GetCellsFeed(get_key(ssheet),get_key(wsheet),query=query)
-
+
+
def get_client(encoded_credentials=None):
"""Get a SpreadsheetsService client"""
# Create a client class which will make HTTP requests with Google Docs server.
@@ -84,15 +90,17 @@ def get_client(encoded_credentials=None):
# If credentials were supplied, authenticate the client as well
if encoded_credentials:
authenticate(client,encoded_credentials)
-
+
return client
+
def get_column(client, ssheet, wsheet, column, constraint={}):
"""Get the content of a specified column, optionally filtering on other columns"""
-
+
values = get_rows_columns_with_constraint(client,ssheet,wsheet,[column],constraint)
return [row[0] for row in values]
-
+
+
def get_column_index(client,ssheet,wsheet,name):
"""Get the index of the column with the specified name, or 0 if no column matches"""
@@ -102,16 +110,19 @@ def get_column_index(client,ssheet,wsheet,name):
return int(i+1)
return 0
+
def get_header(client, ssheet, wsheet):
"""Return the column header of the supplied worksheet as a list"""
header = get_row(client,ssheet,wsheet,1)
return header
+
def get_key(object):
"""Get the unique gdocs key identifier for the supplied object"""
return object.id.text.split('/')[-1]
+
def _get_query(title,exact_match):
"""Get a query object for the supplied parameters"""
@@ -126,12 +137,14 @@ def _get_query(title,exact_match):
return gdata.spreadsheet.service.DocumentQuery(params=p)
+
def get_row(client, ssheet, wsheet, row):
"""Get the content of a specified row index"""
content = (get_cell_content(client, ssheet, wsheet, row, 0, row, 0) or [[]])
return content[0]
+
def get_rows_columns_with_constraint(client, ssheet, wsheet, columns, constraint={}):
"""Get the content of specified columns from the rows filtered by some column values"""
@@ -154,7 +167,8 @@ def get_rows_columns_with_constraint(client, ssheet, wsheet, columns, constraint
values.append([row[i] for i in column_indexes])
return values
-
+
+
def get_rows_with_constraint(client, ssheet, wsheet, constraint={}):
"""Get the content of the rows filtered by some column values"""
@@ -175,7 +189,8 @@ def get_rows_with_constraint(client, ssheet, wsheet, constraint={}):
content.append(row)
return content
-
+
+
def get_spreadsheet(client,title):
"""Get an exact match for a spreadsheet"""
feed = get_spreadsheets_feed(client,title,True)
@@ -183,6 +198,7 @@ def get_spreadsheet(client,title):
return None
return feed.entry[0]
+
def get_spreadsheets_feed(client, title=None, exact_match=False):
"""Get a feed of all available spreadsheets, optionally restricted by title"""
@@ -191,6 +207,7 @@ def get_spreadsheets_feed(client, title=None, exact_match=False):
# Query the server for an Atom feed containing a list of your documents.
return client.GetSpreadsheetsFeed(query=q)
+
def get_worksheet(client,ssheet,title):
"""Get an exact match for a worksheet within a spreadsheet"""
feed = get_worksheets_feed(client,ssheet,title,True)
@@ -198,6 +215,7 @@ def get_worksheet(client,ssheet,title):
return None
return feed.entry[0]
+
def get_worksheets_feed(client, ssheet, title=None, exact_match=False):
"""Get a feed of all worksheets in the supplied spreadsheet, optionally restricted by title"""
@@ -208,10 +226,12 @@ def get_worksheets_feed(client, ssheet, title=None, exact_match=False):
# Query the server for an Atom feed containing a list of your documents.
return client.GetWorksheetsFeed(key=k,query=q)
+
def row_count(wsheet):
"""Get the number of rows in the worksheet"""
return int(wsheet.row_count.text)
+
def write_rows(client,ssheet,wsheet,header,rows):
"""Write the supplied data rows to the worksheet, using the supplied column headers"""
@@ -238,6 +258,3 @@ def write_rows(client,ssheet,wsheet,header,rows):
return False
return True
-
-
-

No commit comments for this range

Something went wrong with that request. Please try again.