Skip to content

Commit

Permalink
fixed issue #31
Browse files Browse the repository at this point in the history
  • Loading branch information
JonnyTran committed Feb 17, 2021
1 parent 593975a commit 2e89102
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 154 deletions.
2 changes: 1 addition & 1 deletion openomics/database/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Ontology(Dataset):

def __init__(self, path, file_resources=None, col_rename=None, npartitions=0, verbose=False):
"""
Manages dataset input processing from tables and construct an ontology network from obo file. There ontology
Manages dataset input processing from tables and constructs an ontology network from an .obo file. The ontology
network is G(V,E), where there exists an edge e_ij from child i to parent j to represent "node i is_a node j".
Args:
Expand Down
146 changes: 0 additions & 146 deletions openomics/utils/GTF.py

This file was deleted.

13 changes: 6 additions & 7 deletions openomics/utils/read_gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from collections import OrderedDict
from os.path import exists

import dask
import dask.dataframe as dd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -292,16 +291,16 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
features (set or None): Ignore entries which don't correspond to one of the supplied features
"""
if npartitions:
ddf = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
ddf = ddf.reset_index(drop=False)
ddf = ddf.set_index("index")
df = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
df = df.reset_index(drop=False)
df = df.set_index("index")

attribute_values = ddf.pop("attribute")
attribute_values = df.pop("attribute")

for column_name, values in expand_attribute_strings(attribute_values,
usecols=restrict_attribute_columns).items():
series = dd.from_array(np.array(values, dtype=np.str))
ddf[column_name] = series
df[column_name] = series
else:
df = parse_gtf(filepath_or_buffer, chunksize=chunksize, features=features)

Expand All @@ -311,7 +310,7 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
usecols=restrict_attribute_columns).items():
df[column_name] = values

return ddf
return df


def read_gtf(filepath_or_buffer, npartitions=None, compression=None, expand_attribute_column=True,
Expand Down

0 comments on commit 2e89102

Please sign in to comment.