fixed issue #31

JonnyTran · Feb 17, 2021 · 2e89102 · 2e89102
1 parent 593975a
commit 2e89102
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 154 deletions.
diff --git a/openomics/database/ontology.py b/openomics/database/ontology.py
@@ -13,7 +13,7 @@ class Ontology(Dataset):
 
     def __init__(self, path, file_resources=None, col_rename=None, npartitions=0, verbose=False):
         """
-        Manages dataset input processing from tables and construct an ontology network from obo file. There ontology
+        Manages dataset input processing from tables and constructs an ontology network from an .obo file. The ontology
         network is G(V,E) where there exists e_ij for child i to parent j to represent "node i is_a node j".
 
         Args:

diff --git a/openomics/utils/GTF.py b/openomics/utils/GTF.py
diff --git a/openomics/utils/read_gtf.py b/openomics/utils/read_gtf.py
@@ -16,7 +16,6 @@
 from collections import OrderedDict
 from os.path import exists
 
-import dask
 import dask.dataframe as dd
 import numpy as np
 import pandas as pd
@@ -292,16 +291,16 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
         features (set or None): Ignore entries which don't correspond to one of the supplied features
     """
     if npartitions:
-        ddf = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
-        ddf = ddf.reset_index(drop=False)
-        ddf = ddf.set_index("index")
+        df = parse_gtf_dask(filepath_or_buffer, npartitions=npartitions, compression=compression, features=features)
+        df = df.reset_index(drop=False)
+        df = df.set_index("index")
 
-        attribute_values = ddf.pop("attribute")
+        attribute_values = df.pop("attribute")
 
         for column_name, values in expand_attribute_strings(attribute_values,
                                                             usecols=restrict_attribute_columns).items():
             series = dd.from_array(np.array(values, dtype=np.str))
-            ddf[column_name] = series
+            df[column_name] = series
     else:
         df = parse_gtf(filepath_or_buffer, chunksize=chunksize, features=features)
 
@@ -311,7 +310,7 @@ def parse_gtf_and_expand_attributes(filepath_or_buffer, npartitions=None, compre
                                                             usecols=restrict_attribute_columns).items():
             df[column_name] = values
 
-    return ddf
+    return df
 
 
 def read_gtf(filepath_or_buffer, npartitions=None, compression=None, expand_attribute_column=True,