Skip to content

Commit

Permalink
DenseBoW.select bug
Browse files Browse the repository at this point in the history
  • Loading branch information
mgraffg committed Jul 24, 2023
1 parent 1659d49 commit 77ca14b
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 2 deletions.
2 changes: 1 addition & 1 deletion EvoMSA/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '1.9.9'
__version__ = '1.9.10'

try:
from EvoMSA.text_repr import BoW, TextRepresentations, StackGeneralization, DenseBoW
Expand Down
20 changes: 19 additions & 1 deletion EvoMSA/tests/test_text_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,4 +449,22 @@ def test_DenseBoW_extend2():
def test_DenseBoW_dataset():
    """Smoke test: construct a ``DenseBoW`` for ``lang='it'``.

    The instance is built with ``emoji=False`` and ``keyword=False``; the
    test passes when the constructor returns without raising.
    """
    from EvoMSA.text_repr import DenseBoW

    options = dict(lang='it', emoji=False, keyword=False)
    DenseBoW(**options)




def test_DenseBoW_select_bug():
    """Regression test for ``DenseBoW.select`` on a heavily skewed sample.

    ``select`` receives every tweet labelled ``'P'`` plus a single tweet
    labelled ``'N'``, and the returned object is then fitted on the full
    dataset. The test passes when the whole chain runs without raising.
    """
    from EvoMSA.text_repr import DenseBoW

    D = list(tweet_iterator(TWEETS))
    pos = [x for x in D if x['klass'] == 'P']
    neg = [x for x in D if x['klass'] == 'N']
    text_repr = DenseBoW(lang='es',
                         keyword=False,
                         voc_size_exponent=13,
                         emoji=True, dataset=False,
                         n_jobs=-1)
    # NOTE(review): presumably this sample leads ``select`` to an empty
    # subset of representations, the case that used to fail -- confirm.
    text_repr.select(D=pos + neg[:1]).fit(D)
2 changes: 2 additions & 0 deletions EvoMSA/text_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,8 @@ def select(self, subset: Union[list, None]=None,
"""
assert subset is not None or D is not None
if subset is not None:
if len(subset) == 0:
return self
tr = self.text_representations
self.text_representations = [tr[i] for i in subset]
names = self.names
Expand Down

0 comments on commit 77ca14b

Please sign in to comment.