Skip to content

Commit

Permalink
Merge pull request #75 from AntoineSimoulin/patch-5
Browse files Browse the repository at this point in the history
🔧 Add top n extensions as class param
  • Loading branch information
farthur committed Dec 3, 2020
2 parents 53d7f99 + 43fdf70 commit 48c631a
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions melusine/prepare_email/metadata_engineering.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ class MetaExtension(BaseEstimator, TransformerMixin):
Compatible with scikit-learn API.
"""

def __init__(self):
def __init__(self, topn_extension=100):
self.le_extension = preprocessing.LabelEncoder()
self.topn_extension = topn_extension

def fit(self, X, y=None):

Expand All @@ -27,7 +28,7 @@ def fit(self, X, y=None):

""" Fit LabelEncoder on encoded extensions."""
X["extension"] = X.apply(self.get_extension, axis=1)
self.top_extension = self.get_top_extension(X, n=100)
self.top_extension = self.get_top_extension(X, n=self.topn_extension)
X["extension"] = X.apply(
self.encode_extension, args=(self.top_extension,), axis=1
)
Expand Down Expand Up @@ -287,8 +288,9 @@ class MetaAttachmentType(BaseEstimator, TransformerMixin):
Compatible with scikit-learn API.
"""

def __init__(self):
def __init__(self, topn_extension=100):
self.le_extension = preprocessing.LabelEncoder()
self.topn_extension = topn_extension

def fit(self, X, y=None):

Expand All @@ -299,7 +301,7 @@ def fit(self, X, y=None):

""" Fit LabelEncoder on encoded extensions."""
X["attachment_type"] = X.apply(self.get_attachment_type, axis=1)
self.top_attachment_type = self.get_top_attachment_type(X, n=100)
self.top_attachment_type = self.get_top_attachment_type(X, n=self.topn_extension)
X["attachment_type"] = X.apply(
self.encode_type, args=(self.top_attachment_type,), axis=1
)
Expand Down

0 comments on commit 48c631a

Please sign in to comment.