In [None]:
import featuretools as ft
from featuretools.primitives import TransformPrimitive
from featuretools.variable_types import NaturalLanguage, Numeric

In [None]:
class StringCount(TransformPrimitive):
    '''Count the case-insensitive occurrences of a string in each text value.

    Args:
        string (str): Substring to count. It is stored exactly as given;
            matching ignores case because both the text and this value are
            lower-cased when the counts are computed.

    For every input value the primitive function returns the number of
    non-overlapping occurrences of ``string``. Entries that are not strings
    (for example missing values) produce NaN instead of raising.
    '''
    name = 'string_count'
    input_types = [NaturalLanguage]
    return_type = Numeric

    def __init__(self, string=None):
        self.string = string

    def get_function(self):
        def string_count(column):
            assert self.string is not None, "string to count needs to be defined"
            # The text is lower-cased below, so the needle must be lower-cased
            # too; otherwise a value such as "The" could never match.
            needle = self.string.lower()
            # this is a naive implementation used for clarity
            return [
                text.lower().count(needle) if isinstance(text, str) else float('nan')
                for text in column
            ]

        return string_count

In [None]:
from featuretools.tests.testing_utils import make_ecommerce_entityset

# Demo EntitySet provided by featuretools' testing utilities.
es = make_ecommerce_entityset()

# Run DFS on the sessions entity, stacking the aggregations on top of the
# custom primitive configured to count the string "the".
feature_matrix, features = ft.dfs(
    trans_primitives=[StringCount(string="the")],
    agg_primitives=["sum", "mean", "std"],
    target_entity="sessions",
    entityset=es,
)

# Show only the aggregated string-count columns.
feature_matrix.loc[:, [
    'STD(log.STRING_COUNT(comments, string=the))',
    'SUM(log.STRING_COUNT(comments, string=the))',
    'MEAN(log.STRING_COUNT(comments, string=the))',
]]

In [None]:
import featuretools as ft
import numpy as np
import re
from featuretools.primitives import make_trans_primitive
from featuretools.variable_types import NaturalLanguage, Numeric

In [None]:
def case_count(array):
    '''Count upper-case and lower-case ASCII letters in each text value.

    Args:
        array: Iterable of text values. It is traversed once per letter case.

    Returns:
        list: ``[upper, lower]``, two numpy arrays aligned with ``array``.
        ``upper`` holds the number of ``[A-Z]`` matches per value and
        ``lower`` the number of ``[a-z]`` matches. Entries that are not
        strings (for example None or NaN) yield NaN in both arrays.
    '''
    # this is a naive implementation used for clarity
    def count_matches(pattern):
        # `re.findall` raises TypeError on non-string input, so missing
        # values are propagated as NaN instead of aborting the computation.
        return np.array([
            len(re.findall(pattern, text)) if isinstance(text, str) else np.nan
            for text in array
        ])

    upper = count_matches('[A-Z]')
    lower = count_matches('[a-z]')
    return [upper, lower]

In [None]:
# Wrap `case_count` as a transform primitive. The function returns two arrays
# (upper-case counts and lower-case counts), so two output features are declared.
CaseCount = make_trans_primitive(
    function=case_count,
    number_output_features=2,
    input_types=[NaturalLanguage],
    return_type=Numeric,
)

# Build the demo EntitySet again for the multi-output example.
es = make_ecommerce_entityset()

In [None]:
# Run DFS on the sessions entity using only the custom multi-output primitive
# (no aggregation primitives).
feature_matrix, features = ft.dfs(
    trans_primitives=[CaseCount],
    agg_primitives=[],
    target_entity="sessions",
    entityset=es,
)

# Each output of the primitive is a separate column, addressed by its index
# suffix ([0] and [1]).
feature_matrix.loc[:, [
    'customers.CASE_COUNT(favorite_quote)[0]',
    'customers.CASE_COUNT(favorite_quote)[1]',
]]