# **Text Classification**

### **Dataset: CLINC 150**

In [None]:
# Paths to the CLINC 150 train/test CSV splits used by the cells below.
text_train_file = "clinc_train.csv"
text_test_file = "clinc_test.csv"

# Preview up to the first 5 lines of the training file.
# zip() with range() first stops cleanly when the file is shorter than 5 lines
# (calling next(f) directly would raise StopIteration), and stripping the
# trailing newline avoids printing a blank line after every row.
with open(text_train_file) as f:
    for _, line in zip(range(5), f):
        print(line.rstrip("\n"))

### **Define the model**

In [None]:
from thirdai import bolt

# Column schema for the text-classification model: one text input column and
# one categorical column that serves as the prediction target.
# NOTE(review): the keyword `force_atttention` is spelled with three t's —
# confirm this matches the installed thirdai API before renaming it.
text_schema = {
    "text": bolt.types.text(force_atttention=True),
    "label": bolt.types.categorical(),
}

text_model = bolt.UniversalDeepTransformer(
    data_types=text_schema,
    target="label",
)

### **Train and evaluate**

In [None]:
# Train the text model for 3 epochs, reporting categorical accuracy.
train_config = bolt.TrainConfig(epochs=3, learning_rate=0.0001)
train_config = train_config.with_metrics(["categorical_accuracy"])
text_model.train(text_train_file, train_config)

# Score the trained model on the test split with the same metric.
# The evaluate() call stays last so its result is the cell's displayed output.
test_config = bolt.EvalConfig()
test_config = test_config.with_metrics(["categorical_accuracy"])
text_model.evaluate(text_test_file, test_config)

# **Tabular Classification**

### **Dataset: Census Income**

In [None]:
# Paths to the Census Income train/test CSV splits used by the cells below.
tabular_train_file = "census_train.csv"
tabular_test_file = "census_test.csv"

# Preview up to the first 5 lines of the training file.
# zip() with range() first stops cleanly when the file is shorter than 5 lines
# (calling next(f) directly would raise StopIteration), and stripping the
# trailing newline avoids printing a blank line after every row.
with open(tabular_train_file) as f:
    for _, line in zip(range(5), f):
        print(line.rstrip("\n"))

In [None]:
from thirdai import bolt

# Short aliases for the two column-type constructors used in the schema below.
_num = bolt.types.numerical
_cat = bolt.types.categorical

# Column schema for the Census Income CSV. Each numerical column is declared
# with a `range` tuple and each categorical column with `n_unique_classes`.
# Key order is kept exactly as in the original definition.
census_schema = {
    "age": _num(range=(17, 90)),
    "workclass": _cat(n_unique_classes=9),
    "fnlwgt": _num(range=(12285, 1484705)),
    "education": _cat(n_unique_classes=16),
    "education-num": _cat(n_unique_classes=16),
    "marital-status": _cat(n_unique_classes=7),
    "occupation": _cat(n_unique_classes=15),
    "relationship": _cat(n_unique_classes=6),
    "race": _cat(n_unique_classes=5),
    "sex": _cat(n_unique_classes=2),
    "capital-gain": _num(range=(0, 99999)),
    "capital-loss": _num(range=(0, 4356)),
    "hours-per-week": _num(range=(1, 99)),
    "native-country": _cat(n_unique_classes=42),
    "label": _cat(n_unique_classes=2),
}

# The "label" column is the prediction target.
tabular_model = bolt.UniversalDeepTransformer(
    data_types=census_schema,
    target="label",
)

# Train the tabular model for 5 epochs, reporting categorical accuracy.
train_config = bolt.TrainConfig(epochs=5, learning_rate=0.01)
train_config = train_config.with_metrics(["categorical_accuracy"])
tabular_model.train(tabular_train_file, train_config)

# Score the trained model on the test split with the same metric.
# The evaluate() call stays last so its result is the cell's displayed output.
test_config = bolt.EvalConfig()
test_config = test_config.with_metrics(["categorical_accuracy"])
tabular_model.evaluate(tabular_test_file, test_config)

# **Product Recommendation**

### **Dataset: MovieLens 1M**

In [None]:
# Paths to the Movielens 1M train/test CSV splits used by the cells below.
movie_train_file = "movielens_train.csv"
movie_test_file = "movielens_test.csv"

# Preview up to the first 5 lines of the training file.
# zip() with range() first stops cleanly when the file is shorter than 5 lines
# (calling next(f) directly would raise StopIteration), and stripping the
# trailing newline avoids printing a blank line after every row.
with open(movie_train_file) as f:
    for _, line in zip(range(5), f):
        print(line.rstrip("\n"))

In [None]:
from thirdai import bolt

# Column schema for the Movielens CSV: two categorical ID columns and a date.
movie_schema = {
    "userId": bolt.types.categorical(n_unique_classes=6040),
    "movieId": bolt.types.categorical(n_unique_classes=3706),
    "timestamp": bolt.types.date(),
}

# Maps "userId" to the list of columns passed as its temporal relationships.
# NOTE(review): presumably this makes the model track each user's movieId
# history over time — confirm against the thirdai documentation.
movie_temporal_tracking = {"userId": ["movieId"]}

# "movieId" is both a tracked column and the prediction target.
movie_model = bolt.UniversalDeepTransformer(
    data_types=movie_schema,
    temporal_tracking_relationships=movie_temporal_tracking,
    target="movieId",
)

# Train the recommendation model for 3 epochs, reporting recall@10.
train_config = bolt.TrainConfig(epochs=3, learning_rate=0.0001)
train_config = train_config.with_metrics(["recall@10"])
movie_model.train(movie_train_file, train_config)

# Score on the test split at three recall cut-offs.
# The evaluate() call stays last so its result is the cell's displayed output.
test_config = bolt.EvalConfig()
test_config = test_config.with_metrics(["recall@1", "recall@10", "recall@100"])
movie_model.evaluate(movie_test_file, test_config)

In [None]:
import time

# One single-sample input per model, built up front so that only the
# predict() calls fall inside the timed region.
text_sample = {
    "text": "what expression would i use to say i love you if i were an italian"
}
tabular_sample = {
    "age": "39",
    "workclass": "State-gov",
    "fnlwgt": "77516",
    "education": "Bachelors",
    "education-num": "13",
    "marital-status": "Never-married",
    "occupation": "Adm-clerical",
    "relationship": "Not-in-family",
    "race": "White",
    "sex": "Male",
    "capital-gain": "2174",
    "capital-loss": "0",
    "hours-per-week": "40",
    "native-country": "United-States",
}
movie_sample = {"userId": "4958", "timestamp": "2003-02-28"}

# Measure the combined latency of one prediction from each of the three models.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()
text_model.predict(text_sample)
tabular_model.predict(tabular_sample)
movie_model.predict(movie_sample)
end_time = time.perf_counter()

print((end_time - start_time) * 1000, "ms")