# **Text Classification**
*"Map a sentence to the right class"*

### **Dataset: CLINC 150**

In [None]:
# Paths to the text-classification train/test files used by the cells below.
text_train_file = "clinc_train.txt"
text_test_file = "clinc_test.txt"

# Preview up to the first five lines of the training file. zip() stops at the
# shorter of its inputs, so a file with fewer than five lines simply ends the
# preview instead of raising StopIteration from a bare next() call.
with open(text_train_file) as f:
    for _, line in zip(range(5), f):
        print(line)

In [None]:
from thirdai import bolt, deployment

# Build the text-classification model. Both the column schema (`data_types`)
# and the target column name are blank in this cell.
# NOTE(review): presumably these are meant to be filled in with the columns
# shown in the file preview before training — confirm.
text_model = deployment.UniversalDeepTransformer(data_types={}, target="")

In [None]:
# Training setup: three epochs, learning rate 0.0001, reporting recall@10.
train_config = bolt.graph.TrainConfig.make(epochs=3, learning_rate=0.0001)
train_config = train_config.with_metrics(["recall@10"])

text_model.train(text_train_file, train_config)

# Evaluation setup: report recall at cut-offs 1, 10 and 100 on the test file.
test_config = bolt.graph.PredictConfig.make()
test_config = test_config.with_metrics(["recall@1", "recall@10", "recall@100"])

text_model.evaluate(text_test_file, test_config)

# **Tabular Classification**

### **Dataset: Census Income**

In [None]:
# Paths to the tabular-classification train/test files used by the cells below.
tabular_train_file = "census_train.txt"
tabular_test_file = "census_test.txt"

# Preview up to the first five lines of the training file. zip() stops at the
# shorter of its inputs, so a file with fewer than five lines simply ends the
# preview instead of raising StopIteration from a bare next() call.
with open(tabular_train_file) as f:
    for _, line in zip(range(5), f):
        print(line)

In [None]:
# Declare every column as (name, n_unique_classes): None means the column is
# numerical, an integer means categorical with that many unique classes.
# "label" is both a declared column and the prediction target.
tabular_model = deployment.UniversalDeepTransformer(
    data_types={
        column: (
            bolt.types.numerical()
            if n_classes is None
            else bolt.types.categorical(n_unique_classes=n_classes)
        )
        for column, n_classes in (
            ("age", None),
            ("workclass", 9),
            ("fnlwgt", None),
            ("education", 16),
            ("education-num", 16),
            ("marital-status", 7),
            ("occupation", 15),
            ("relationship", 6),
            ("race", 5),
            ("sex", 2),
            ("capital-gain", None),
            ("capital-loss", None),
            ("hours-per-week", None),
            ("native-country", 42),
            ("label", 2),
        )
    },
    target="label",
)

# Training setup: three epochs, learning rate 0.0001, reporting accuracy.
train_config = bolt.graph.TrainConfig.make(epochs=3, learning_rate=0.0001)
train_config = train_config.with_metrics(["categorical_accuracy"])

tabular_model.train(tabular_train_file, train_config)

# Evaluation setup: report the same accuracy metric on the test file.
test_config = bolt.graph.PredictConfig.make()
test_config = test_config.with_metrics(["categorical_accuracy"])

tabular_model.evaluate(tabular_test_file, test_config)

# **Movie Recommendation**
*"Recommend the next movie for a user to watch"*

### **Dataset: Movielens 1M**

In [None]:
# Paths to the movie-recommendation train/test files used by the cells below.
movie_train_file = "movielens_train.txt"
movie_test_file = "movielens_test.txt"

# Preview up to the first five lines of the training file. zip() stops at the
# shorter of its inputs, so a file with fewer than five lines simply ends the
# preview instead of raising StopIteration from a bare next() call.
with open(movie_train_file) as f:
    for _, line in zip(range(5), f):
        print(line)

In [None]:
# Build the recommendation model: two categorical ID columns plus a date
# column, with "movieId" as the column to predict.
movie_model = deployment.UniversalDeepTransformer(
    data_types=dict(
        userId=bolt.types.categorical(n_unique_classes=6040),
        movieId=bolt.types.categorical(n_unique_classes=3706),
        timestamp=bolt.types.date(),
    ),
    target="movieId",
)

# Training setup: three epochs, learning rate 0.0001, reporting recall@10.
train_config = bolt.graph.TrainConfig.make(epochs=3, learning_rate=0.0001)
train_config = train_config.with_metrics(["recall@10"])

movie_model.train(movie_train_file, train_config)

# Evaluation setup: report recall at cut-offs 1, 10 and 100 on the test file.
test_config = bolt.graph.PredictConfig.make()
test_config = test_config.with_metrics(["recall@1", "recall@10", "recall@100"])

movie_model.evaluate(movie_test_file, test_config)