# The Data

In [1]:
import os

# Switch the working directory to the sibling `src` folder before the project
# imports below run (presumably so that `data_pipeline` and `elasticsearch_db`
# resolve from there -- TODO confirm against the repository layout).
os.chdir("../src")

from prefect import Flow, task, context
import numpy as np

from data_pipeline.data_sourcing import get_data
from data_pipeline.data_preprocessing import fill_missing_examples, clean_examples
from data_pipeline.feature_extracting import encode_features
from data_pipeline.data_splitting import split
from elasticsearch_db.elasticsearch import elastic_conection
from elasticsearch_db.elasticsearch import get_nlp_model

# Connection handle that is passed to get_nlp_model here and to get_data in
# the next cell.
es = elastic_conection()
# Hard-coded identifier of the workspace to load.
workspace_id = "dc1e7b3d-9137-4a20-a99c-d0d2029ef170"
# get_nlp_model returns a pair: the workspace object and a flag that is
# presumably whether the workspace was found (TODO confirm).
workspace, exist = get_nlp_model(es, workspace_id=workspace_id)
# Bare expression so the flag is displayed as the cell output.
exist

True

In [2]:
# Fetch the workspace's examples and clean them in one step.
df = clean_examples(get_data(workspace=workspace, es=es))

# Encode the features; the two transformers are returned alongside the data.
df, x_transformer, y_transformer = encode_features(df)

# Split the encoded data into train and test partitions.
X_train, X_test, y_train, y_test = split(df=df)

# LogisticRegression

![](imgs/logit.gif)

To understand what these hyperparameters mean, read [sklearn.linear_model.LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) and [Don’t Sweat the Solver Stuff](https://towardsdatascience.com/dont-sweat-the-solver-stuff-aea7cddc3451).

In [21]:
# Keyword arguments that train_logistic_regression forwards to
# LogisticRegression.
hyper_parameters = dict(
    multi_class="multinomial",
    fit_intercept=False,
    C=5,
    random_state=42,
    max_iter=1000,
    n_jobs=-1,
)

In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import numpy as np

def train_logistic_regression(X, y, **hyper_parameters):
    """Build a LogisticRegression from the keyword arguments and fit it.

    Args:
        X: Training features, handed to ``fit`` unchanged.
        y: Object whose ``intents`` attribute supplies the training labels.
        **hyper_parameters: Forwarded verbatim to the ``LogisticRegression``
            constructor.

    Returns:
        The ``LogisticRegression`` instance after ``fit`` has been called.
    """
    classifier = LogisticRegression(**hyper_parameters)
    labels = y.intents
    classifier.fit(X, labels)
    return classifier

In [24]:
# Fit the logistic-regression classifier on the training split.
# NOTE(review): `hyper_parameters` is rebound further down for the SVM, so this
# cell must run while the logistic-regression dict is the live binding.
logreg_clf = train_logistic_regression(X=X_train, y=y_train, **hyper_parameters)

# Poly SVM

![](imgs/svm.gif)

To understand what these hyperparameters mean, read:

* [sklearn.svm.SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)
* [Support Vector Machines](https://scikit-learn.org/stable/modules/svm.html#svm)
* [Support Vector Classifier - Chris Albon](https://chrisalbon.com/machine_learning/support_vector_machines/support_vector_classifier/)

In [26]:
# Keyword arguments that train_svm forwards to svm.SVC.
# NOTE(review): this rebinds the `hyper_parameters` name used earlier for the
# logistic-regression settings.
# NOTE(review): with degree=1 and coef0=0 the polynomial kernel looks
# equivalent to a scaled linear kernel -- confirm this is intended.
hyper_parameters = dict(
    kernel="poly",
    degree=1,
    coef0=0,
    random_state=42,
    probability=True,
)

In [25]:
from sklearn import svm

def train_svm(X, y, **hyper_parameters):
    """Build an ``svm.SVC`` from the keyword arguments and fit it.

    Args:
        X: Training features, handed to ``fit`` unchanged.
        y: Object whose ``intents`` attribute supplies the training labels.
        **hyper_parameters: Forwarded verbatim to the ``svm.SVC`` constructor.

    Returns:
        The ``svm.SVC`` instance after ``fit`` has been called.
    """
    classifier = svm.SVC(**hyper_parameters)
    labels = y.intents
    classifier.fit(X, labels)
    return classifier

In [27]:
# Fit the SVM classifier on the training split.
# NOTE(review): relies on `hyper_parameters` currently holding the SVM
# settings rather than the logistic-regression ones defined earlier.
svm_clf = train_svm(X=X_train, y=y_train, **hyper_parameters)