[Reference](https://medium.com/@kenanekici/make-predictions-in-tableau-using-python-13ef3a1571c3)

In [1]:
!pip install tabpy sklearn pandas seaborn pickle-mixin 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tabpy
  Downloading tabpy-2.5.0-py2.py3-none-any.whl (110 kB)
[K     |████████████████████████████████| 110 kB 5.2 MB/s 
Collecting pickle-mixin
  Downloading pickle-mixin-1.0.2.tar.gz (5.1 kB)
Collecting twisted
  Downloading Twisted-22.4.0-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 38.9 MB/s 
[?25hCollecting genson
  Downloading genson-1.2.2.tar.gz (34 kB)
Collecting simplejson
  Downloading simplejson-3.17.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (130 kB)
[K     |████████████████████████████████| 130 kB 48.2 MB/s 
Collecting mock
  Downloading mock-4.0.3-py3-none-any.whl (28 kB)
Collecting configparser
  Downloading configparser-5.2.0-py3-none-any.whl (19 kB)
Collecting pytest-cov
  Downloading pytest_cov-3.0.0-py3-none-any.whl (20 kB)
Collecting hypothesis
  Downloading hypothesis-

In [2]:
# Start the TabPy server via a shell escape. The captured log below shows it listening on
# port 9004 and ending with "Exit".
# NOTE(review): a `!` command runs in the foreground, so this cell does not return while the
# server is up; the deploy step further down needs a server that is still running
# (e.g. one started from a separate terminal) — confirm how this is meant to be run.
!tabpy

2022-07-02,05:58:49 [INFO] (app.py:app:244): Parsing config file /usr/local/lib/python3.7/dist-packages/tabpy/tabpy_server/app/../common/default.conf
2022-07-02,05:58:49 [INFO] (app.py:app:436): Loading state from state file /usr/local/lib/python3.7/dist-packages/tabpy/tabpy_server/state.ini
2022-07-02,05:58:49 [INFO] (app.py:app:333): Password file is not specified: Authentication is not enabled
2022-07-02,05:58:49 [INFO] (app.py:app:347): Call context logging is disabled
2022-07-02,05:58:49 [INFO] (app.py:app:125): Initializing TabPy...
2022-07-02,05:58:49 [INFO] (callbacks.py:callbacks:43): Initializing TabPy Server...
2022-07-02,05:58:49 [INFO] (app.py:app:129): Done initializing TabPy.
2022-07-02,05:58:49 [INFO] (app.py:app:83): Setting max request size to 104857600 bytes
2022-07-02,05:58:49 [INFO] (callbacks.py:callbacks:64): Initializing models...
2022-07-02,05:58:49 [INFO] (app.py:app:107): Web service listening on port 9004
2022-07-02,06:02:04 [CRITICAL] (app.py:app:117): Exit

# Training the model and deploying the predict function


In [3]:
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2
from sklearn.metrics import f1_score
import pickle

def clean_text(X):
    """Normalise a pandas Series of raw tweet texts before vectorisation.

    Steps, in order: replace missing values with empty strings, lowercase,
    strip @mentions, strip links, replace every run of non-letters with a
    single space, and collapse/trim whitespace.

    Parameters
    ----------
    X : pandas.Series
        Raw texts. ``None``/NaN entries are treated as empty strings.

    Returns
    -------
    pandas.Series
        Cleaned texts with the same index as ``X``.
    """
    # Missing values would otherwise propagate as NaN and break the vectorizer.
    X = X.fillna("").astype(str)
    # remove mentions (handles may contain underscores, e.g. @Virgin_America)
    X = X.str.lower().str.replace(r'@[A-Za-z0-9_]+', ' ', regex=True)
    # remove links
    X = X.str.replace(r'http\S+', ' ', regex=True)
    # remove non alphabet
    X = X.str.replace(r'[^a-zA-Z]+', ' ', regex=True)
    # remove extra whitespaces
    X = X.str.replace(r'\s+', ' ', regex=True).str.strip()
    return X

def train_model(data_path="tweets_train_test.csv", n_features=700, random_state=42):
    """Train the tweet sentiment classifier and persist it for ``predict``.

    The CSV is expected to provide the columns ``text`` and
    ``airline_sentiment``. Only the ``positive`` and ``negative`` classes are
    kept, and negatives are downsampled to the number of positives.

    The train/test split happens *before* feature selection and TF-IDF
    fitting, so the printed score is not inflated by information from the
    held-out tweets.

    Parameters
    ----------
    data_path : str
        Path of the labelled tweets CSV.
    n_features : int
        Number of highest-scoring chi2 terms kept as the vocabulary.
    random_state : int
        Seed for the negative-class downsampling and the train/test split.

    Returns
    -------
    None
        Side effects: draws a confusion-matrix heatmap, prints the macro F1
        score, and writes ``vectorizer.pickle`` and ``model.pickle`` to the
        current working directory.
    """
    # load and prepare data
    tweets = pd.read_csv(data_path)
    y = tweets["airline_sentiment"]
    X = tweets["text"]

    # balance the classes: keep every positive, sample as many negatives (seeded)
    y_pos = y[y == "positive"]
    y_neg = y[y == "negative"].sample(len(y_pos), random_state=random_state)
    y = pd.concat([y_neg, y_pos])
    X = X.loc[y.index].reset_index(drop=True)
    y = y.reset_index(drop=True)

    # clean
    X = clean_text(X)

    # split first so feature selection and IDF weights only ever see training data
    X_train_text, X_test_text, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state, shuffle=True
    )

    # feature selection: rank terms by chi2 against the training labels
    temp_vectorizer = TfidfVectorizer(stop_words='english')
    X_train_all_terms = temp_vectorizer.fit_transform(X_train_text)
    chi2score = chi2(X_train_all_terms, y_train)[0]
    # get_feature_names() was removed in scikit-learn 1.2; fall back for old versions
    if hasattr(temp_vectorizer, "get_feature_names_out"):
        feature_names = temp_vectorizer.get_feature_names_out()
    else:
        feature_names = temp_vectorizer.get_feature_names()
    ranked_terms = sorted(zip(feature_names, chi2score), key=lambda pair: pair[1])
    # plain str (not numpy.str_) keeps the pickled vocabulary simple
    labels = [str(term) for term, _ in ranked_terms[-n_features:]]

    # fit on the training texts with the selected vocabulary, then project the test texts
    vectorizer = TfidfVectorizer(vocabulary=labels)
    X_train = vectorizer.fit_transform(X_train_text)
    X_test = vectorizer.transform(X_test_text)

    # train classifier
    sentiment_model = SVC(random_state=0)
    sentiment_model.fit(X_train, y_train)
    preds = sentiment_model.predict(X_test)

    # evaluate
    conf_matr = confusion_matrix(y_test, preds, labels=["positive", "negative"])
    ax = sns.heatmap(conf_matr, annot=True, cbar=False, fmt='g', cmap='Blues')
    ax.set_ylabel("True label")
    ax.set_xlabel("Predicted")
    ax.set_xticklabels(["Positive", "Negative"])
    ax.set_yticklabels(["Positive", "Negative"])
    # scikit-learn's convention is (y_true, y_pred)
    print(f1_score(y_test, preds, average="macro"))

    # export vectorizer and model
    # Store the fitted vectorizer (serialize)
    with open('vectorizer.pickle', 'wb') as v:
        pickle.dump(vectorizer, v)

    # Store the trained classifier (serialize)
    with open('model.pickle', 'wb') as m:
        pickle.dump(sentiment_model, m)

# this is the function that we deploy to TabPy
def predict(list_of_strings):
    """Classify each input text as ``"positive"`` or ``"negative"``.

    Loads the fitted vectorizer and classifier from ``vectorizer.pickle`` and
    ``model.pickle`` (relative to the current working directory of whichever
    process runs this function), cleans the texts with ``clean_text`` and
    returns one label per input.

    Parameters
    ----------
    list_of_strings : list of str
        Raw texts to score.

    Returns
    -------
    list of str
        Predicted labels, in input order. Empty input yields an empty list.
    """
    # Nothing to score; the classifier rejects a zero-row matrix.
    if len(list_of_strings) == 0:
        return []

    X = pd.DataFrame(list_of_strings, columns=["text"])
    X = clean_text(X["text"])

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # artifacts produced by a trusted training run.
    with open('vectorizer.pickle', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)
    with open('model.pickle', 'rb') as model_file:
        model = pickle.load(model_file)

    # transform (not fit_transform): reuse the IDF weights learned in training
    # instead of re-fitting them on the incoming batch.
    X = vectorizer.transform(X)
    # .tolist() converts numpy string scalars to plain Python str
    return model.predict(X).tolist()

# Train the classifier; this writes vectorizer.pickle and model.pickle, which predict() reads back.
train_model()

# deploy predict function to TabPy
# Requires a TabPy server reachable at the URL below (port 9004 on localhost).
from tabpy.tabpy_tools.client import Client
client = Client('http://localhost:9004/')
# Publish predict() under the endpoint name 'predict'.
# NOTE(review): the third argument is presumably the endpoint description, and override=True
# presumably replaces an existing endpoint of the same name — confirm against the TabPy docs.
client.deploy('predict', predict, 'predict', override=True)