# Import requirements

In [None]:
import pandas as pd
import pickle
import os
from sklearn.pipeline import make_pipeline
from embetter.text import SentenceEncoder
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix, 
                             accuracy_score)

# Step 1: Extract phrases and labels

In [None]:
# Load the tweets table, keep only rows whose sentiment-confidence score is
# at least 0.75, and drop rows that are exact duplicates across all columns.
path_to_csv = "../dataset/Tweets.csv"
dataframe = (
    pd.read_csv(path_to_csv)
    .loc[lambda frame: frame['airline_sentiment_confidence'] >= 0.75]
    .drop_duplicates()
)

# Only the tweet text (model input) and its sentiment label (target) are used.
filtered_df = dataframe.filter(items=["text", "airline_sentiment"])
phrases, labels = (
    filtered_df[column].to_numpy() for column in ("text", "airline_sentiment")
)

# Step 2: Split data into training and validation sets

In [None]:
# Hold out 30% of the samples for validation. Stratifying on the labels keeps
# the class proportions the same in both subsets, and the fixed random_state
# makes the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    phrases,
    labels,
    test_size=0.30,
    random_state=1,
    stratify=labels,
)

# =============== Classification algorithm ===============

# Step 3: Make & fit pipeline

In [None]:
# Two-stage pipeline: the sentence encoder turns each raw phrase into a vector
# (using the "distiluse-base-multilingual-cased-v2" model), and a linear-kernel
# SVM classifies those vectors. probability=True asks the SVM to also fit
# probability estimates so predict_proba is available after training.
classifier = make_pipeline(
    SentenceEncoder("distiluse-base-multilingual-cased-v2"),
    SVC(kernel='linear', probability=True),
)

# Train encoder + SVM end to end on the training split.
classifier.fit(x_train, y_train)

# Step 4: Model evaluation

In [None]:
# Predict on the held-out split and print, in order: overall accuracy, the
# per-class classification report, and the confusion matrix.
y_pred = classifier.predict(x_val)
for metric in (accuracy_score, classification_report, confusion_matrix):
    print(metric(y_val, y_pred))

# Blank separator line, then the accuracy once more as a closing summary.
print()
print(accuracy_score(y_val, y_pred))

# =============== Save model to disk ===============

# Paths

In [None]:
# File name for the serialized classifier and the directory it is stored in
# (the directory string carries its trailing slash).
model_name = "tfm_svm"
path_to_model = "../models/"

# Save as pickle

In [None]:
def save_as_pickle(obj, dir_name, file_name):
    """Serialize ``obj`` with pickle into ``file_name`` inside ``dir_name``.

    The target directory, including any missing parents, is created when it
    does not exist yet. An existing file with the same name is overwritten.

    Args:
        obj: The object to serialize; it must be picklable.
        dir_name: Directory to write into. A trailing path separator is
            optional. An empty string means the current working directory.
        file_name: Name of the file to create inside ``dir_name``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    # exist_ok=True avoids the check-then-create race; the guard skips
    # makedirs for an empty dir_name, which os.makedirs would reject.
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    # os.path.join inserts the separator only when dir_name lacks one, so
    # both "models" and "models/" resolve to the same target file.
    target_path = os.path.join(dir_name, file_name)

    # The context manager guarantees the handle is flushed and closed even
    # if pickling fails part-way through.
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle)
    
# Persist the fitted pipeline under the configured directory and file name,
# then report completion on stdout.
save_as_pickle(obj=classifier, dir_name=path_to_model, file_name=model_name)
print("model object is successfully saved...")