# In [1]:
import pandas as pd
import joblib
import torch
from sklearn.feature_extraction.text import TfidfVectorizer


# ---------- DATA LOADING ----------
def load_data(path):
    """
    Read a CSV dataset into a pandas DataFrame.

    Args:
        path: Location of the CSV data; passed unchanged to
            ``pandas.read_csv``, so anything that function accepts
            (a file path or a file-like object) works here.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with its default
        parsing options.

    Any exception raised by pandas while opening or parsing the data
    propagates to the caller unchanged.
    """
    frame = pd.read_csv(path)
    return frame


# ---------- VECTORIZER ----------
def create_vectorizer(max_features=5000, ngram_range=(1, 2), stop_words="english"):
    """
    Create an unfitted TF-IDF vectorizer.

    Called with no arguments this returns the same configuration the
    function has always produced (5000 features, unigrams + bigrams,
    English stop words). The keyword parameters allow callers to tune the
    vectorizer without editing this module.

    Args:
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``;
            upper bound on the vocabulary size (``None`` for no limit).
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``;
            ``(min_n, max_n)`` bounds of the n-grams to extract.
        stop_words: Forwarded to ``TfidfVectorizer(stop_words=...)``;
            ``"english"``, an explicit list of words, or ``None``.

    Returns:
        A new ``TfidfVectorizer`` instance that has not been fitted yet.
    """
    return TfidfVectorizer(
        max_features=max_features,
        ngram_range=ngram_range,
        stop_words=stop_words,
    )


# ---------- SAVE / LOAD ML MODELS ----------
def save_model(model, path):
    """
    Serialize an object to disk with joblib.

    Args:
        model: The object to persist; handed to ``joblib.dump`` as-is.
        path: Destination passed to ``joblib.dump`` as the filename.

    Returns:
        None. The value returned by ``joblib.dump`` is discarded.
    """
    joblib.dump(value=model, filename=path)


def load_model(path):
    """
    Load an object previously written with joblib.

    Args:
        path: Location of the joblib file; passed unchanged to
            ``joblib.load``.

    Returns:
        Whatever object ``joblib.load`` reconstructs from the file.

    NOTE: joblib files are pickle-based, so loading one can execute
    arbitrary code. Only load files that come from a trusted source.
    """
    restored = joblib.load(path)
    return restored


# ---------- SAVE TORCH MODEL ----------
def save_torch_model(model, path):
    """
    Save a torch model's parameters (its ``state_dict``) to ``path``.

    Only the mapping returned by ``model.state_dict()`` is written, not
    the model object itself.

    Args:
        model: Object exposing a ``state_dict()`` method.
        path: Destination passed unchanged to ``torch.save``.

    Returns:
        None.
    """
    weights = model.state_dict()
    torch.save(weights, path)

