In [4]:
def tfidf_process_text(X_train_processed, X_test_processed):
    """
    Convert preprocessed train/test text into TF-IDF feature matrices.

    A single scikit-learn ``TfidfVectorizer`` (default settings) is fitted on
    the training documents only and then reused, unchanged, to encode the test
    documents. Both outputs therefore share the vocabulary and IDF weights
    learned from the training data; nothing is learned from the test data.

    Parameters:
    X_train_processed (pd.Series or other iterable of str): Preprocessed
        (e.g. lemmatized/stemmed) documents used to fit the vectorizer.
    X_test_processed (pd.Series or other iterable of str): Preprocessed
        documents encoded with the vectorizer fitted on the training data.

    Returns:
    train_X (scipy.sparse matrix): TF-IDF features for the training documents.
    test_X (scipy.sparse matrix): TF-IDF features for the test documents,
        with the same columns as train_X.

    The results are left sparse; most scikit-learn estimators accept sparse
    input directly, so no conversion to a dense array is performed here.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer

    vectorizer = TfidfVectorizer()

    # Fitting happens here, on the training documents only.
    train_matrix = vectorizer.fit_transform(X_train_processed)

    # The test documents are only transformed, never fitted on.
    return train_matrix, vectorizer.transform(X_test_processed)

In [5]:
import pandas as pd
import numpy as np

# Tiny smoke test: two short training documents of stem-like tokens, plus one
# test document that repeats the first training document verbatim.
train_test = pd.Series(
    data=[
        'favorit book android dream electr sheep',
        'arriv 1 00pm 4 30pm',
    ],
    index=[0, 1],
)

test_test = pd.Series(
    data=['favorit book android dream electr sheep'],
    index=[0],
)

X_train, X_test = tfidf_process_text(train_test, test_test)

# Show both sparse matrices with one blank line between them. Because the test
# document equals training document 0, its row should carry the same weights.
print(X_train, '', X_test, sep='\n')

  (0, 8)	0.408248290463863
  (0, 6)	0.408248290463863
  (0, 5)	0.408248290463863
  (0, 2)	0.408248290463863
  (0, 4)	0.408248290463863
  (0, 7)	0.408248290463863
  (1, 1)	0.5773502691896257
  (1, 0)	0.5773502691896257
  (1, 3)	0.5773502691896257

  (0, 8)	0.408248290463863
  (0, 7)	0.408248290463863
  (0, 6)	0.408248290463863
  (0, 5)	0.408248290463863
  (0, 4)	0.408248290463863
  (0, 2)	0.408248290463863
