In [33]:
import numpy as np
import pandas as pd
from joblib import dump, load
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.base import BaseEstimator, TransformerMixin


In [14]:
class TextPreprocessor(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that normalises raw text documents.

    Each document is tokenised with NLTK's ``word_tokenize``. Tokens that are
    not purely alphanumeric are discarded; the remaining tokens are
    lower-cased, lemmatised with ``WordNetLemmatizer``, filtered against the
    English stop-word list, and joined back into one space-separated string.

    Attributes:
        lemmatizer: ``WordNetLemmatizer`` instance used for every token.
        stop_words: set of English stop words removed after lemmatisation.

    NOTE: the attribute names above are part of the pickled state of any
    saved instance; renaming them would break previously saved pipelines,
    because ``__init__`` is not re-run when an instance is unpickled.
    """

    def __init__(self):
        # Both helpers need the matching NLTK data packages to be installed
        # (see ``nltk.download``).
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))

    def fit(self, X, y=None):
        """No-op fit: the transformer learns nothing from the data.

        Returns:
            self, so the transformer can be chained inside a pipeline.
        """
        return self

    def transform(self, X, y=None):
        """Preprocess every document in ``X``.

        Args:
            X: a ``pandas.Series`` of documents, any other iterable of
                documents, or a single string (treated as one document).
            y: ignored; accepted for scikit-learn API compatibility.

        Returns:
            A ``pandas.Series`` when ``X`` is a Series, otherwise a list,
            holding one cleaned string per input document.
        """
        if isinstance(X, str):
            # A bare string is itself iterable; without this guard it would be
            # processed character by character, one "document" per character.
            X = [X]
        if isinstance(X, pd.Series):
            return X.apply(self.preprocess_text)
        return [self.preprocess_text(text) for text in X]

    def preprocess_text(self, text):
        """Clean a single document.

        Args:
            text: the raw document. ``None`` and float NaN (the usual markers
                for missing text in pandas object columns) are treated as an
                empty document instead of raising inside the tokenizer.

        Returns:
            The lower-cased, lemmatised, stop-word-free tokens joined by
            single spaces; ``""`` when nothing survives the filters.
        """
        if text is None or (isinstance(text, float) and np.isnan(text)):
            return ""

        kept = []
        for token in word_tokenize(text):
            # Drops punctuation and mixed tokens such as "'s" or "n't".
            if not token.isalnum():
                continue
            lemma = self.lemmatizer.lemmatize(token.lower())
            # Stop words are checked on the lemma, matching the original order
            # of operations (lemmatise first, then filter).
            if lemma not in self.stop_words:
                kept.append(lemma)
        return " ".join(kept)

In [15]:
# Restore the saved sentiment-analysis pipeline from disk.
# NOTE(review): presumably the saved pipeline contains a TextPreprocessor
# step, in which case that class must already be defined in this session
# before loading — confirm.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
PIPELINE_PATH = 'sentiment_analysis_pipeline.joblib'
loaded_pipeline = load(PIPELINE_PATH)

In [37]:
# Sentence to classify; swap the literal for input(...) to prompt interactively.
user_sentence = "tejas love's suits"

# The sentence is passed as a one-element batch to both prediction calls.
sentences = [user_sentence]
prediction = loaded_pipeline.predict(sentences)
confidence_scores = loaded_pipeline.predict_proba(sentences)

print(f"The sentiment of the input sentence is: {prediction[0]}")
# Largest value in the probability array (a single row here), displayed as
# the cell's output value.
np.max(confidence_scores)

The sentiment of the input sentence is: positive


0.6543002212720058