In [1]:
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
import re



In [2]:
# Load the saved model and vectorizer.
# The artifact locations are kept in named constants so they can be re-pointed
# in one place; relative paths resolve against the kernel's working directory.
ARTIFACT_DIR = '../Desktop'
MODEL_PATH = f'{ARTIFACT_DIR}/text_classification_model8.pkl'
VECTORIZER_PATH = f'{ARTIFACT_DIR}/tfidf_vectorizer8.pkl'

# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading, so only point these paths at files from a trusted source.
model = joblib.load(MODEL_PATH)
vectorizer = joblib.load(VECTORIZER_PATH)



In [3]:
def clean_text(text):
    """Strip noisy tokens from ``text`` and return it lowercased.

    The following are deleted, in this order: links (``http`` followed by
    any run of non-whitespace), ``@`` mentions, ``#`` hashtags, punctuation
    (any character that is neither a word character nor whitespace) and
    runs of digits. Whitespace is left untouched, so gaps where a token was
    removed are not collapsed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    # Order matters: links, mentions and hashtags must be removed before the
    # punctuation pass strips the ':', '/', '@' and '#' that identify them.
    removal_patterns = (
        r'http\S+',   # links
        r'@\w+',      # mentions
        r'#\w+',      # hashtags
        r'[^\w\s]',   # punctuation
        r'\d+',       # digits
    )
    cleaned = text
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned.lower()



In [4]:
def predict_category(text):
    """Predict the category of a single piece of text.

    The text is passed through ``clean_text``, turned into features with the
    module-level ``vectorizer`` and classified by the module-level ``model``.

    Args:
        text: The raw string to classify.

    Returns:
        The first (and only) element of the model's prediction for ``text``.
    """
    # The vectorizer is given a one-element list, so the single input string
    # yields exactly one prediction.
    features = vectorizer.transform([clean_text(text)])
    return model.predict(features)[0]

In [5]:
# Smoke-test the prediction function on a handful of hand-written examples.
sample_texts = [
    "Winter 2023 sunglasses for men and women",
    "India’s Premium English Learning Destination",
    "Gaming Power with Lenovo",
    "Book unforgettable experience",
    "Ultimate robot shooter",
    "Breaking news on the presidential election",
    "The latest trends in healthcare technology",
    "Investment tips for 2023",
    "Top 10 travel destinations for 2023",
    "Highlights from last night's game",
]

# map() is lazy, so each text is still classified immediately before its
# result is printed, one sample at a time.
for text, category in zip(sample_texts, map(predict_category, sample_texts)):
    print(f'Text: {text}\nPredicted Category: {category}\n')


Text: Winter 2023 sunglasses for men and women
Predicted Category: Ecommerce

Text: India’s Premium English Learning Destination
Predicted Category: Education

Text: Gaming Power with Lenovo
Predicted Category: Technology

Text: Book unforgettable experience
Predicted Category: Travel

Text: Ultimate robot shooter
Predicted Category: Other

Text: Breaking news on the presidential election
Predicted Category: News

Text: The latest trends in healthcare technology
Predicted Category: Healthcare

Text: Investment tips for 2023
Predicted Category: Finance

Text: Top 10 travel destinations for 2023
Predicted Category: Technology

Text: Highlights from last night's game
Predicted Category: Other

