In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score
from sklearn import metrics
import joblib

In [2]:
# Create a DataFrame with labeled examples
data = {
    'text': [
        "Winter 2023 sunglasses for men and women - Ecommerce", 
        "India’s Premium English Learning Destination - Education", 
        "Gaming Power with Lenovo - Technology", 
        "Book unforgettable experience - Travel", 
        "Ultimate robot shooter - Other"
    ],
    'category': ['Ecommerce', 'Education', 'Technology', 'Travel', 'Other']
}

df = pd.DataFrame(data)

In [3]:
# Save the DataFrame to a CSV file (you can adjust this based on your data format)
df.to_csv('labeled_data.csv', index=False)

In [4]:
# Load the data
df = pd.read_csv("E:\\dataset.csv")

In [5]:
# Assume you have a column called 'text' in your DataFrame
X = data['text']

In [6]:
# For simplicity, let's assume all data is labeled as 'Other'
y = ['Other'] * len(X)

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Create a TF-IDF vectorizer and transform the data
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [9]:
# Create a Naive Bayes classifier
clf = MultinomialNB()

In [10]:
# Train the classifier
clf.fit(X_train_tfidf, y_train)

MultinomialNB()

In [11]:
# Make predictions on the test set
y_pred = clf.predict(X_test_tfidf)

In [12]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 1.0


In [13]:
# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

       Other       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [14]:
# Save the trained model to a file
joblib.dump((vectorizer, clf), 'text_classification_model.pkl')

['text_classification_model.pkl']

In [15]:
# Load the trained model
vectorizer, clif = joblib.load('text_classification_model.pkl')

In [16]:
# Example sentences for prediction
test_sentences = [
    "New gaming laptop from Lenovo", 
    "Explore the wonders of India", 
    "Latest news on technology trends"
]

In [17]:
# Transform the test sentences using the vectorizer
X_test_tfidf = vectorizer.transform(test_sentences)

In [18]:
# Make predictions
predictions = clf.predict(X_test_tfidf)
print("Predictions:")
for sentence, prediction in zip(test_sentences, predictions):
    print(f"{sentence} - {prediction}")

Predictions:
New gaming laptop from Lenovo - Other
Explore the wonders of India - Other
Latest news on technology trends - Other
