<a href="https://colab.research.google.com/github/Rivi9/Laptop-Recommendation-System/blob/NLP-Model/Text_Classification_Others.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Import necessary libraries
import re
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# Download necessary NLTK data
# 'punkt' is kept for older NLTK installs; 'punkt_tab' is requested as well
# because recent NLTK releases make word_tokenize() look up the 'punkt_tab'
# resource instead, and tokenization fails with a LookupError without it.
# NOTE(review): confirm the exact NLTK version pinned for this notebook.
nltk.download('punkt')
nltk.download('punkt_tab')
# Kept last so the cell's displayed value is still this call's return value.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [12]:
# Load the text classification dataset.
# The file is read without a header row, so the two column names are supplied here.
df = pd.read_csv(
    "/content/Sentences - Sheet1 (3).csv",
    header=None,
    names=["inputs", "category"],
)
print(df.shape)
df.head(6)

(989, 2)


Unnamed: 0,inputs,category
0,Travel Blogger,Basic
1,Food Blogging,Basic
2,digital content designer,Basic
3,content writer,Basic
4,food blogger,Basic
5,social media assistant,Basic


In [13]:
# Drop NA values
# dropna() is called with no axis/how arguments, so its defaults apply (rows
# containing any missing value are removed). inplace=True mutates `df` itself
# instead of returning a new frame, so re-running this cell is a no-op.
df.dropna(inplace=True)

In [14]:
# Preprocess the text
_STEMMING_RESOURCES = {}


def _get_stemming_resources():
    """Return the shared ``(stop_words, stemmer)`` pair, building it on first use."""
    if not _STEMMING_RESOURCES:
        # Neither object depends on the text being processed, so build them
        # once instead of once per row / per prediction request.
        _STEMMING_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _STEMMING_RESOURCES['stemmer'] = PorterStemmer()
    return _STEMMING_RESOURCES['stop_words'], _STEMMING_RESOURCES['stemmer']


def preprocess_with_stemming(text):
    """Normalize one piece of text into a space-joined string of stemmed tokens.

    Steps, in order: strip every character that is neither a word character
    nor whitespace, lowercase, collapse runs of spaces, tokenize with
    ``word_tokenize``, drop English stop words, and Porter-stem what remains.

    Args:
        text: The raw input string.

    Returns:
        The stemmed, stop-word-free tokens joined by single spaces (an empty
        string when no tokens survive).
    """
    stop_words, stemmer = _get_stemming_resources()

    # Punctuation is removed *before* the stop-word check, so a contraction
    # such as "I'm" is compared against the stop-word list as "im".
    # NOTE(review): confirm that is the intended handling of contractions.
    text = re.sub(r'[^\w\s]', '', text).lower()
    text = re.sub(' +', ' ', text)

    word_tokens = word_tokenize(text)
    return ' '.join(
        stemmer.stem(word) for word in word_tokens if word not in stop_words
    )

# Derive the cleaned, stemmed text column from the raw inputs, one value at a time
df['processed_inputs'] = df['inputs'].map(preprocess_with_stemming)

In [15]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['processed_inputs'],
    df['category'],
    test_size=0.2,
    random_state=40,
)

# TF-IDF features: the vectorizer is fitted on the training split only and
# then applied, unchanged, to the test split
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Fit the Logistic Regression classifier and predict the held-out rows
logistic_model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)
y_pred_log_reg = logistic_model.predict(X_test_tfidf)

In [16]:
# Score the Logistic Regression predictions against the held-out labels:
# overall accuracy first, then the per-class precision/recall/F1 table
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)
print("Logistic Regression Accuracy:", log_reg_accuracy)

log_reg_report = classification_report(y_test, y_pred_log_reg)
print("Classification Report for Logistic Regression:\n", log_reg_report)

Logistic Regression Accuracy: 0.7828282828282829
Classification Report for Logistic Regression:
                        precision    recall  f1-score   support

                Basic       0.77      0.91      0.83        33
Business_Professional       0.73      0.79      0.76        28
      Creative_Design       0.87      0.72      0.79        36
               Gaming       0.86      0.76      0.81        25
                   IT       0.67      0.85      0.75        33
    Student_Education       0.86      0.70      0.77        43

             accuracy                           0.78       198
            macro avg       0.79      0.79      0.78       198
         weighted avg       0.80      0.78      0.78       198



In [17]:
# Fit Multinomial Naive Bayes on the same TF-IDF features and predict the held-out rows
nb_model = MultinomialNB().fit(X_train_tfidf, y_train)
y_pred_nb = nb_model.predict(X_test_tfidf)

# Score the Naive Bayes predictions: overall accuracy, then the per-class table
nb_accuracy = accuracy_score(y_test, y_pred_nb)
print("Naive Bayes Accuracy:", nb_accuracy)

nb_report = classification_report(y_test, y_pred_nb)
print("Classification Report for Naive Bayes:\n", nb_report)

Naive Bayes Accuracy: 0.7525252525252525
Classification Report for Naive Bayes:
                        precision    recall  f1-score   support

                Basic       0.88      0.85      0.86        33
Business_Professional       0.70      0.75      0.72        28
      Creative_Design       0.87      0.72      0.79        36
               Gaming       0.84      0.64      0.73        25
                   IT       0.56      0.94      0.70        33
    Student_Education       0.84      0.63      0.72        43

             accuracy                           0.75       198
            macro avg       0.78      0.75      0.75       198
         weighted avg       0.79      0.75      0.75       198



In [19]:
# Function to preprocess and predict the category of new inputs
def predict_category(new_texts, model, vectorizer):
    """Preprocess raw texts and predict a category for each of them.

    Args:
        new_texts: Either a single string or an iterable of strings to
            classify. A single string is treated as one text.
        model: Classifier exposing ``predict(features)``; it is expected to
            have been fitted already on features from ``vectorizer``.
        vectorizer: Feature extractor exposing ``transform(texts)``; it is
            expected to have been fitted already.

    Returns:
        The result of ``model.predict`` on the vectorized texts, in the same
        order as the inputs.
    """
    # A bare string is itself iterable: without this guard it would be split
    # into characters and every character would be classified separately.
    if isinstance(new_texts, str):
        new_texts = [new_texts]

    # Apply the same preprocessing that was used on the training data
    preprocessed_texts = [preprocess_with_stemming(text) for text in new_texts]

    # Transform texts to TF-IDF vectors
    texts_tfidf = vectorizer.transform(preprocessed_texts)

    # Predict and return the categories
    return model.predict(texts_tfidf)

# Example texts to classify
# Free-form requests of varying length (short phrases through full sentences)
# that are passed, unmodified, to predict_category() for both models below.
new_texts = [
    "I need a powerful laptop for gaming",
    "Looking for a budget-friendly option for college",
    "A professional software developer needing a robust system",
    "Interested in a lightweight laptop for travel",
    "Seeking a device suitable for graphic design work",
    "I am a digital artist looking for a laptop that can handle heavy graphic design software.",
    "As a student studying computer science, I need a laptop with great battery life for coding sessions.",
    "I love playing the latest PC games, so I'm searching for a laptop with a high-performance GPU.",
    "Being a freelance writer, I need a lightweight and portable laptop with a comfortable keyboard.",
    "As an engineering student, I require a laptop that can run CAD software smoothly.",
    "I'm a business analyst often working with large datasets, so I need a laptop with plenty of RAM and a fast processor.",
    "As a video editor, I'm in need of a laptop with a high-resolution display and powerful graphics capabilities.",
    "I teach online courses and need a laptop with a high-quality webcam and microphone for lectures.",
    "As a travel blogger, I'm looking for a durable laptop that can handle being on the road.",
    "I am an AI researcher requiring a laptop with advanced computing power for machine learning tasks.",
    "for coding activities"
]

def _print_predictions(heading, model):
    """Classify ``new_texts`` with ``model``, print one line per text, and return the predictions."""
    predictions = predict_category(new_texts, model, tfidf_vectorizer)
    print(heading)
    for text, prediction in zip(new_texts, predictions):
        print(f"Text: {text}, Predicted Category: {prediction}")
    return predictions


# Logistic Regression listing first, then Naive Bayes; the leading "\n" in the
# second heading puts a blank line between the two listings
log_reg_predictions = _print_predictions("Logistic Regression Predictions:", logistic_model)
nb_predictions = _print_predictions("\nNaive Bayes Predictions:", nb_model)


Logistic Regression Predictions:
Text: I need a powerful laptop for gaming, Predicted Category: Gaming
Text: Looking for a budget-friendly option for college, Predicted Category: Student_Education
Text: A professional software developer needing a robust system, Predicted Category: IT
Text: Interested in a lightweight laptop for travel, Predicted Category: Basic
Text: Seeking a device suitable for graphic design work, Predicted Category: Creative_Design
Text: I am a digital artist looking for a laptop that can handle heavy graphic design software., Predicted Category: Creative_Design
Text: As a student studying computer science, I need a laptop with great battery life for coding sessions., Predicted Category: Student_Education
Text: I love playing the latest PC games, so I'm searching for a laptop with a high-performance GPU., Predicted Category: Gaming
Text: Being a freelance writer, I need a lightweight and portable laptop with a comfortable keyboard., Predicted Category: Basic
Text: 