<a href="https://colab.research.google.com/github/HarshBarnwal2004/simple-content-classifier/blob/main/simple_content_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [24]:
# sample dataset
data = {
    'description': [
        'A high school girl gets caught in a love triangle between her childhood friend and a handsome new student.',
        'A group of super-powered individuals fight to protect their city from an alien invasion.',
        'A young prince must navigate a dangerous fantasy world filled with dragons and magic.',
        'Two rivals in the fashion industry find themselves falling for each other despite their differences.',
        'A team of heroes is tasked with saving the universe from a powerful evil sorcerer.',
        'A shy girl discovers her ability to control time and must stop a disaster from happening.',
        'A young man seeks revenge after his family is betrayed by a trusted friend in a kingdom of war.',
        'A love story between a vampire and a human girl, filled with supernatural events and suspense.',
        'A girl discovers she is the last of an ancient warrior clan and must fight to save her people.',
        'Two best friends navigate the ups and downs of high school while hiding their magical abilities.',
        'A detective with extraordinary abilities solves mysterious crimes in a futuristic city.',
        'An ordinary college student gets pulled into an online gaming world where he must fight to survive.',
        'A romance between a prince and a commoner as they face opposition from the royal family.',
        'A legendary martial artist returns from exile to reclaim his rightful place in the world of warriors.',
        'A drama-filled love triangle between a pop star, a fan, and her best friend in the music industry.'
    ],
    'category': [
        'romance',
        'action',
        'fantasy',
        'romance',
        'action',
        'fantasy',
        'action',
        'romance',
        'fantasy',
        'fantasy',
        'action',
        'fantasy',
        'romance',
        'action',
        'romance'
    ]
}

In [25]:
# Creating a DataFrame
df = pd.DataFrame(data)

In [26]:
# Text Preprocessing
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['description'])  # Convert text to numeric features
y = df['category']  # Label

In [27]:
print(df['category'].value_counts())

category
romance    5
action     5
fantasy    5
Name: count, dtype: int64


In [28]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [29]:
# Step 5: Model Selection - Let's use both Decision Tree and Logistic Regression

# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred_dt = dt_classifier.predict(X_test)

# Logistic Regression Classifier
lr_classifier = LogisticRegression(max_iter=1000, random_state=42, class_weight='balanced')
lr_classifier.fit(X_train, y_train)
y_pred_lr = lr_classifier.predict(X_test)

In [30]:
# Step 6: Evaluate the models
print("Decision Tree Classifier Performance:")
print(classification_report(y_test, y_pred_dt, zero_division=1))
print("Accuracy:", accuracy_score(y_test, y_pred_dt))

print("\nLogistic Regression Classifier Performance:")
print(classification_report(y_test, y_pred_lr, zero_division=1))
print("Accuracy:", accuracy_score(y_test, y_pred_lr))

Decision Tree Classifier Performance:
              precision    recall  f1-score   support

      action       0.50      1.00      0.67         1
     fantasy       0.67      0.67      0.67         3
     romance       1.00      0.00      0.00         1

    accuracy                           0.60         5
   macro avg       0.72      0.56      0.44         5
weighted avg       0.70      0.60      0.53         5

Accuracy: 0.6

Logistic Regression Classifier Performance:
              precision    recall  f1-score   support

      action       0.33      1.00      0.50         1
     fantasy       1.00      0.00      0.00         3
     romance       0.50      1.00      0.67         1

    accuracy                           0.40         5
   macro avg       0.61      0.67      0.39         5
weighted avg       0.77      0.40      0.23         5

Accuracy: 0.4


In [32]:
new_descriptions = [
    'A group of friends must fight against evil forces trying to take over their town.',  # action
    'A young girl must choose between two boys who have both fallen in love with her.', # romance
    'A young warrior embarks on a perilous journey to retrieve a lost artifact that holds the power to reshape the world.' #fantasy
]

# Transform the new data using the same vectorizer
X_new = vectorizer.transform(new_descriptions)

# Predict the categories for the new descriptions
new_predictions = lr_classifier.predict(X_new)

# Show the predictions
for desc, category in zip(new_descriptions, new_predictions):
    print(f"Description: '{desc}' => Predicted Category: {category}")

Description: 'A group of friends must fight against evil forces trying to take over their town.' => Predicted Category: action
Description: 'A young girl must choose between two boys who have both fallen in love with her.' => Predicted Category: romance
Description: 'A young warrior embarks on a perilous journey to retrieve a lost artifact that holds the power to reshape the world.' => Predicted Category: fantasy
