In [1]:
!pip install datasets pandas scikit-learn streamlit




In [2]:
from datasets import load_dataset
import pandas as pd

# Load dataset from Hugging Face
ds = load_dataset("jquigl/imdb-genres")
df = pd.DataFrame(ds['train'])

# Keep only relevant columns
df = df[['description', 'genre']].dropna()
df.head()

Unnamed: 0,description,genre
0,Flaming Ears is a pop sci-fi lesbian fantasy f...,Fantasy
1,Six people - three couples - meet at random at...,Romance
2,"In a small unnamed town, in year 2025, Krsto w...",Thriller
3,The legendary Gulliver returns to the Kingdom ...,Fantasy
4,"Seminal silent historical film, the story feat...",Biography


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

X = df['description']
y = df['genre']

# Convert text to TF-IDF vectors
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
X_tfidf = tfidf.fit_transform(X)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

      Action       0.32      0.37      0.34      6256
   Adventure       0.24      0.17      0.20      3200
   Animation       0.35      0.12      0.18      1042
   Biography       0.46      0.31      0.37      1077
       Crime       0.32      0.29      0.30      4732
      Family       0.29      0.24      0.26      2119
     Fantasy       0.23      0.13      0.16      2267
   Film-noir       0.00      0.00      0.00       178
     History       0.25      0.15      0.19      1055
      Horror       0.42      0.48      0.45      4897
     Mystery       0.20      0.06      0.09      2652
     Romance       0.44      0.64      0.52      6722
       Scifi       0.37      0.32      0.34      2292
      Sports       0.45      0.31      0.37       605
    Thriller       0.28      0.35      0.31      7236
         War       0.43      0.40      0.41      1322

    accuracy                           0.35     47652
   macro avg       0.32   

In [7]:
def predict_genre(plot):
    vec = tfidf.transform([plot])
    return model.predict(vec)[0]

# Example
plot = "Your new plot here"
print("Predicted Genre:", predict_genre(plot))


Predicted Genre: Thriller
