# **Movie Genre Classification**



# Import necessary libraries

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load training data

In [None]:
# The training file has no header row; each line is split into the four named
# fields below on the ' ::: ' separator.
train_data = 'train_data.txt'
columns = ['ID', 'TITLE', 'GENRE', 'DESCRIPTION']
df_train = pd.read_csv(
    train_data,
    sep=' ::: ',      # multi-character separator, so the python parsing engine is used
    header=None,
    names=columns,
    engine='python',
)

# Data Preprocessing for training data

In [None]:
# Replace missing descriptions with empty strings so the vectorizer only ever
# receives str values.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form is
# deprecated in pandas 2.x and, with Copy-on-Write enabled, modifies a temporary
# object and leaves df_train unchanged.
df_train['DESCRIPTION'] = df_train['DESCRIPTION'].fillna('')

# Create and fit the TF-IDF vectorizer

In [None]:
# Learn the TF-IDF vocabulary (capped at 5000 features) from the training
# descriptions and vectorize those same descriptions in one pass.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_descriptions = df_train['DESCRIPTION']
X_train_tfidf = tfidf_vectorizer.fit_transform(train_descriptions)

# Train a Multinomial Naive Bayes classifier

In [None]:
# Fit a multinomial Naive Bayes model on the training TF-IDF matrix, using the
# GENRE column as the target labels.
clf = MultinomialNB()
train_genres = df_train['GENRE']
clf.fit(X_train_tfidf, train_genres)

# Load test data

In [None]:
# The test file has no header row; each line is split into the three named
# fields below (no GENRE column) on the ' ::: ' separator.
test_data = 'test_data.txt'
columns_test = ['ID', 'TITLE', 'DESCRIPTION']
df_test = pd.read_csv(
    test_data,
    sep=' ::: ',      # multi-character separator, so the python parsing engine is used
    header=None,
    names=columns_test,
    engine='python',
)

# Data Preprocessing for test data

In [None]:
# Replace missing descriptions with empty strings so the vectorizer only ever
# receives str values.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form is
# deprecated in pandas 2.x and, with Copy-on-Write enabled, modifies a temporary
# object and leaves df_test unchanged.
df_test['DESCRIPTION'] = df_test['DESCRIPTION'].fillna('')

# Feature extraction for test data

In [None]:
# Only transform here (no refit): the test descriptions are projected onto the
# vocabulary that tfidf_vectorizer already holds.
test_descriptions = df_test['DESCRIPTION']
X_test_tfidf = tfidf_vectorizer.transform(test_descriptions)

# Load test data solution

In [None]:
# The solution file has no header row; each line is split into the four named
# fields below (including GENRE) on the ' ::: ' separator.
test_solution = 'test_data_solution.txt'
columns_solution = ['ID', 'TITLE', 'GENRE', 'DESCRIPTION']
df_test_solution = pd.read_csv(
    test_solution,
    sep=' ::: ',      # multi-character separator, so the python parsing engine is used
    header=None,
    names=columns_solution,
    engine='python',
)

# Predictions on the test set

In [None]:
# Ask the fitted classifier for a genre label for each row of the test TF-IDF matrix.
predictions_test = clf.predict(X_test_tfidf)

# Evaluate the model on the test set

In [None]:
# The metrics below pair true labels with predictions purely by position, so
# the solution file must list the same movies, in the same order, as the test
# file the predictions were computed from. Fail loudly if that does not hold.
assert df_test['ID'].tolist() == df_test_solution['ID'].tolist(), \
    "test_data and test_data_solution rows are not aligned by ID"

y_true = df_test_solution['GENRE']
accuracy_test = accuracy_score(y_true, predictions_test)
print("Naive Bayes Classification Report:")
# zero_division=0 reports 0.0 for genres that are never predicted instead of
# emitting an UndefinedMetricWarning for each affected metric.
print(classification_report(y_true, predictions_test, zero_division=0))
print(f"Naive Bayes Accuracy on Test Set: {accuracy_test}")

Naive Bayes Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

      action       0.61      0.08      0.13      1314
       adult       0.53      0.04      0.07       590
   adventure       0.78      0.04      0.08       775
   animation       0.00      0.00      0.00       498
   biography       0.00      0.00      0.00       264
      comedy       0.52      0.40      0.45      7446
       crime       0.00      0.00      0.00       505
 documentary       0.56      0.88      0.69     13096
       drama       0.45      0.83      0.58     13612
      family       1.00      0.00      0.00       783
     fantasy       0.00      0.00      0.00       322
   game-show       0.96      0.12      0.22       193
     history       0.00      0.00      0.00       243
      horror       0.73      0.28      0.41      2204
       music       0.76      0.10      0.18       731
     musical       0.00      0.00      0.00       276
     mystery       0.00      0.00      0.00       318
        news       0.00    

  _warn_prf(average, modifier, msg_start, len(result))


# Example (Thriller Movie)

In [None]:
# Hand-written synopsis used as a spot check of the trained model
input_data = [
    "A former detective haunted by his dark past is drawn into a high-stakes conspiracy when he receives a mysterious letter revealing a secret society's plan to unleash a deadly biological weapon. As he races against time to unravel the truth, he discovers that the key to stopping the impending catastrophe lies in solving a series of cryptic puzzles. The tension escalates as he navigates a web of deceit, facing unpredictable twists and betrayals. Will he uncover the sinister plot and save the world, or will the shadowy forces behind it all succeed in their malevolent scheme?"
]

# Vectorize with the already-fitted TF-IDF vectorizer, then classify
X_input_tfidf = tfidf_vectorizer.transform(input_data)
y_input_pred = clf.predict(X_input_tfidf)

# Show every synopsis next to the genre predicted for it
for idx, synopsis in enumerate(input_data):
    genre = y_input_pred[idx]
    print(f"Input: {synopsis}\nPredicted Genre By Model: {genre}\n")

Input: A former detective haunted by his dark past is drawn into a high-stakes conspiracy when he receives a mysterious letter revealing a secret society's plan to unleash a deadly biological weapon. As he races against time to unravel the truth, he discovers that the key to stopping the impending catastrophe lies in solving a series of cryptic puzzles. The tension escalates as he navigates a web of deceit, facing unpredictable twists and betrayals. Will he uncover the sinister plot and save the world, or will the shadowy forces behind it all succeed in their malevolent scheme?
Predicted Genre By Model: thriller



# Example (Comedy Movie)

In [None]:
# Hand-written synopsis used as a spot check of the trained model
input_data = [
    "In a small town where laughter is scarce, a quirky group of misfit comedians sets out to bring joy to their community. The leader, a failed stand-up comedian with a heart of gold, hatches a plan to organize a hilarious talent show to raise funds for the struggling local comedy club. As the group encounters various comedic challenges and absurd situations, they learn the true meaning of friendship and the power of laughter. With side-splitting performances and heartwarming moments, this comedy is a feel-good journey that will leave the audience in stitches."
]

# Vectorize with the already-fitted TF-IDF vectorizer, then classify
X_input_tfidf = tfidf_vectorizer.transform(input_data)
y_input_pred = clf.predict(X_input_tfidf)

# Show every synopsis next to the genre predicted for it
for idx, synopsis in enumerate(input_data):
    genre = y_input_pred[idx]
    print(f"Input: {synopsis}\nPredicted Genre By Model: {genre}\n")

Input: In a small town where laughter is scarce, a quirky group of misfit comedians sets out to bring joy to their community. The leader, a failed stand-up comedian with a heart of gold, hatches a plan to organize a hilarious talent show to raise funds for the struggling local comedy club. As the group encounters various comedic challenges and absurd situations, they learn the true meaning of friendship and the power of laughter. With side-splitting performances and heartwarming moments, this comedy is a feel-good journey that will leave the audience in stitches.
Predicted Genre By Model: comedy

