### Training Model

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle

# Train an LSTM multi-label genre classifier on movie descriptions, then stack
# a RandomForest on top of (token ids + LSTM probabilities).  Artifacts needed
# for inference (tokenizer, label binarizer, Keras model) are written to disk.

# Load the dataset
df = pd.read_csv('/content/combined_cleaned_dateset.csv')

# Preprocess the genres.
# A fresh Series is derived instead of calling fillna(inplace=True) on a
# column slice (chained assignment).  Each label is stripped and empty labels
# are dropped: ''.split(',') would otherwise yield [''] and create a bogus
# empty-string genre class, and ' Drama' would be a different class to 'Drama'.
genres = [
    [label.strip() for label in genre.split(',') if label.strip()]
    for genre in df['genre'].fillna('').astype(str)
]

# Preprocess the descriptions.
# Missing descriptions are turned into empty strings so the tokenizer only
# ever sees text.
descriptions = df['description'].fillna('').astype(str)
max_words = 1000  # vocabulary size kept by the tokenizer / embedding
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(descriptions)
X = tokenizer.texts_to_sequences(descriptions)
X = pad_sequences(X)  # no maxlen: every row is padded to the longest sequence
with open('/content/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

mlb = MultiLabelBinarizer()
y = mlb.fit_transform(genres)
# Persist the fitted binarizer next to the tokenizer so that predictions can
# be decoded back to genre names without relying on this kernel's variables.
with open('/content/mlb.pkl', 'wb') as f:
    pickle.dump(mlb, f)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=52)


# Embedding -> LSTM -> one sigmoid unit per genre (independent per-label
# probabilities, hence binary cross-entropy).
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=100, input_length=X.shape[1]))
model.add(LSTM(100))
model.add(Dense(y.shape[1], activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=80, batch_size=45, validation_data=(X_val, y_val))

rnn_predictions_train = model.predict(X_train)
rnn_predictions_val = model.predict(X_val)

test_descriptions = ["a group of intergalactic criminals are forced to work together to stop a fanatical warrior from taking control of the universe."]
X_test = tokenizer.texts_to_sequences(test_descriptions)
X_test = pad_sequences(X_test, maxlen=X.shape[1])
rnn_predictions_test = model.predict(X_test)

# Stack: append the LSTM's per-genre probabilities to the padded token ids.
# NOTE(review): the forest is fitted on the LSTM's predictions for the very
# rows the LSTM was trained on, so these meta-features are likely more
# confident at fit time than on unseen data.  Out-of-fold LSTM predictions
# would avoid that leakage - left as-is because it needs k-fold retraining.
X_train_with_predictions = np.concatenate((X_train, rnn_predictions_train), axis=1)
X_val_with_predictions = np.concatenate((X_val, rnn_predictions_val), axis=1)
X_test_with_predictions = np.concatenate((X_test, rnn_predictions_test), axis=1)

rf_model = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model.fit(X_train_with_predictions, y_train)

# Evaluate the stacked model.
# For multi-label targets `score` is subset accuracy: a row only counts as
# correct when every genre flag matches.
accuracy = rf_model.score(X_val_with_predictions, y_val)
print("Validation Accuracy (Stacked Model):", accuracy)

model.save('/content/mymodel.h5')
print("model saved successfully.")

# Make predictions using the stacked model
stacked_predictions = rf_model.predict(X_test_with_predictions)

# Convert predictions to genres.
# rf_model.predict already returns hard 0/1 labels, so the threshold does not
# change which genres are selected; it is kept to yield an integer indicator
# matrix for inverse_transform.
threshold = 0.25
binary_predictions = np.where(stacked_predictions > threshold, 1, 0)
# inverse_transform yields one tuple of unique labels per row; converting via
# set() would only randomise their order, so plain list() is used.
predicted_genres = [list(labels) for labels in mlb.inverse_transform(binary_predictions)]

print("Predicted Genres:")
# Loop variable is deliberately not called `genres`, which would overwrite the
# training label list defined above.
for desc, labels in zip(test_descriptions, predicted_genres):
    print(f"{desc}: {', '.join(labels)}")



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Validation Accuracy (Stacked Model): 0.5690759377859104
model saved successfully.


  saving_api.save_model(


Predicted Genres:
a group of intergalactic criminals are forced to work together to stop a fanatical warrior from taking control of the universe.: 


### For Using and Testing the Data (UI)

In [None]:
from tensorflow.keras.models import load_model
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Interactive inference: load the saved LSTM and tokenizer, read one
# description from the user, and print the genres whose predicted probability
# exceeds the threshold.

# Load the saved model.
# load_model raises OSError if the file does not exist, so the training cell
# must have written it in the current runtime.
loaded_model = load_model('/content/mymodel.h5')
# NOTE: pickle.load can execute arbitrary code - only load pickles that were
# produced by this notebook's training cell.
with open('/content/tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Label binarizer used to map indicator rows back to genre names.  Prefer a
# pickled copy when one exists on disk; otherwise fall back to an `mlb`
# already present in the kernel, and fail with a clear message if neither is
# available (instead of a bare NameError further down).
try:
    with open('/content/mlb.pkl', 'rb') as f:
        mlb = pickle.load(f)
except FileNotFoundError:
    if 'mlb' not in globals():
        raise RuntimeError(
            "No label binarizer available: '/content/mlb.pkl' was not found and "
            "`mlb` is not defined in this kernel. Run the training cell first."
        ) from None

# Padding length is taken from the loaded model's input shape rather than the
# training matrix `X`, which does not exist after a kernel restart.  If the
# model reports no fixed length (None), pad_sequences pads to the longest
# input in the batch.
max_len = loaded_model.input_shape[1]

inp = input('enter input: ')
test_descriptions = [inp]

X_test = tokenizer.texts_to_sequences(test_descriptions)
X_test = pad_sequences(X_test, maxlen=max_len)

predictions = loaded_model.predict(X_test)

threshold = 0.3  # minimum sigmoid probability for a genre to be reported
binary_predictions = np.where(predictions > threshold, 1, 0)
# inverse_transform yields one tuple of unique labels per row; list() keeps
# their order deterministic (set() would shuffle them between runs).
predicted_genres = [list(labels) for labels in mlb.inverse_transform(binary_predictions)]

print("Predicted Genres:")
for desc, labels in zip(test_descriptions, predicted_genres):
    print(f"{desc}: {', '.join(labels)}")


OSError: No file or directory found at /content/mymodel.h5