In [27]:
from keras.datasets import imdb
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.ensemble import VotingClassifier

In [12]:
# Load the IMDB dataset through the Keras helper, passing num_words=5000
# (per the Keras docs this caps the vocabulary; confirm if the exact
# cut-off matters). The helper returns a (features, labels) pair for the
# training portion and another for the test portion.
train_split, test_split = imdb.load_data(num_words=5000)
X_train, y_train = train_split
X_test, y_test = test_split

In [13]:
# Bring every review to exactly `maxlen` entries so the data becomes a
# rectangular 2-D array that the scikit-learn estimators below can consume.
# `padding` and `truncating` are spelled out with their default value
# ("pre"): short reviews are left-padded and long reviews keep only their
# final `maxlen` entries.
# NOTE(review): the estimators downstream receive these padded sequences as
# plain numeric columns; presumably the values are integer word ids, in which
# case their magnitudes carry no ordinal meaning -- confirm whether a
# bag-of-words style encoding was intended instead.
maxlen = 100  # fixed sequence length; any other desired length works too
X_train = pad_sequences(
    X_train, maxlen=maxlen, padding="pre", truncating="pre"
)
X_test = pad_sequences(
    X_test, maxlen=maxlen, padding="pre", truncating="pre"
)

In [14]:
# Carve a validation set out of the training data: 20% of the rows are held
# out, and random_state=42 makes the shuffle repeatable.
# NOTE(review): X_train / y_train are overwritten by their own subset, so
# running this cell a second time splits the already-reduced data again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Define the 3 base models.
# Each estimator is randomized internally, so every one gets an explicit
# random_state (the same seed value the train/validation split uses). Without
# it the fitted models, and every accuracy figure derived from them, change
# from one run of the notebook to the next.
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)

In [16]:
# Fit the base models on the training data
# Each of the three estimators is trained independently on the same
# (X_train, y_train) arrays; nothing is shared between them at fit time.
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)  
# Kept as the last bare expression of the cell: its return value is what the
# notebook echoes as the cell output (the fitted estimator's repr).
gb.fit(X_train, y_train)

GradientBoostingClassifier()

In [20]:
# Define the majority vote function
def majority_vote(predictions):
    """
    Return the most common prediction among ``predictions``.

    Parameters
    ----------
    predictions : iterable of hashable
        The individual votes. Any iterable is accepted (list, tuple,
        NumPy array, generator, ...), not only objects exposing ``.count``.

    Returns
    -------
    The value that occurs most often. When several values are tied for the
    highest count, the one that appears first in ``predictions`` wins, so
    the result is deterministic regardless of hashing order.

    Raises
    ------
    ValueError
        If ``predictions`` is empty.
    """
    # Local import keeps this cell runnable on its own.
    from collections import Counter

    # Materialize once so one-shot iterators are only consumed a single time.
    votes = list(predictions)
    if not votes:
        raise ValueError("majority_vote() requires at least one prediction")

    # Counter tallies in one pass and remembers first-seen order; max()
    # returns the first maximal key, which gives the tie-break described above.
    tally = Counter(votes)
    return max(tally, key=tally.__getitem__)

In [21]:
# Score the held-out validation rows with each fitted base model, in the
# order decision tree, random forest, gradient boosting.
dt_preds, rf_preds, gb_preds = (
    model.predict(X_val) for model in (dt, rf, gb)
)

In [22]:
# Ensemble by majority voting: walk the three prediction arrays in lockstep
# and keep, for every validation row, the label chosen by most of the models.
final_preds = [
    majority_vote([dt_vote, rf_vote, gb_vote])
    for dt_vote, rf_vote, gb_vote in zip(dt_preds, rf_preds, gb_preds)
]

In [23]:
# Evaluate the ensemble model: share of validation labels that the voted
# predictions match, reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_val, y_pred=final_preds)
print("Ensemble Accuracy: %.2f%%" % (100.0 * accuracy,))

Ensemble Accuracy: 55.52%


In [30]:
# Evaluate the performance of the individual models and the ensemble on the
# held-out test set.
# The test-set predictions are computed right here so that the cell does not
# rely on variables left over from earlier, no-longer-present cells.
y_pred1 = dt.predict(X_test)
y_pred2 = rf.predict(X_test)
y_pred3 = gb.predict(X_test)

# Row-wise majority vote across the three models' test predictions.
y_pred_ensemble = [
    majority_vote([dt_vote, rf_vote, gb_vote])
    for dt_vote, rf_vote, gb_vote in zip(y_pred1, y_pred2, y_pred3)
]

print("Decision tree accuracy:", accuracy_score(y_test, y_pred1))
print("Random forest accuracy:", accuracy_score(y_test, y_pred2))
print("Gradient boosting accuracy:", accuracy_score(y_test, y_pred3))
print("Ensemble accuracy:", accuracy_score(y_test, y_pred_ensemble))

Decision tree accuracy: 0.51588
Random forest accuracy: 0.5308
Gradient boosting accuracy: 0.5614
Ensemble accuracy: 0.54616
