In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier

# Seed for the randomized estimators. Uses the same value as the
# train_test_split call so that repeated runs of the notebook fit the same
# model and report the same metrics.
_SEED = 42


def _char_tfidf_pipeline(classifier):
    """Build a text-classification pipeline around ``classifier``.

    The pipeline counts individual characters (``analyzer='char'``),
    re-weights the counts with TF-IDF, and feeds the result to the given
    estimator.

    Args:
        classifier: An unfitted scikit-learn classifier used as the final
            ``'clf'`` step.

    Returns:
        An unfitted ``Pipeline`` with steps ``'vect'``, ``'tfidf'``, ``'clf'``.
    """
    return Pipeline([
        ('vect', CountVectorizer(analyzer='char')),
        ('tfidf', TfidfTransformer()),
        ('clf', classifier),
    ])


# One pipeline per base model. Each gets its own vectorizer/transformer
# instances, so every member of the ensemble is fitted independently.
nb_pipeline = _char_tfidf_pipeline(MultinomialNB())
# Tree-based models draw random numbers while fitting; without a fixed
# random_state their predictions differ from run to run.
dt_pipeline = _char_tfidf_pipeline(DecisionTreeClassifier(random_state=_SEED))
rf_pipeline = _char_tfidf_pipeline(RandomForestClassifier(random_state=_SEED))
svc_pipeline = _char_tfidf_pipeline(SVC())
knn_pipeline = _char_tfidf_pipeline(KNeighborsClassifier())

# voting='hard': the ensemble predicts the label chosen by the majority of
# the five pipelines.
ensemble = VotingClassifier(estimators=[
    ('nb', nb_pipeline),
    ('dt', dt_pipeline),
    ('rf', rf_pipeline),
    ('svc', svc_pipeline),
    ('knn', knn_pipeline),
], voting='hard')

# Hold out 10% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs. Inputs come from the 'emphasized' column and
# targets from the 'actual' column of df.
X_train, X_test, y_train, y_test = train_test_split(
    df['emphasized'],
    df['actual'],
    test_size=0.1,
    random_state=42,
)

# Fit every pipeline in the ensemble on the training portion, then label the
# held-out portion.
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)

# Report each metric in turn: per-class precision/recall/F1, the confusion
# matrix, and overall accuracy.
for _heading, _metric in (
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Accuracy Score:", accuracy_score),
):
    print(_heading, _metric(y_test, y_pred))


In [None]:
import pickle

# Smoke-test the fitted ensemble on a single raw string. The result is
# printed explicitly: a bare expression that is not the last statement of a
# notebook cell is evaluated and then thrown away, so the prediction was
# never displayed.
print(ensemble.predict(["waay"])[0])

# Persist the fitted ensemble to disk.
with open('model_ensemble_v1.pkl', 'wb') as file:
    pickle.dump(ensemble, file)

# Load it back into `model`.
# NOTE: pickle.load can execute arbitrary code while deserializing. This is
# fine for the file written just above; never load a pickle obtained from an
# untrusted source.
with open('model_ensemble_v1.pkl', 'rb') as file:
    model = pickle.load(file)
