In [139]:
import pickle
import numpy as np
from text import cleaning, flat
import pandas as pd
import matplotlib.pyplot as plt

In [140]:
# Load Models & Vectorizer (Text --> matrix of TF-IDF features.)
def load_model():
    """Unpickle the combined model object and the vectorizer from ./pickle.

    Returns:
        tuple: ``(model, vectorizer)`` -- the objects stored in
        ``CombineModel.pkl`` and ``vectorizer.pkl``, in that order.
    """
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # pickle files that come from a trusted source.
    unpickled = []
    for pickle_path in ('./pickle/CombineModel.pkl', './pickle/vectorizer.pkl'):
        with open(pickle_path, 'rb') as handle:
            unpickled.append(pickle.load(handle))

    model, vectorizer = unpickled
    return model, vectorizer

In [141]:
# String to matrix of TF-IDF features.
def vectorize(vectorizer, text):
    """Clean one raw string and run it through ``vectorizer.transform``.

    The raw and the cleaned text are both printed so the effect of the
    cleaning step can be inspected.

    Args:
        vectorizer: object exposing ``transform(list_of_str)``.
        text: raw input string, passed to ``cleaning``.

    Returns:
        The value ``vectorizer.transform`` returns for the one-element
        list holding the cleaned text.
    """
    cleaned = cleaning(text)

    print(f"Before Cleaning: {text}")
    print(f"After Cleaning: {cleaned}")

    # transform is called with a list of documents, hence the wrapping.
    documents = [cleaned]
    return vectorizer.transform(documents)

In [142]:
# Predict using three models (Logistic Regression, SVM, Naive Bayes)
def predict(model, vect):
    """Run the three classifiers on ``vect`` and tabulate their answers.

    Args:
        model: mapping with the keys ``'LRmodel'``, ``'SVCmodel'`` and
            ``'BNBmodel'``; each value must offer ``predict`` and
            ``predict_proba``.
        vect: feature input handed unchanged to every classifier.

    Returns:
        pandas.DataFrame: one row per classifier with the columns
        ``model`` (display name), ``predict`` (predicted label) and
        ``confidence`` (largest value in that classifier's
        ``predict_proba`` output). The table is also printed.
    """
    # (display name, key in ``model``), listed in output-row order.
    estimators = (
        ('Logistic Reg', 'LRmodel'),
        ('SVM', 'SVCmodel'),
        ('NB', 'BNBmodel'),
    )

    labels = [model[key].predict(vect) for _, key in estimators]
    confidences = [
        max(flat(model[key].predict_proba(vect))) for _, key in estimators
    ]

    result = pd.DataFrame({
        'model': [name for name, _ in estimators],
        'predict': np.concatenate(labels),
        'confidence': confidences,
    })
    print(f"3 Models Prediction:\n{result}")

    return result

In [143]:
# Majority Algorithm (Voting System)
def majority_algoritm(result):
    """Pick the most frequent label among the per-model predictions.

    Args:
        result: DataFrame with a ``predict`` column (labels) and a
            ``confidence`` column (numeric), one row per model.

    Returns:
        The mode of the ``predict`` column (first entry of ``mode()``).
        The winning label and the mean confidence of the rows that voted
        for it, as a percentage rounded to two decimals, are printed.
    """
    votes = result['predict']
    winner = votes.mode()[0]

    # Only the models that agree with the winner count towards the mean.
    winner_confidences = result.loc[votes == winner, 'confidence']
    confidence = round(winner_confidences.mean() * 100, 2)

    print(f"Final Predict: {winner}")
    print(f"Confidence Mean: {confidence}%")

    return winner

In [159]:
# Overall Sentiment Stats
def sentiment_stats(sentiment):
    """Count how often each sentiment label occurs and print the summary.

    Args:
        sentiment: iterable of labels (may be empty).

    Returns:
        pandas.DataFrame: indexed by label, with a ``sentiment`` column
        holding the occurrence count and a ``percentage`` column holding
        that count's share of the total, rounded to two decimals.
    """
    # dtype=object keeps an empty input from producing a dtype warning.
    counts = pd.Series(sentiment, dtype=object).value_counts()

    # Name the count column explicitly: depending on the pandas version,
    # value_counts() labels its result after the source column or 'count',
    # so relying on the implicit name breaks the lookup below.
    stats = counts.rename('sentiment').rename_axis(None).to_frame()
    stats['percentage'] = round(
        stats['sentiment'] / stats['sentiment'].sum() * 100, 2
    )
    print(f"SENTIMENT STATS:\n{stats}")

    return stats
    

In [160]:
def inisiasi(input_text):
    """Run the whole sentiment pipeline over one or more texts.

    Loads the model and vectorizer once, then for every text: vectorizes
    it, collects the three model predictions and takes the majority vote.
    Finally the overall label statistics are printed.

    Args:
        input_text: iterable of strings, or a single string (treated as
            one text rather than as a sequence of characters).

    Returns:
        list: the majority-vote label for each text, in input order.
    """
    # A bare string would otherwise be looped over character by character.
    if isinstance(input_text, str):
        input_text = [input_text]

    # load vectorizer and model pickle
    model, vectorizer = load_model()

    sentiment = []

    for text in input_text:
        vect = vectorize(vectorizer, text)
        result = predict(model, vect)
        score = majority_algoritm(result)
        sentiment.append(score)
        print("==========================\n")

    sentiment_stats(sentiment)

    return sentiment
    

if __name__ == '__main__':
    # Sample tweets used as a smoke test for the whole pipeline.
    test_input = [
        "Given the almost unimaginable nature of the present, what will the future be? https://t.co/b2Yw0AXGVA @elonmusk",
        "Unless susceptible to extreme natural disasters, nuclear power plants should not be shut down",
        # The curly quotes are written as escapes: the literal previously held
        # mis-decoded (UTF-8 read as cp1252) bytes instead of the quote marks.
        'Nothing is more permanent than a \u201ctemporary\u201d government program',
    ]
    inisiasi(test_input)

Before Cleaning: Given the almost unimaginable nature of the present, what will the future be? https://t.co/b2Yw0AXGVA @elonmusk
After Cleaning: given almost unimaginable nature present future
3 Models Prediction:
          model   predict  confidence
0  Logistic Reg  POSITIVE    0.797019
1           SVM  POSITIVE    0.724450
2            NB  POSITIVE    0.773338
Final Predict: POSITIVE
Confidence Mean: 76.49%

Before Cleaning: Unless susceptible to extreme natural disasters, nuclear power plants should not be shut down
After Cleaning: unless susceptible extreme natural disaster nuclear power plant shut
3 Models Prediction:
          model   predict  confidence
0  Logistic Reg  NEGATIVE    0.534753
1           SVM  POSITIVE    0.509576
2            NB  NEGATIVE    0.608619
Final Predict: NEGATIVE
Confidence Mean: 57.17%

Before Cleaning: Nothing is more permanent than a â€œtemporaryâ€ government program
After Cleaning: nothing permanent temporary government program
3 Models Prediction