In [2]:
import pickle
import numpy as np
from text import cleaning, flat
import pandas as pd

In [3]:
# Load Models & Vectorizer (Text --> matrix of TF-IDF features.)
def load_model(model_path='./pickle/CombineModel.pkl',
               vectorizer_path='./pickle/vectorizer.pkl'):
    """Unpickle the model bundle and the vectorizer from disk.

    Parameters
    ----------
    model_path : str or path-like, optional
        Pickle file holding the model bundle. Defaults to the location this
        notebook has always read from, so ``load_model()`` behaves as before.
    vectorizer_path : str or path-like, optional
        Pickle file holding the vectorizer. Same default-compatibility note.

    Returns
    -------
    tuple
        ``(model, vectorizer)``, exactly as they were unpickled.

    Raises
    ------
    OSError
        If either file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point these paths at pickles you created or fully trust.
    with open(model_path, 'rb') as filemodel:
        model = pickle.load(filemodel)

    with open(vectorizer_path, 'rb') as filevectorizer:
        vectorizer = pickle.load(filevectorizer)

    return model, vectorizer

In [4]:
# String to matrix of TF-IDF features.
def vectorize(vectorizer, text):
    """Clean a single text and transform it with the given vectorizer.

    The text is first passed through ``cleaning`` (imported from the local
    ``text`` module); both the raw and the cleaned form are echoed to stdout.
    The cleaned text is then wrapped in a one-element list and handed to
    ``vectorizer.transform``.

    Parameters
    ----------
    vectorizer : object
        Anything exposing ``transform(list_of_str)``.
    text : str
        The raw input text.

    Returns
    -------
    Whatever ``vectorizer.transform`` returns for the single cleaned document.
    """
    cleaned = cleaning(text)

    # Show the effect of the cleaning step for this text.
    print(
        f"Before Cleaning: {text}",
        f"After Cleaning: {cleaned}",
        sep="\n",
    )

    return vectorizer.transform([cleaned])

In [5]:
# Predict using three models (Logistic Regression, SVM, NB)
def predict(model, vect):
    """Run the three classifiers in ``model`` on ``vect`` and tabulate them.

    Parameters
    ----------
    model : mapping
        Must provide the keys ``'LRmodel'``, ``'SVCmodel'`` and ``'BNBmodel'``;
        each value is called via ``.predict(vect)`` and ``.predict_proba(vect)``.
    vect
        The vectorized input, passed to the classifiers unchanged.
        Presumably a single document, so each ``predict`` yields one label
        (the ``'model'`` column has exactly three entries) - confirm with callers.

    Returns
    -------
    pandas.DataFrame
        Columns ``'model'`` (display name), ``'predict'`` (the concatenated
        ``predict`` outputs) and ``'confidence'`` (the maximum of
        ``flat(predict_proba(vect))`` for each classifier). The frame is also
        printed to stdout.
    """
    # (display name, key in the model bundle), in reporting order.
    lineup = (
        ('Logistic Reg', 'LRmodel'),
        ('SVM', 'SVCmodel'),
        ('NB', 'BNBmodel'),
    )

    # All label predictions first, then all probabilities, as before.
    labels = [model[key].predict(vect) for _, key in lineup]
    confidences = [
        max(flat(model[key].predict_proba(vect))) for _, key in lineup
    ]

    result = pd.DataFrame({
        'model': [name for name, _ in lineup],
        'predict': np.concatenate(labels),
        'confidence': confidences,
    })
    print(f"3 Models Prediction:\n{result}")

    return result

In [6]:
# Majority Algorithm (Voting System)
def majority_algoritm(result):
    """Pick the most frequent predicted label from a per-model result table.

    Parameters
    ----------
    result : pandas.DataFrame
        Must contain a ``'predict'`` column (labels) and a ``'confidence'``
        column (numeric).

    Returns
    -------
    The winning label: the first entry of ``result['predict'].mode()``, which
    is also what is used when several labels tie.

    Notes
    -----
    Prints the winning label and the mean confidence of the rows that voted
    for it, as a percentage rounded to two decimals. Only the label is
    returned.
    """
    votes = result['predict']
    winner = votes.mode()[0]

    # Average confidence across only the models that agreed with the winner.
    backing = result.loc[votes == winner, 'confidence']
    confidence = round(backing.mean() * 100, 2)

    print(f"Final Predict: {winner}")
    print(f"Confidence Mean: {confidence}%")

    return winner

In [9]:
# Overall Sentiment Stats
def sentiment_stats(sentiment):
    """Count each sentiment label and its share of the total.

    Parameters
    ----------
    sentiment : sequence
        One label per classified text (e.g. the list built by ``inisiasi``).

    Returns
    -------
    pandas.DataFrame
        Indexed by label, with a ``'sentiment'`` column (occurrence count) and
        a ``'percentage'`` column (share of all labels, rounded to two
        decimals). The same frame is printed to stdout. Earlier versions of
        this function returned ``None``; the frame is now returned so callers
        that assign the result actually receive it.
    """
    labels = pd.DataFrame({'sentiment': sentiment})['sentiment']

    # pandas >= 2.0 names the value_counts() result 'count' and moves the
    # column name onto the index; older versions named the result 'sentiment'.
    # Pin both explicitly so the frame has the same layout on either version.
    stats = (
        labels.value_counts()
        .rename('sentiment')
        .rename_axis(None)
        .to_frame()
    )
    stats['percentage'] = (
        stats['sentiment'] / stats['sentiment'].sum() * 100
    ).round(2)

    print(f"SENTIMENT COUNT:\n{stats}")

    return stats
    

In [18]:
def inisiasi(input_text):
    """Run the whole sentiment pipeline over an iterable of texts.

    The model bundle and vectorizer are loaded once via ``load_model``. Each
    text then goes through ``vectorize`` -> ``predict`` -> ``majority_algoritm``,
    with a separator line printed after each one. Finally the collected labels
    are handed to ``sentiment_stats``.

    Parameters
    ----------
    input_text : iterable of str
        The raw texts to classify.

    Returns
    -------
    None
        All results are reported through the printing done by the helpers.
    """
    # Load the pickled model bundle and vectorizer once, up front.
    model, vectorizer = load_model()

    def _classify(text):
        # vectorize -> three-model prediction -> majority vote
        features = vectorize(vectorizer, text)
        per_model = predict(model, features)
        vote = majority_algoritm(per_model)
        print("==========================\n")
        return vote

    sentiment = [_classify(text) for text in input_text]

    # Summarise the label distribution across all inputs.
    sentiment_stats(sentiment)
    

if __name__ == '__main__':
    # Two sample tweets that exercise the full pipeline end to end.
    # NOTE(review): the trailing "â€¦" in the second tweet looks like a
    # mis-decoded ellipsis; it is kept verbatim here.
    test_input = [
        "genshin is such a boring games, but i love you https://t.co/b2Yw0AXGVA @elonmusk",
        "RT @SpaceX: More photos from last night's Falcon 9 launch of @NASA's IXPE mission to better understand black holes, neutron stars, and otheâ€¦",
    ]
    inisiasi(test_input)

Before Cleaning: genshin is such a boring games, but i love you https://t.co/b2Yw0AXGVA @elonmusk
After Cleaning: genshin boring game love
3 Models Prediction:
          model   predict  confidence
0  Logistic Reg  NEGATIVE    0.679348
1           SVM  NEGATIVE    0.700818
2            NB  POSITIVE    0.688758
Final Predict: NEGATIVE
Confidence Mean: 69.01%

Before Cleaning: RT @SpaceX: More photos from last night's Falcon 9 launch of @NASA's IXPE mission to better understand black holes, neutron stars, and otheâ€¦
After Cleaning: rt photo last night falcon 9 launch ixpe mission better understand black hole neutron star othe
3 Models Prediction:
          model   predict  confidence
0  Logistic Reg  POSITIVE    0.782353
1           SVM  POSITIVE    0.744609
2            NB  POSITIVE    0.952599
Final Predict: POSITIVE
Confidence Mean: 82.65%

SENTIMENT COUNT:
          sentiment  percentage
NEGATIVE          1        50.0
POSITIVE          1        50.0
