In [49]:
import pickle
import numpy as np
from packages.text import cleaning, flat
import pandas as pd

In [50]:
def load_model():
    """Load the pickled model bundle and its text vectorizer from disk.

    Reads ``pickle/CombineModel.pkl`` and ``pickle/vectorizer.pkl``, both
    resolved relative to the current working directory.

    Returns:
        tuple: ``(model, vectorizer)`` exactly as they were unpickled.

    Raises:
        FileNotFoundError: if either pickle file does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load pickle files that were produced by a trusted source.

    # `with` guarantees each handle is closed, even if unpickling raises.
    # (The previous version wrote `file.close` without parentheses, so the
    # vectorizer file was never explicitly closed.)
    with open('pickle/CombineModel.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    with open('pickle/vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    return model, vectorizer

def predict(model, vectorizer, texts):
    """Classify each text by majority vote over three models.

    Args:
        model: mapping with the keys ``'LRmodel'``, ``'SVCmodel'`` and
            ``'BNBmodel'``; each value must provide ``predict`` and
            ``predict_proba``.
        vectorizer: object whose ``transform`` turns a one-element list
            holding the cleaned text into the input the models accept.
        texts: iterable of raw texts.

    Returns:
        pandas.DataFrame: one row per input text with the columns
        ``'original text'``, ``'clean text'``, ``'sentiment'`` (the most
        common label among the three models) and ``'confidence'`` (mean of
        the top ``predict_proba`` value of the models that voted for that
        label, as a percentage rounded to two decimals).
    """
    # (display name, key inside `model`) for every voter, in a fixed order.
    voters = (
        ('Logistic Reg', 'LRmodel'),
        ('SVM', 'SVCmodel'),
        ('NB', 'BNBmodel'),
    )
    display_names = [name for name, _ in voters]

    rows = []
    for text in texts:
        clean = cleaning(text)
        features = vectorizer.transform([clean])

        # Every model casts one label vote for this text ...
        labels = np.concatenate(
            [model[key].predict(features) for _, key in voters]
        )
        # ... and reports its highest class probability as its confidence.
        # `flat` comes from packages.text; presumably it flattens the
        # predict_proba output into a 1-D sequence -- TODO confirm.
        top_probs = [
            max(flat(model[key].predict_proba(features))) for _, key in voters
        ]

        votes = pd.DataFrame({
            'model': display_names,
            'predict': labels,
            'confidence': top_probs,
        })

        # mode()[0] picks the most frequent label (first one on a tie).
        winner = votes['predict'].mode()[0]
        agreeing = votes.loc[votes['predict'] == winner, 'confidence']
        winner_conf = round(agreeing.mean() * 100, 2)

        rows.append((text, clean, winner, winner_conf))

    return pd.DataFrame(
        rows,
        columns=['original text', 'clean text', 'sentiment', 'confidence'],
    )

def main(path, text_column='1'):
    """Run sentiment prediction over one text column of a CSV file.

    Args:
        path: location of the CSV file, passed straight to ``pandas.read_csv``.
        text_column: name of the column holding the texts to classify.
            Defaults to ``'1'``, the column this function always used before
            the parameter existed.

    Returns:
        Whatever ``predict`` returns for the selected column.

    Raises:
        KeyError: if ``text_column`` is not a column of the CSV file.
    """
    tweets = pd.read_csv(path)

    # Fail with an actionable message instead of a bare KeyError: '1'.
    if text_column not in tweets.columns:
        raise KeyError(
            f"column {text_column!r} not found in {path!r}; "
            f"available columns: {list(tweets.columns)}"
        )
    texts = tweets[text_column]

    model, vectorizer = load_model()

    return predict(model, vectorizer, texts)


In [51]:
# Raw string literal: the Windows path contains backslashes (`\P`, `\T`, `\m`)
# that are invalid escape sequences in a normal string. They currently trigger
# a DeprecationWarning/SyntaxWarning and would silently change the path if a
# folder name ever started with a valid escape letter (e.g. `\t`, `\n`).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this file exists.
AccountSentiment = main(r'D:\Project Alpro\Tweetoxicity\models\Tweets of POTUS.csv')
AccountSentiment.head()

Unnamed: 0,original text,clean text,sentiment,confidence
0,Congratulations to @NYCFC for winning the MLS ...,congratulation winning ml cup team reminder al...,POSITIVE,98.95
1,The federal government will do everything it c...,federal government everything help impacted we...,NEGATIVE,72.61
2,From renewing passports to claiming retirement...,renewing passport claiming retirement benefit ...,POSITIVE,62.97
3,"Today, I was briefed by FEMA on our response t...",today briefed fema response tornado extreme we...,NEGATIVE,79.67
4,"Happy Birthday, @NationalGuard! https://t.co/c...",happy birthday,POSITIVE,97.15


In [52]:
# Share of each sentiment label among all analysed texts, in percent.
sentiment_counts = AccountSentiment['sentiment'].value_counts()
total_texts = len(AccountSentiment['sentiment'])

# .get(label, 0) keeps the cell working when a label never occurs
# (indexing value_counts() with a missing label raises KeyError), and the
# total-count guard avoids a division by zero on an empty frame, in which
# case both ratios are reported as 0.0.
POSITIVE = sentiment_counts.get("POSITIVE", 0) / total_texts * 100 if total_texts else 0.0
NEGATIVE = sentiment_counts.get("NEGATIVE", 0) / total_texts * 100 if total_texts else 0.0

Sentiment_ratio = {
    'Sentiment': ["POSITIVE", "NEGATIVE"],
    'Ratio': [POSITIVE, NEGATIVE]
}

Result = pd.DataFrame(Sentiment_ratio)
Result

Unnamed: 0,Sentiment,Ratio
0,POSITIVE,68.0
1,NEGATIVE,32.0
