In [1]:
import pandas as pd
import numpy as np
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification
import torch
from IPython.display import display
import matplotlib.pyplot as plt

In [2]:
# Hugging Face checkpoint name used when loading the tokenizer and classifier.
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
class SentimentAnalysisDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per item.

    Parameters
    ----------
    texts : sequence of str
        Texts to tokenize; indexed with ``texts[idx]``.
    tokenizer : callable, optional
        Called as ``tokenizer(text, max_length=..., truncation=True,
        padding="max_length")`` and expected to return a mapping with
        ``"input_ids"`` and ``"attention_mask"`` entries. When omitted, the
        notebook-level ``tokenizer`` variable is looked up each time an item
        is fetched, so it only has to exist before the first item is read.
    max_length : int, default 150
        Length every text is padded or truncated to.
    """

    def __init__(self, texts, tokenizer=None, max_length=150):
        super().__init__()
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer output for ``texts[idx]`` with tensor-valued
        ``input_ids`` and ``attention_mask`` entries."""
        # Fall back to the global tokenizer at call time so existing callers
        # that construct the dataset with only `texts` keep working.
        active_tokenizer = self.tokenizer if self.tokenizer is not None else tokenizer
        text = self.texts[idx]
        tokenized_text = active_tokenizer(
            text, max_length=self.max_length, truncation=True, padding="max_length"
        )
        # Convert the id lists to tensors so the DataLoader can stack them into a batch.
        tokenized_text["input_ids"] = torch.tensor(tokenized_text["input_ids"])
        tokenized_text["attention_mask"] = torch.tensor(tokenized_text["attention_mask"])
        return tokenized_text

In [4]:
# Tokenizer and base classifier both come from the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
# Overwrite the base weights with the fine-tuned state dict. `map_location`
# remaps tensors that were saved from another device (e.g. a GPU training run)
# onto `device`, so the load also succeeds on CPU-only machines.
model.load_state_dict(
    torch.load(
        "../input/sentimentanalysisfinetunedmodel/SentimentClassifierEpoch1.pth",
        map_location=device,
    )
)

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]

<All keys matched successfully>

In [14]:
def PredictNewText(raw_text=None, confidence_threshold=0.70):
    """Classify social media posts and print a sentiment report with a negativity score.

    Parameters
    ----------
    raw_text : str or iterable of str, optional
        Posts to analyze. A string is split on semi-colons; any other iterable
        is taken as one post per element. Surrounding whitespace is stripped
        and blank posts are dropped. When omitted, the posts are read
        interactively with ``input()``.
    confidence_threshold : float, default 0.70
        Probability below which a single prediction, or a class's average
        confidence, is reported as weak.

    Returns
    -------
    None
        The report is printed; nothing is returned.

    Notes
    -----
    Uses the notebook-level ``model``, ``device`` and
    ``SentimentAnalysisDataset``. The final score is
    ``0.4 * (share of negative posts among the last four)
    + 0.35 * (share of negative posts overall)
    + 0.25 * (mean confidence of the negative predictions)``.
    """
    if raw_text is None:
        print(f"Enter the social media post(s) you would like to analyze. If you would like to analyze multiple tweets (HIGHLY recommended > 5 tweets) seperate them with a semi-colon:")
        raw_text = input()

    # Split exactly once, then drop blanks so a trailing or doubled ";" does not
    # create an empty post that would be classified and counted.
    candidates = raw_text.split(";") if isinstance(raw_text, str) else list(raw_text)
    text = [post.strip() for post in candidates if post.strip()]
    if not text:
        print("No posts were provided, so there is nothing to analyze.")
        return

    conv_dict = {
        0 : "negative",
        1 : "neutral",
        2 : "positive"
    }
    dataset = SentimentAnalysisDataset(text)
    dataloader = torch.utils.data.DataLoader(dataset, shuffle=False, num_workers=2, batch_size=32)

    # Inference only: disable dropout and skip building the autograd graph.
    model.eval()
    batch_probas = []
    with torch.no_grad():
        for example in dataloader:
            input_ids = example["input_ids"].to(device)
            attn_mask = example["attention_mask"].to(device)

            logits = model(input_ids, attn_mask)[0]
            batch_probas.append(torch.softmax(logits, dim=-1).cpu().numpy())

    # Merge every batch so posts beyond the first 32 are part of the report too.
    probas = np.concatenate(batch_probas, axis=0)
    preds = probas.argmax(axis=-1)
    n_posts = len(preds)

    print("\n"*5)
    print("="*15, ">", "Full Report", "<", "="*15)
    print("\n")

    print(f"Individual Post Results: \n")
    counts = np.zeros(3)
    confidence_levels = 0
    confidence_sums = np.zeros(3)
    probability_predictions = []
    for i, pred in enumerate(preds):
        pred = int(pred)
        sentiment = conv_dict[pred]
        counts[pred] += 1
        proba = float(probas[i, pred])
        selected_proba = round(proba*100, 2)
        print(f"Post {i+1}: {text[i]} | This post was classified as {sentiment}, with {selected_proba} percent confidence")
        confidence_levels += proba*100
        confidence_sums[pred] += proba
        probability_predictions.append(proba)

    print("\n"*2)
    print("Cohesive Results: \n")
    count = counts/n_posts
    confidence_levels /= n_posts

    # Share of each class among the most recent posts; divide by how many posts
    # were actually available so fewer than four posts still yields fractions
    # that sum to one.
    recent_preds = preds[-4:]
    last_four = np.zeros(3)
    for pred in recent_preds:
        last_four[pred] += 1
    last_four /= len(recent_preds)

    # Mean confidence per class. Classes with no posts stay at 0 instead of
    # becoming NaN (0/0), which previously made the final score NaN and pushed
    # the diagnostic into the "Severe Negativity" branch.
    seen = counts > 0
    confidence_for_analysis = np.zeros(3)
    confidence_for_analysis[seen] = confidence_sums[seen] / counts[seen]

    # Only classes that were actually predicted can be "weak".
    seen_classes = np.flatnonzero(seen)
    lowest_class_confidence = int(seen_classes[np.argmin(confidence_for_analysis[seen_classes])])
    lowest_confidence = confidence_for_analysis[lowest_class_confidence]
    if lowest_confidence < confidence_threshold:
        confidence_weak = f"However, we were weak in classifiying posts as {conv_dict[lowest_class_confidence]} at just {round(lowest_confidence*100, 2)} percent confidence in our prediction."
    else:
        confidence_weak = ""

    # 1-based numbers of the posts whose winning probability is under the threshold.
    threshold_not_met_locs = np.flatnonzero(np.array(probability_predictions) < confidence_threshold)
    weak_predictions = [str(idx+1) for idx in threshold_not_met_locs]
    if len(weak_predictions) > 1:
        str_weak_preds = ", ".join(weak_predictions[:-1]) + ", and " + weak_predictions[-1]
    else:
        str_weak_preds = ", ".join(weak_predictions)

    print(f"Recent Post Analysis | Of you're last four posts, {(last_four[np.argmax(last_four)]*100)} percent have been {conv_dict[np.argmax(last_four)]}")
    print(f"Confidence Analysis | On average each time we classified a post, we were {confidence_levels} percent sure we were correct. " + confidence_weak)
    if weak_predictions:
        print(f"Weak Prediction Analysis | Of all the predictions made, the ones made on post(s) " + str_weak_preds + " were the most uncertain.")
    else:
        print(f"Weak Prediction Analysis | None of the predictions fell below the {round(confidence_threshold*100, 2)} percent confidence threshold.")

    print("\n"*3, "-"*15, ">", "FINAL DIAGNOSTIC", "<", "-"*15, "\n")

    print("How this diagnostic will be calculated: three elements will be considered in evaluating the person: 1. The percentage of negative posts out of ALL posts made by the person 2. The percentage of negative posts out of the last four posts made by the person 3. The confidence we had in each of our predictions\nA score between 0 and 1 will be calculated for the posts submitted. The closer the score is to one, the more negative the person's feelings are. The  closer the score is to zero, the more normal/positive the person is feeling\n")

    recent_negative_perc = last_four[0]
    total_negative_perc = count[0]
    confidence_probas_negative = confidence_for_analysis[0]

    weight_eval = (recent_negative_perc*0.4) + (total_negative_perc*0.35) + (confidence_probas_negative*0.25)
    print(f"Person's Score: {round(weight_eval*100, 4)}/{100}")

    if weight_eval <= 0.5:
        print(f"Diagnostic: Normal | This person doesn't seem to be having any problems regarding mental and social health.")
    elif (weight_eval>0.5) and (weight_eval<=0.8):
        print(f"Diagnostic: Moderate To Significant Negativity | This person is generally displaying more negative feelings than positive. It may be very beneficial to them if a friend, or family member were to talk to them, or cheer them up, especially if many of the negative tweets were recent.")
    else:
        print(f"Diagnostic: Severe Negativity | This person is showing extremely high level's of negative emotions. Not only should a friend, or gaurdian check on the person, but it may be neccessary to bring in external help (counsler, etc...). It is crucial to QUICKLY support this person.")
    
    

In [15]:
"""
Sample Tweets Used To Show Functionality (Note that these are NOT real):
"""

# Eight made-up posts in chronological order; none of them contains a
# semi-colon, so ";" is safe to use as the separator below.
posts = [
    "will be at tonight's football game irl for first time in 2 years :) So EXCITED!!!!",
    "going to get food from concessions stand. hot dog decent price at 2 bucks",
    "game ended in a tie in ot last night but still worth it tho ngl",
    "regret going to the game actually cause it was windy there and now im sniffling :(",
    "officially sick. this sucks cause im gonna miss a week of school, and making up work online is a pain",
    "head is hurting, nose blocked, 102 temp. im miserable",
    "im so lost in all my subjects. cant follow online class. so stressed about my tests when i get back",
    "so tired of all of this. wish i could just be healthy and back in irl school",
]
# Displayed as the cell output: the single semi-colon-separated string to paste
# into the prompt.
";".join(posts)

"will be at tonight's football game irl for first time in 2 years :) So EXCITED!!!!;going to get food from concessions stand. hot dog decent price at 2 bucks;game ended in a tie in ot last night but still worth it tho ngl;regret going to the game actually cause it was windy there and now im sniffling :(;officially sick. this sucks cause im gonna miss a week of school, and making up work online is a pain;head is hurting, nose blocked, 102 temp. im miserable;im so lost in all my subjects. cant follow online class. so stressed about my tests when i get back;so tired of all of this. wish i could just be healthy and back in irl school"

In [16]:
### When this notebook is viewed as a static file (for example on GitHub), the input text box is not shown.
### In a live session, calling this function opens a text box where the user can enter the posts;
### after pressing Enter, the output below is printed.
PredictNewText()

Enter the social media post(s) you would like to analyze. If you would like to analyze multiple tweets (HIGHLY recommended > 5 tweets) seperate them with a semi-colon:


 will be at tonight's football game irl for first time in 2 years :) So EXCITED!!!!;going to get food from concessions stand. hot dog decent price at 2 bucks;game ended in a tie in ot last night but still worth it tho ngl;regret going to the game actually cause it was windy there and now im sniffling :(;officially sick. this sucks cause im gonna miss a week of school, and making up work online is a pain;head is hurting, nose blocked, 102 temp. im miserable;im so lost in all my subjects. cant follow online class. so stressed about my tests when i get back;so tired of all of this. wish i could just be healthy and back in irl school










Individual Post Results: 

Post 1: will be at tonight's football game irl for first time in 2 years :) So EXCITED!!!! | This post was classified as positive, with 92.34 percent confidence
Post 2: going to get food from concessions stand. hot dog decent price at 2 bucks | This post was classified as neutral, with 69.81 percent confidence
Post 3: game ended in a tie in ot last night but still worth it tho ngl | This post was classified as neutral, with 46.13 percent confidence
Post 4: regret going to the game actually cause it was windy there and now im sniffling :( | This post was classified as negative, with 96.99 percent confidence
Post 5: officially sick. this sucks cause im gonna miss a week of school, and making up work online is a pain | This post was classified as negative, with 97.48 percent confidence
Post 6: head is hurting, nose blocked, 102 temp. im miserable | This post was classified as negative, with 96.11 percent confidence
Post 7: im so lost in all my subjects. 