### Imports

In [9]:
import pandas as pd 
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from textblob import TextBlob
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import StandardScaler, LabelEncoder
from snorkel.labeling import labeling_function
from snorkel.labeling.model import LabelModel
from snorkel.labeling import PandasLFApplier
from snorkel.augmentation import transformation_function
from snorkel.augmentation import ApplyOnePolicy, PandasTFApplier
import random
from nltk.corpus import wordnet as wn
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
from transformers import pipeline
from spellchecker import SpellChecker

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/ben/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
# Load the pre-trained model from the .h5 file
model_path = "model_11_72test.h5" # Best Performing - Currently Set
model_path2 = "model_11_neg_words.h5" # new try after edits / sentiment scores not as closely aligning to Vader as best performing
# Insert your relative path here
model = tf.keras.models.load_model('/Users/ben/Desktop/DSI_GA_Materials/capstone/Capstone_Project_backup/model_11_72test.h5')

# Load VADER sentiment analyzer
vader_analyzer = SentimentIntensityAnalyzer()



In [5]:
# Define variables
vocab_size = 23100
embedding_dim = 100
max_length = 78
trunc_type = 'post'
padding_type = 'post'
oov_token = '<OOV>'

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)

misleading_bias_terms = ['trump', 'u', 'america', 'american', 'new', 'people', 'states', 'president', 'many', 'states', 'united', 'americans', 'one']
bias_words = ['fake', 'news', 'fale', 'biased', 'unreliable', 'propaganda', 'misleading', 'partisan', 'manipulative']
subj_words = ['feel', 'feels', 'thinks', 'thought', 'thoughts', 'opinion', 'bias', 'think','felt', 'believe', 'believed','believes','believer']
# Neg words
past_tense_keywords = ["hurt", "blamed", "harmed", "accused"]
present_tense_keywords = ["hurts", "blames", "harms", "accuses"]
active_voice_keywords = ["hurting", "blaming", "harming", "accusing"]

# Keyword binary functions
keywords = [
    "maps", "county", "election", "coronavirus", "case",
    "risk", "cases", "covid", "latest", "trump",
    "ukraine", "russia", "war", "reminiscent",
    "removes", "proceed", "ponder"
]

@labeling_function()
def lf_keyword_my_binary(x):
    """Return 1 if any of the misleading_bias_terms is present, else return 0."""
    presence = any(term in str(x).lower() for term in misleading_bias_terms)
    return 1 if presence else 0

@labeling_function()
def lf_regex_fake_news_binary(x):
    """Return 1 if any of the bias_words is present, else return 0."""
    presence = any(re.search(fr"\b{word}\b", str(x), flags=re.I) is not None for word in bias_words)
    return 1 if presence else 0

@labeling_function()
def lf_regex_subjective_binary(x):
    """Return 1 if any of the subj_words is present, else return 0."""
    presence = any(re.search(fr"\b{word}\b", str(x), flags=re.I) is not None for word in subj_words)
    return 1 if presence else 0

@labeling_function()
def lf_long_combined_text_binary(text_list):
    """Return 1 if the combined length is greater than 376, else return 0."""
    length = len(" ".join(str(text_list)).split())
    return 1 if length > 133 else 0 

@labeling_function()
def lf_textblob_polarity_binary(x):
    """
    We use a third-party sentiment classification model, TextBlob.

    We map the polarity to binary classification: 1 if negative, 0 otherwise.
    """
    polarity = TextBlob(str(x)).sentiment.polarity
    return 1 if polarity < 0 else 0

@labeling_function()
def lf_textblob_subjectivity_binary(x):
    """
    We use a third-party sentiment classification model, TextBlob.

    We map the subjectivity to binary classification: 1 if high subjectivity, 0 otherwise.
    """
    # Check if either of the two phrases occurs in the text
    if "see full results maps" in str(x).lower() or "see latest charts maps" in str(x).lower():
        return 0
    
    # Calculate subjectivity using TextBlob
    subjectivity = TextBlob(str(x)).sentiment.subjectivity
    
    # Return 1 if high subjectivity, 0 otherwise
    return 1 if subjectivity > 0.5 else 0

@labeling_function()
def lf_past_tense_keywords_binary(x):
    """Return BIAS if any of the subj_words is present, else ABSTAIN."""
    presence = any(re.search(fr"\b{word}\b", str(x), flags=re.I) is not None for word in past_tense_keywords)
    return 1 if presence else 0

@labeling_function()
def lf_present_tense_keywords_binary(x):
    """Return BIAS if any of the subj_words is present, else ABSTAIN."""
    presence = any(re.search(fr"\b{word}\b", str(x), flags=re.I) is not None for word in present_tense_keywords)
    return 1 if presence else 0

@labeling_function()
def lf_active_voice_keywords_binary(x):
    """Return BIAS if any of the subj_words is present, else ABSTAIN."""
    presence = any(re.search(fr"\b{word}\b", str(x), flags=re.I) is not None for word in active_voice_keywords)
    return 1 if presence else 0

keywords = [
    "maps", "county", "election", "coronavirus", "case",
    "risk", "cases", "covid", "latest", "trump",
    "ukraine", "russia", "war", "reminiscent",
    "removes", "proceed", "ponder"
]

keywords_pattern = "|".join(fr"\b{re.escape(keyword)}\b" for keyword in keywords)

@labeling_function()
def lf_keyword_maps_binary(x):
    return 1 if re.search(fr"\b{re.escape('maps')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_county_binary(x):
    return 1 if re.search(fr"\b{re.escape('county')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_election_binary(x):
    return 1 if re.search(fr"\b{re.escape('election')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_coronavirus_binary(x):
    return 1 if re.search(fr"\b{re.escape('coronavirus')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_case_binary(x):
    return 1 if re.search(fr"\b{re.escape('case')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_risk_binary(x):
    return 1 if re.search(fr"\b{re.escape('risk')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_cases_binary(x):
    return 1 if re.search(fr"\b{re.escape('cases')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_covid_binary(x):
    return 1 if re.search(fr"\b{re.escape('covid')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_latest_binary(x):
    return 1 if re.search(fr"\b{re.escape('latest')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_trump_binary(x):
    return 1 if re.search(fr"\b{re.escape('trump')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_ukraine_binary(x):
    return 1 if re.search(fr"\b{re.escape('ukraine')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_russia_binary(x):
    return 1 if re.search(fr"\b{re.escape('russia')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_war_binary(x):
    return 1 if re.search(fr"\b{re.escape('war')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_reminiscent_binary(x):
    return 1 if re.search(fr"\b{re.escape('reminiscent')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_removes_binary(x):
    return 1 if re.search(fr"\b{re.escape('removes')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_proceed_binary(x):
    return 1 if re.search(fr"\b{re.escape('proceed')}\b", str(x), flags=re.I) else 0

@labeling_function()
def lf_keyword_ponder_binary(x):
    return 1 if re.search(fr"\b{re.escape('ponder')}\b", str(x), flags=re.I) else 0


In [6]:

# Define weights for each binary labeling function
weight_lf_keyword_my_binary = 0.1 # Lower this to 0.05 with neg words, raise to 0.2 without neg words move to 0.01 with other keywords
weight_lf_regex_fake_news_binary = 0.1
weight_lf_regex_subjective_binary = 0.1
weight_lf_long_combined_text_binary = 0.1
weight_lf_textblob_polarity_binary = 0.2 
weight_lf_textblob_subjectivity_binary = 0.4 # Up to 0.39 when individual keywords used
# Neg/pos words - Set weights to 0.00 for original model and put to 0.05 for subsequent pos and neg word tests
weight_lf_past_tense_keywords_binary = 0.00 
weight_lf_present_tense_keywords_binary = 0.00
weight_lf_active_voice_keywords_binary = 0.00
# sub 0.1
weight_lf_keyword_maps_binary = -0.01
weight_lf_keyword_county_binary = -0.01
weight_lf_keyword_election_binary = -0.01
weight_lf_keyword_coronavirus_binary = -0.01
weight_lf_keyword_case_binary = -0.01
weight_lf_keyword_risk_binary = -0.01
weight_lf_keyword_cases_binary = -0.01
weight_lf_keyword_covid_binary = -0.02
weight_lf_keyword_latest_binary = -0.01
# add 0.1
weight_lf_keyword_trump_binary = 0.03
weight_lf_keyword_ukraine_binary = 0.01
weight_lf_keyword_russia_binary = 0.01
weight_lf_keyword_war_binary = 0.01
weight_lf_keyword_reminiscent_binary = 0.01
weight_lf_keyword_removes_binary = 0.01
weight_lf_keyword_proceed_binary = 0.01
weight_lf_keyword_ponder_binary = 0.01

def combined_binary_bias_score(x):
    """Combine binary labeling functions into a linear equation."""
    lf1_score = lf_keyword_my_binary(x) * weight_lf_keyword_my_binary
    lf2_score = lf_regex_fake_news_binary(x) * weight_lf_regex_fake_news_binary
    lf3_score = lf_regex_subjective_binary(x) * weight_lf_regex_subjective_binary
    lf4_score = lf_long_combined_text_binary(x) * weight_lf_long_combined_text_binary
    lf5_score = lf_textblob_polarity_binary(x) * weight_lf_textblob_polarity_binary
    lf6_score = lf_textblob_subjectivity_binary(x) * weight_lf_textblob_subjectivity_binary
    # neg words
    lf7_score = lf_past_tense_keywords_binary(x) * weight_lf_past_tense_keywords_binary
    lf8_score = lf_present_tense_keywords_binary(x) * weight_lf_present_tense_keywords_binary
    lf9_score = lf_active_voice_keywords_binary(x) * weight_lf_active_voice_keywords_binary
    # keyword binary functions
    lf10_score = lf_keyword_maps_binary(x) * weight_lf_keyword_maps_binary
    lf11_score = lf_keyword_county_binary(x) * weight_lf_keyword_county_binary
    lf12_score = lf_keyword_election_binary(x) * weight_lf_keyword_election_binary
    lf13_score = lf_keyword_coronavirus_binary(x) * weight_lf_keyword_coronavirus_binary
    lf14_score = lf_keyword_case_binary(x) * weight_lf_keyword_case_binary
    lf15_score = lf_keyword_risk_binary(x) * weight_lf_keyword_risk_binary
    lf16_score = lf_keyword_cases_binary(x) * weight_lf_keyword_cases_binary
    lf17_score = lf_keyword_covid_binary(x) * weight_lf_keyword_covid_binary
    lf18_score = lf_keyword_latest_binary(x) * weight_lf_keyword_latest_binary
    lf19_score = lf_keyword_trump_binary(x) * weight_lf_keyword_trump_binary
    lf20_score = lf_keyword_ukraine_binary(x) * weight_lf_keyword_ukraine_binary
    lf21_score = lf_keyword_russia_binary(x) * weight_lf_keyword_russia_binary
    lf22_score = lf_keyword_war_binary(x) * weight_lf_keyword_war_binary
    lf23_score = lf_keyword_reminiscent_binary(x) * weight_lf_keyword_reminiscent_binary
    lf24_score = lf_keyword_removes_binary(x) * weight_lf_keyword_removes_binary
    lf25_score = lf_keyword_proceed_binary(x) * weight_lf_keyword_proceed_binary
    lf26_score = lf_keyword_ponder_binary(x) * weight_lf_keyword_ponder_binary

    # Combine scores with weights
    combined_score = (
            lf1_score + lf2_score + lf3_score + lf4_score +
            lf5_score + lf6_score + lf7_score + lf8_score + lf9_score +
            lf10_score + lf11_score + lf12_score + lf13_score +
            lf14_score + lf15_score + lf16_score + lf17_score +
            lf18_score + lf19_score + lf20_score + lf21_score +
            lf22_score + lf23_score + lf24_score + lf25_score + lf26_score
    )
    # Normalize to the range [0, 1]
    normalized_score = max(0, min(combined_score, 1))

    return normalized_score


In [7]:

# Function to predict and display outcomes
def predict_and_display_outcomes(user_input, show_sentiment_scores):
    # Tokenize and pad the input sequence
    tokenizer.fit_on_texts([user_input])
    sequence = tokenizer.texts_to_sequences([user_input])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)

    # Display phrase with stopwords removed
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(user_input)
    filtered_tokens = [word for word in word_tokens if word.lower() not in stop_words]
    filtered_phrase = " ".join(filtered_tokens)
    print(f"Phrase with stopwords removed: {filtered_phrase}")

    # Make a prediction using the neural net model
    model_score = np.argmax(model.predict(padded_sequence)) / 10
    print(f"Neural Net Model Tendency Towards Bias Score: {model_score:.2f}")

    # Use VADER sentiment analyzer
    vader_score_total = vader_analyzer.polarity_scores(user_input)['compound']
    vader_score_pos = vader_analyzer.polarity_scores(user_input)['pos']
    vader_score_neutral = vader_analyzer.polarity_scores(user_input)['neu']
    vader_score_neg = vader_analyzer.polarity_scores(user_input)['neg']

    # Call the previous function to display outcomes of labeling functions
    print("Labeling Function Outcomes:")
    print(lf_outcomes(user_input, model_score))

    combined_score = combined_binary_bias_score(user_input)
    # This sentiment score combines the predicted model score and combined train score, where train tends more towards bias and predicted errs on less bias
    # Their sum is a total that captures sentiment: closer to positive 1 indicates more positive sentiment whereas closer to negative 1 indicates more negative sentiment.
    sent_score = combined_score + model_score

    if vader_score_total < 0:
        sent_score *= -1
    elif vader_score_total > 0:
        sent_score *= 1



    print(f"Model Sentiment Score:, {sent_score:.4f}")
    print(f"VADER Total Sentiment Score:, {vader_score_total:.4f}")
    print(f"VADER Positive Score: {vader_score_pos:.4f}")
    print(f"VADER Neutral Score: {vader_score_neutral:.4f}")
    print(f"VADER Negative Score: {vader_score_neg:.4f}")

# Define a function to display outcomes of labeling functions
@st.cache_resource()
def lf_outcomes(user_input, model_score):
    # Labeling Function 1
    lf1_outcome = lf_keyword_my_binary(user_input)
    print(f"LF 1 - Keyword My Binary: Outcome - {lf1_outcome}, Score - {lf1_outcome * weight_lf_keyword_my_binary:.2f}")

    # Labeling Function 2
    lf2_outcome = lf_regex_fake_news_binary(user_input)
    print(f"LF 2 - Regex Fake News Binary: Outcome - {lf2_outcome}, Score - {lf2_outcome * weight_lf_regex_fake_news_binary:.2f}")

    # Labeling Function 3
    lf3_outcome = lf_regex_subjective_binary(user_input)
    print(f"LF 3 - Regex Subjective Binary: Outcome - {lf3_outcome}, Score - {lf3_outcome * weight_lf_regex_subjective_binary:.2f}")

    # Labeling Function 4
    lf4_outcome = lf_long_combined_text_binary(user_input)
    print(f"LF 4 - Long Combined Text Binary: Outcome - {lf4_outcome}, Score - {lf4_outcome * weight_lf_long_combined_text_binary:.2f}")

    # Labeling Function 5
    lf5_outcome = lf_textblob_polarity_binary(user_input)
    print(f"LF 5 - Textblob Polarity Binary: Outcome - {lf5_outcome}, Score - {lf5_outcome * weight_lf_textblob_polarity_binary:.2f}")

    # Labeling Function 6
    lf6_outcome = lf_textblob_subjectivity_binary(user_input)
    print(f"LF 6 - Textblob Subjective Binary: Outcome - {lf6_outcome}, Score - {lf6_outcome * weight_lf_textblob_subjectivity_binary:.2f}")

    # Labeling Function 7
    lf7_outcome = lf_past_tense_keywords_binary(user_input)
    print(f"LF 7 - Past Tense Keywords Binary: Outcome - {lf7_outcome}, Score - {lf7_outcome * weight_lf_past_tense_keywords_binary:.2f}")

    # Labeling Function 8
    lf8_outcome = lf_present_tense_keywords_binary(user_input)
    print(f"LF 8 - Present Tense Keywords Binary: Outcome - {lf8_outcome}, Score - {lf8_outcome * weight_lf_present_tense_keywords_binary:.2f}")

    # Labeling Function 9
    lf9_outcome = lf_active_voice_keywords_binary(user_input)
    print(f"LF 9 - Active Voice Keywords Binary: Outcome - {lf9_outcome}, Score - {lf9_outcome * weight_lf_active_voice_keywords_binary:.2f}")

    # Labeling Function 10
    lf10_outcome = lf_keyword_maps_binary(user_input)
    print(f"LF 10 - Keyword Maps Binary: Outcome - {lf10_outcome}, Score - {lf10_outcome * weight_lf_keyword_maps_binary:.2f}")

    # Labeling Function 11
    lf11_outcome = lf_keyword_county_binary(user_input)
    print(f"LF 11 - Keyword County Binary: Outcome - {lf11_outcome}, Score - {lf11_outcome * weight_lf_keyword_county_binary:.2f}")

    # Labeling Function 12
    lf12_outcome = lf_keyword_election_binary(user_input)
    print(f"LF 12 - Keyword Election Binary: Outcome - {lf12_outcome}, Score - {lf12_outcome * weight_lf_keyword_election_binary:.2f}")

    # Labeling Function 13
    lf13_outcome = lf_keyword_coronavirus_binary(user_input)
    print(f"LF 13 - Keyword Coronavirus Binary: Outcome - {lf13_outcome}, Score - {lf13_outcome * weight_lf_keyword_coronavirus_binary:.2f}")

    # Labeling Function 14
    lf14_outcome = lf_keyword_case_binary(user_input)
    print(f"LF 14 - Keyword Case Binary: Outcome - {lf14_outcome}, Score - {lf14_outcome * weight_lf_keyword_case_binary:.2f}")

    # Labeling Function 15
    lf15_outcome = lf_keyword_risk_binary(user_input)
    print(f"LF 15 - Keyword Risk Binary: Outcome - {lf15_outcome}, Score - {lf15_outcome * weight_lf_keyword_risk_binary:.2f}")

    # Labeling Function 16
    lf16_outcome = lf_keyword_cases_binary(user_input)
    print(f"LF 16 - Keyword Cases Binary: Outcome - {lf16_outcome}, Score - {lf16_outcome * weight_lf_keyword_cases_binary:.2f}")

    # Labeling Function 17
    lf17_outcome = lf_keyword_covid_binary(user_input)
    print(f"LF 17 - Keyword COVID Binary: Outcome - {lf17_outcome}, Score - {lf17_outcome * weight_lf_keyword_covid_binary:.2f}")

    # Labeling Function 18
    lf18_outcome = lf_keyword_latest_binary(user_input)
    print(f"LF 18 - Keyword Latest Binary: Outcome - {lf18_outcome}, Score - {lf18_outcome * weight_lf_keyword_latest_binary:.2f}")

    # Labeling Function 19
    lf19_outcome = lf_keyword_trump_binary(user_input)
    print(f"LF 19 - Keyword Trump Binary: Outcome - {lf19_outcome}, Score - {lf19_outcome * weight_lf_keyword_trump_binary:.2f}")

    # Labeling Function 20
    lf20_outcome = lf_keyword_ukraine_binary(user_input)
    print(f"LF 20 - Keyword Ukraine Binary: Outcome - {lf20_outcome}, Score - {lf20_outcome * weight_lf_keyword_ukraine_binary:.2f}")

    # Labeling Function 21
    lf21_outcome = lf_keyword_russia_binary(user_input)
    print(f"LF 21 - Keyword Russia Binary: Outcome - {lf21_outcome}, Score - {lf21_outcome * weight_lf_keyword_russia_binary:.2f}")

    # Labeling Function 22
    lf22_outcome = lf_keyword_war_binary(user_input)
    print(f"LF 22 - Keyword War Binary: Outcome - {lf22_outcome}, Score - {lf22_outcome * weight_lf_keyword_war_binary:.2f}")

    # Labeling Function 23
    lf23_outcome = lf_keyword_reminiscent_binary(user_input)
    print(f"LF 23 - Keyword Reminiscent Binary: Outcome - {lf23_outcome}, Score - {lf23_outcome * weight_lf_keyword_reminiscent_binary:.2f}")

    # Labeling Function 24
    lf24_outcome = lf_keyword_removes_binary(user_input)
    print(f"LF 24 - Keyword Removes Binary: Outcome - {lf24_outcome}, Score - {lf24_outcome * weight_lf_keyword_removes_binary:.2f}")

    # Labeling Function 25
    lf25_outcome = lf_keyword_proceed_binary(user_input)
    print(f"LF 25 - Keyword Proceed Binary: Outcome - {lf25_outcome}, Score - {lf25_outcome * weight_lf_keyword_proceed_binary:.2f}")

    # Labeling Function 26
    lf26_outcome = lf_keyword_ponder_binary(user_input)
    print(f"LF 26 - Keyword Ponder Binary: Outcome - {lf26_outcome}, Score - {lf26_outcome * weight_lf_keyword_ponder_binary:.2f}")

    # Combined Binary Bias Score
    combined_score = combined_binary_bias_score(user_input)
    print(f"Labeling Function Tendency Towards Bias Score: {combined_score:.2f}")


### Load in Dataframe

In [10]:
df = pd.read_csv('/Users/ben/Desktop/DSI_GA_Materials/capstone/Capstone_Project_backup/Data/sampled_test_hand_labeled.csv')

In [11]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,militants engaged former officials hamid karza...,1
1,1,american spirit gets lebanese makeover beirut ...,1
2,2,pleasant protest tyranny workplace productivit...,1
3,3,see latest charts maps coronavirus cases death...,0
4,4,biden administration said measures would degra...,1


In [16]:
df['model_scores'] = df['text'].apply(lambda x: predict_and_display_outcomes(x, combined_binary_bias_score))

Phrase with stopwords removed: militants engaged former officials hamid karzai abdullah abdullah well moscow seek help building inclusive government cementing rule chaos persists kabul airport taliban discuss new government


2024-03-07 11:03:01.893 
  command:

    streamlit run /Users/ben/miniforge3/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]


Neural Net Model Tendency Towards Bias Score: 0.20
Labeling Function Outcomes:
LF 1 - Keyword My Binary: Outcome - 1, Score - 0.20
LF 2 - Regex Fake News Binary: Outcome - 0, Score - 0.00
LF 3 - Regex Subjective Binary: Outcome - 0, Score - 0.00
LF 4 - Long Combined Text Binary: Outcome - 1, Score - 0.10
LF 5 - Textblob Polarity Binary: Outcome - 0, Score - 0.00
LF 6 - Textblob Subjective Binary: Outcome - 0, Score - 0.00
LF 7 - Past Tense Keywords Binary: Outcome - 0, Score - 0.00
LF 8 - Present Tense Keywords Binary: Outcome - 0, Score - 0.00
LF 9 - Active Voice Keywords Binary: Outcome - 0, Score - 0.00
LF 10 - Keyword Maps Binary: Outcome - 0, Score - -0.00
LF 11 - Keyword County Binary: Outcome - 0, Score - -0.00
LF 12 - Keyword Election Binary: Outcome - 0, Score - -0.00
LF 13 - Keyword Coronavirus Binary: Outcome - 0, Score - -0.00
LF 14 - Keyword Case Binary: Outcome - 0, Score - -0.00
LF 15 - Keyword Risk Binary: Outcome - 0, Score - -0.00
LF 16 - Keyword Cases Binary: Outcome

In [17]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,label,model_scores
0,0,militants engaged former officials hamid karza...,1,
1,1,american spirit gets lebanese makeover beirut ...,1,
2,2,pleasant protest tyranny workplace productivit...,1,
3,3,see latest charts maps coronavirus cases death...,0,
4,4,biden administration said measures would degra...,1,


In [13]:
# Apply labeling functions and store the results in new columns
# Convert each text entry to a list
df['text_list'] = df['text'].apply(lambda x: [x])

# Apply your model to each row of the DataFrame
# Assuming your model takes a list of strings as input
df['model_scores'] = df['text_list'].apply(lambda x: model.predict(x)[0][0])

df['lf_scores'] = df['text'].apply(lambda x: lf_outcomes(x, model_score))
df['vader_scores'] = df['text'].apply(lambda x: vader_analyze(x))  # Use your VADER analysis function here

ValueError: in user code:

    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 2440, in predict_function  *
        return step_function(self, iterator)
    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 2425, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 2413, in run_step  **
        outputs = model.predict_step(data)
    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/engine/training.py", line 2381, in predict_step
        return self(x, training=False)
    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/ben/miniforge3/lib/python3.10/site-packages/keras/src/engine/input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential' (type Sequential).
    
    Input 0 of layer "bidirectional" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 100)
    
    Call arguments received by layer 'sequential' (type Sequential):
      • inputs=tf.Tensor(shape=(None,), dtype=string)
      • training=False
      • mask=None


In [14]:
df['lf_scores'] = df['text'].apply(lambda x: lf_outcomes(x, model_score))
df['vader_scores'] = df['text'].apply(lambda x: vader_analyze(x))  # Use your VADER analysis function here

NameError: name 'model_score' is not defined