In [11]:
import pandas as pd

import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import numpy as np

In [12]:
# Inference runs on the CPU; the device handle is created first so the model
# can be placed on it as soon as it has been loaded.
device = torch.device("cpu")

# The tokenizer is taken from the stock "roberta-base" checkpoint.
# NOTE(review): do_lower_case=True is passed through unchanged; confirm it
# matches how the text was tokenized when the model was fine-tuned.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base", do_lower_case=True)

# Directory holding the fine-tuned classification model.
# NOTE(review): absolute, machine-specific path; the notebook will not run on
# another machine without editing this line.
model_path = "/Users/scrubscrub/ppol6801/final_project/econ_classification_model"

# Load the model, place it on the device and switch it to evaluation mode.
# Both .to() and .eval() return the module itself, so the calls can be chained.
my_model = RobertaForSequenceClassification.from_pretrained(model_path).to(device).eval()

# Class index produced by the model -> human-readable topic name.
label_dict = {
    3: "inflation",
    4: "national debt",
    0: "GDP",
    2: "housing",
    5: "stock market",
    1: "employment/wages",
}

# Maximum number of tokens kept per text; longer inputs are truncated.
# NOTE(review): presumably mirrors the sequence length used for fine-tuning — confirm.
max_len = 200


def predict(sentence):
    """Classify one piece of text into an economic topic.

    Parameters
    ----------
    sentence : str
        Raw text to classify. It is tokenized with the module-level
        ``tokenizer`` and truncated to ``max_len`` tokens.

    Returns
    -------
    tuple[int, str]
        ``(predicted_label, label)``: the index of the highest-scoring class
        as a plain Python ``int``, and its name looked up in ``label_dict``.

    Raises
    ------
    KeyError
        If the predicted index has no entry in ``label_dict``.
    """
    # Calling the tokenizer directly is the supported replacement for the
    # deprecated ``encode_plus``; the arguments are unchanged.
    encoded = tokenizer(sentence, return_tensors='pt', max_length=max_len, truncation=True)

    # Inference only: disabling autograd avoids building a gradient graph for
    # every row this function is applied to.
    with torch.no_grad():
        outputs = my_model(
            # Keep the inputs on the same device the model was moved to.
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )

    # The first element of the model output holds the class scores.
    scores = outputs[0].detach().cpu().numpy()

    # Convert to a built-in int: a NumPy integer scalar would otherwise show up
    # as ``np.int64(3)`` in the tuple's repr (and in any CSV written from it)
    # on NumPy >= 2.
    predicted_label = int(np.argmax(scores))
    label = label_dict[predicted_label]
    return predicted_label, label

# Quick sanity check of the classifier on a hand-written sentence; the result
# is the cell's last expression, so the notebook displays it.
sample_text = "help i cant afford any groceries its so expensive"
predict(sample_text)


(3, 'inflation')

In [13]:
econ_news = pd.read_csv('data/econ_news.csv')

# For each primary column, fill its nulls from the listed fallback column.
# A row keeps its null when the fallback is null as well.
FALLBACK_COLUMNS = {
    'source': 'source.name',             # null source -> source.name
    'published date': 'publishedAt',     # null published date -> publishedAt
    'extracted_content': 'description',  # null extracted_content -> description
}

# Series.fillna with another Series aligns on the index and only touches the
# null positions, so this produces the same frame as checking every row with
# iterrows() but in a single vectorized pass per column.
for target_col, fallback_col in FALLBACK_COLUMNS.items():
    econ_news[target_col] = econ_news[target_col].fillna(econ_news[fallback_col])

econ_news

Unnamed: 0.1,Unnamed: 0,title,description,published date,url,source,extracted_content,author,urlToImage,publishedAt,content,source.id,source.name
0,0.0,"Circular economy: definition, importance and b...","Circular economy: definition, importance and b...","Wed, 24 May 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMidWh0d...,European Parliament,Benefits: why do we need to switch to a circul...,,,,,,
1,1.0,ICYMI: California Poised to Become World's 4th...,ICYMI: California Poised to Become World's 4th...,"Mon, 24 Oct 2022 07:00:00 GMT",https://news.google.com/rss/articles/CBMiX2h0d...,Office of Governor Gavin Newsom,ICYMI: California Poised to Become World's 4th...,,,,,,
2,2.0,A snapshot of a new working-from-home economy ...,A snapshot of a new working-from-home economy ...,"Mon, 29 Jun 2020 07:00:00 GMT",https://news.google.com/rss/articles/CBMiR2h0d...,Stanford University News,"The new “working-from-home economy,” which is ...",,,,,,
3,3.0,Recreation Economy for Rural Communities | US ...,Recreation Economy for Rural Communities | US ...,"Wed, 06 Dec 2023 08:00:00 GMT",https://news.google.com/rss/articles/CBMiRGh0d...,U.S. EPA.gov,The Recreation Economy for Rural Communities p...,,,,,,
4,4.0,The Political Economy of Economic Policy - IMF...,The Political Economy of Economic Policy - IMF...,"Fri, 10 Jun 2022 22:51:41 GMT",https://news.google.com/rss/articles/CBMiamh0d...,International Monetary Fund,5 min (1403 words) Read\n\nDownload PDF\n\nWe ...,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4119,,'Bidenomics': Some House Democrats cool on Joe...,'Bidenomics': Some House Democrats cool on Joe...,"Tue, 29 Aug 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiemh0d...,USA TODAY,"STRATHAM, N.H. − While President Joe Biden tou...",,,,,,
4120,,Yale professor: Biden's economy most successfu...,Yale professor: Biden's economy most successfu...,"Tue, 25 Jul 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiemh0d...,MSNBC,MSNBC's Lawrence O'Donnell is joined by Yale P...,,,,,,
4121,,Bidenomics: White House adviser Jared Bernstei...,Bidenomics: White House adviser Jared Bernstei...,"Wed, 28 Jun 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMid2h0d...,NPR,Remember Reaganomics? Freakonomics? Now there'...,,,,,,
4122,,The Big Squeeze: How Biden's Environmental Jus...,The Big Squeeze: How Biden's Environmental Jus...,"Thu, 07 Sep 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiaWh0d...,Manhattan Institute,Executive Summary\n\nFrom its first week in of...,,,,,,


In [14]:
# Rows without any text in extracted_content cannot be classified; drop them.
econ_news.dropna(subset=['extracted_content'], inplace=True)

# Report the remaining nulls per column. This is printed explicitly because a
# bare expression in the middle of a cell is evaluated and then discarded, so
# the counts were never actually shown.
print(econ_news.isna().sum())

# predict returns (class index, class name), so every cell of predicted_label
# holds that tuple.
econ_news['predicted_label'] = econ_news['extracted_content'].apply(predict)

In [15]:
# Display the frame, which now carries the predicted_label column.
econ_news

Unnamed: 0.1,Unnamed: 0,title,description,published date,url,source,extracted_content,author,urlToImage,publishedAt,content,source.id,source.name,predicted_label
0,0.0,"Circular economy: definition, importance and b...","Circular economy: definition, importance and b...","Wed, 24 May 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMidWh0d...,European Parliament,Benefits: why do we need to switch to a circul...,,,,,,,"(0, GDP)"
1,1.0,ICYMI: California Poised to Become World's 4th...,ICYMI: California Poised to Become World's 4th...,"Mon, 24 Oct 2022 07:00:00 GMT",https://news.google.com/rss/articles/CBMiX2h0d...,Office of Governor Gavin Newsom,ICYMI: California Poised to Become World's 4th...,,,,,,,"(0, GDP)"
2,2.0,A snapshot of a new working-from-home economy ...,A snapshot of a new working-from-home economy ...,"Mon, 29 Jun 2020 07:00:00 GMT",https://news.google.com/rss/articles/CBMiR2h0d...,Stanford University News,"The new “working-from-home economy,” which is ...",,,,,,,"(0, GDP)"
3,3.0,Recreation Economy for Rural Communities | US ...,Recreation Economy for Rural Communities | US ...,"Wed, 06 Dec 2023 08:00:00 GMT",https://news.google.com/rss/articles/CBMiRGh0d...,U.S. EPA.gov,The Recreation Economy for Rural Communities p...,,,,,,,"(2, housing)"
4,4.0,The Political Economy of Economic Policy - IMF...,The Political Economy of Economic Policy - IMF...,"Fri, 10 Jun 2022 22:51:41 GMT",https://news.google.com/rss/articles/CBMiamh0d...,International Monetary Fund,5 min (1403 words) Read\n\nDownload PDF\n\nWe ...,,,,,,,"(3, inflation)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4119,,'Bidenomics': Some House Democrats cool on Joe...,'Bidenomics': Some House Democrats cool on Joe...,"Tue, 29 Aug 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiemh0d...,USA TODAY,"STRATHAM, N.H. − While President Joe Biden tou...",,,,,,,"(0, GDP)"
4120,,Yale professor: Biden's economy most successfu...,Yale professor: Biden's economy most successfu...,"Tue, 25 Jul 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiemh0d...,MSNBC,MSNBC's Lawrence O'Donnell is joined by Yale P...,,,,,,,"(0, GDP)"
4121,,Bidenomics: White House adviser Jared Bernstei...,Bidenomics: White House adviser Jared Bernstei...,"Wed, 28 Jun 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMid2h0d...,NPR,Remember Reaganomics? Freakonomics? Now there'...,,,,,,,"(0, GDP)"
4122,,The Big Squeeze: How Biden's Environmental Jus...,The Big Squeeze: How Biden's Environmental Jus...,"Thu, 07 Sep 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiaWh0d...,Manhattan Institute,Executive Summary\n\nFrom its first week in of...,,,,,,,"(3, inflation)"


In [16]:
# Persist the classified articles next to the input data.
# NOTE(review): the DataFrame index is written as an extra leading column;
# the input already carries "Unnamed: 0" / "Unnamed: 0.1" columns, which look
# like leftovers of earlier round trips — consider index=False once downstream
# readers have been checked.
classified_path = 'data/classified_econ_news.csv'
econ_news.to_csv(classified_path)