In [1]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('ggplot')

# load model and tokenizer
roberta = "cardiffnlp/twitter-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)


In [2]:



# testing our sentiment analysis function
def sentiment_analysis(tweet):
# precprcess tweet
    tweet_words = []

    for word in tweet.split(' '):
        if word.startswith('@') and len(word) > 1:
            word = '@user'
        
        elif word.startswith('http'):
            word = "http"
        tweet_words.append(word)

    tweet_proc = " ".join(tweet_words)


    labels = ['Negative', 'Neutral', 'Positive']

    # sentiment analysis
    encoded_tweet = tokenizer(tweet_proc, return_tensors='pt')
    # output = model(encoded_tweet['input_ids'], encoded_tweet['attention_mask'])
    output = model(**encoded_tweet)

    scores = output[0][0].detach().numpy()
    scores = softmax(scores)

    for i in range(len(scores)):
        
        l = labels[i]
        s = scores[i]
        print(l,s)

    # Calculate compound score
    compound_score = np.dot(scores, [-1, 0, 1])
    print(f"Compound Score: {compound_score:.4}")

sentiment_analysis("testing out our wondefull  tweet, with roberta model")


Negative 0.012825836
Neutral 0.73449975
Positive 0.2526744
Compound Score: 0.2398


In [4]:
# Define the file paths
file_path = 'Final_NYT_articles.csv'

# Read the CSV files
NYT_articles = pd.read_csv(file_path)

NYT_articles.head()





Unnamed: 0,pub_date,headline,abstract,lead_paragraph,byline,section_name,web_url,source,multimedia_url
0,2024-01-30T13:40:11+0000,Saudi Aramco Abruptly Drops Plans to Expand Oi...,"The pullback, at the behest of the Saudi gover...",Saudi Aramco said Tuesday that it would call o...,By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/30/business/sa...,The New York Times,https://www.nytimes.com/images/2024/01/30/mult...
1,2024-01-29T10:01:56+0000,Where Southerners Go to Fill the Tank and Feed...,Are they gas stations that serve food or resta...,New York City has its bodegas. The South has i...,By Kim Severson,Food,https://www.nytimes.com/2024/01/29/dining/sout...,The New York Times,https://www.nytimes.com/images/2024/01/31/mult...
2,2024-01-27T08:00:40+0000,Democrats Say Biden Hasn’t ‘Made the Case’ on ...,The president likes to talk about his climate ...,President Biden has done more than any preside...,By Lisa Friedman,Climate,https://www.nytimes.com/2024/01/27/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/26/mult...
3,2024-01-24T19:59:53+0000,Shipping Costs Soar in Wake of Red Sea Attacks,Ships risking Houthi attacks must pay high ins...,"For about two months, a barrage of missile and...",By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/24/business/re...,The New York Times,https://www.nytimes.com/images/2024/01/23/mult...
4,2024-01-24T17:59:40+0000,White House Said to Delay Decision on Enormous...,"Before deciding whether to approve it, the Ene...",The Biden administration is pausing a decision...,By Coral Davenport,Climate,https://www.nytimes.com/2024/01/24/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/24/mult...


In [5]:
NYT_articles.insert(0, 'article_count', range(1, len(NYT_articles) + 1))
NYT_articles.tail()

Unnamed: 0,article_count,pub_date,headline,abstract,lead_paragraph,byline,section_name,web_url,source,multimedia_url
144,145,2024-06-01T11:00:06+0000,Oil and Gas Companies Are Trying to Rig the Ma...,Fossil fuel interests are spreading misinforma...,Many of us focused on the problem of climate c...,By Andrew Dessler,Opinion,https://www.nytimes.com/2024/06/01/opinion/cle...,The New York Times,https://www.nytimes.com/images/2024/06/03/opin...
145,146,2024-07-07T19:55:06+0000,"Officials Urge Coastal Texans to Evacuate, but...",Despite warnings that Beryl could be a “deadly...,As Beryl chugged toward the Texas Gulf Coast o...,"By Edgar Sandoval, Maria Jimenez Moya and Jack...",U.S.,https://www.nytimes.com/2024/07/07/us/texas-ev...,The New York Times,https://www.nytimes.com/images/2024/07/07/mult...
146,147,2024-07-07T09:00:16+0000,New Plan to Target Russia’s Oil Revenue Brings...,Treasury officials want to impose penalties on...,Officials in President Biden’s Treasury Depart...,By Jim Tankersley and Alan Rappeport,U.S.,https://www.nytimes.com/2024/07/07/us/politics...,The New York Times,https://www.nytimes.com/images/2024/06/25/mult...
147,148,2024-07-06T10:00:05+0000,"Facing New ‘Greenwashing’ Law, an Oil Industry...",Oil sands companies pushing a carbon capture p...,When Parliament passed a law last month bannin...,By Ian Austen,World,https://www.nytimes.com/2024/07/06/world/canad...,The New York Times,https://www.nytimes.com/images/2024/07/06/mult...
148,149,2024-07-02T01:23:39+0000,Judge Orders Biden Administration to Resume Pe...,President Biden had paused new natural gas exp...,A federal judge on Monday ordered the Biden ad...,By Coral Davenport,Climate,https://www.nytimes.com/2024/07/01/climate/fed...,The New York Times,https://www.nytimes.com/images/2024/07/01/mult...


In [6]:
example = NYT_articles["headline"][4]
print(f"Example: {example}")
def polarity_scores_roberta(example):
    encoded_text = tokenizer(example, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'negative(sentiment_score_headline)' : scores[0],
        'neutral(sentiment_score_headline)' : scores[1],
        'positive(sentiment_score_headline)' : scores[2],
        'compound(sentiment_score_headline)' : np.dot(scores, [-1, 0, 1]) 
    }
    return scores_dict

polarity_scores_roberta(example)

Example: White House Said to Delay Decision on Enormous Natural Gas Export Terminal


{'negative(sentiment_score_headline)': 0.19727972,
 'neutral(sentiment_score_headline)': 0.7822566,
 'positive(sentiment_score_headline)': 0.02046365,
 'compound(sentiment_score_headline)': -0.17681607231497765}

In [7]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm import tqdm
res = {}
for i, row in tqdm(NYT_articles.iterrows(), total=len(NYT_articles)):
    try:
        text = row['headline']
        myid = row['article_count']
        res[myid] = polarity_scores_roberta(text)
    except RuntimeError:
        print(f'Broke for id {myid}')


100%|██████████| 149/149 [00:04<00:00, 34.41it/s]


In [8]:
results_df = pd.DataFrame(res).T
results_df = results_df.reset_index().rename(columns={'index': 'article_count'})
results_df = results_df.merge(NYT_articles, how='left')
results_df.head()

Unnamed: 0,article_count,negative(sentiment_score_headline),neutral(sentiment_score_headline),positive(sentiment_score_headline),compound(sentiment_score_headline),pub_date,headline,abstract,lead_paragraph,byline,section_name,web_url,source,multimedia_url
0,1,0.368058,0.601618,0.030324,-0.337734,2024-01-30T13:40:11+0000,Saudi Aramco Abruptly Drops Plans to Expand Oi...,"The pullback, at the behest of the Saudi gover...",Saudi Aramco said Tuesday that it would call o...,By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/30/business/sa...,The New York Times,https://www.nytimes.com/images/2024/01/30/mult...
1,2,0.166938,0.778636,0.054426,-0.112513,2024-01-29T10:01:56+0000,Where Southerners Go to Fill the Tank and Feed...,Are they gas stations that serve food or resta...,New York City has its bodegas. The South has i...,By Kim Severson,Food,https://www.nytimes.com/2024/01/29/dining/sout...,The New York Times,https://www.nytimes.com/images/2024/01/31/mult...
2,3,0.505036,0.465767,0.029197,-0.475839,2024-01-27T08:00:40+0000,Democrats Say Biden Hasn’t ‘Made the Case’ on ...,The president likes to talk about his climate ...,President Biden has done more than any preside...,By Lisa Friedman,Climate,https://www.nytimes.com/2024/01/27/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/26/mult...
3,4,0.418735,0.560968,0.020298,-0.398437,2024-01-24T19:59:53+0000,Shipping Costs Soar in Wake of Red Sea Attacks,Ships risking Houthi attacks must pay high ins...,"For about two months, a barrage of missile and...",By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/24/business/re...,The New York Times,https://www.nytimes.com/images/2024/01/23/mult...
4,5,0.19728,0.782257,0.020464,-0.176816,2024-01-24T17:59:40+0000,White House Said to Delay Decision on Enormous...,"Before deciding whether to approve it, the Ene...",The Biden administration is pausing a decision...,By Coral Davenport,Climate,https://www.nytimes.com/2024/01/24/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/24/mult...


In [9]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm import tqdm

# Load the model and tokenizer
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define function to calculate polarity scores for abstracts
def polarity_scores_roberta_abstract(example):
    encoded_text = tokenizer(example, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'negative(sentiment_score_abstract)': scores[0],
        'neutral(sentiment_score_abstract)': scores[1],
        'positive(sentiment_score_abstract)': scores[2],
        'compound(sentiment_score_abstract)': np.dot(scores, [-1, 0, 1])
    }
    return scores_dict

# Load the results DataFrame (assuming it's named results_df)
# results_df = pd.read_csv('path_to_your_csv_file.csv') # Uncomment and set the correct path to your file

# Calculate sentiment scores for abstracts and store in a dictionary
res_abstract = {}
for i, row in tqdm(results_df.iterrows(), total=len(results_df)):
    try:
        text = row['abstract']
        myid = row['article_count']
        res_abstract[myid] = polarity_scores_roberta_abstract(text)
    except RuntimeError:
        print(f'Broke for id {myid}')

# Convert the results dictionary to a DataFrame
results_df_abstract = pd.DataFrame(res_abstract).T.reset_index().rename(columns={'index': 'article_count'})

# Ensure article_count is the same type in both DataFrames
results_df['article_count'] = results_df['article_count'].astype(str)
results_df_abstract['article_count'] = results_df_abstract['article_count'].astype(str)

# Merge the new DataFrame with the original one
results_df_final = results_df.merge(results_df_abstract, on='article_count', how='left')

# Display the head of the final DataFrame
results_df_final.head()


100%|██████████| 149/149 [00:04<00:00, 30.58it/s]


Unnamed: 0,article_count,negative(sentiment_score_headline),neutral(sentiment_score_headline),positive(sentiment_score_headline),compound(sentiment_score_headline),pub_date,headline,abstract,lead_paragraph,byline,section_name,web_url,source,multimedia_url,negative(sentiment_score_abstract),neutral(sentiment_score_abstract),positive(sentiment_score_abstract),compound(sentiment_score_abstract)
0,1,0.368058,0.601618,0.030324,-0.337734,2024-01-30T13:40:11+0000,Saudi Aramco Abruptly Drops Plans to Expand Oi...,"The pullback, at the behest of the Saudi gover...",Saudi Aramco said Tuesday that it would call o...,By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/30/business/sa...,The New York Times,https://www.nytimes.com/images/2024/01/30/mult...,0.425521,0.555502,0.018976,-0.406545
1,2,0.166938,0.778636,0.054426,-0.112513,2024-01-29T10:01:56+0000,Where Southerners Go to Fill the Tank and Feed...,Are they gas stations that serve food or resta...,New York City has its bodegas. The South has i...,By Kim Severson,Food,https://www.nytimes.com/2024/01/29/dining/sout...,The New York Times,https://www.nytimes.com/images/2024/01/31/mult...,0.037574,0.847687,0.114738,0.077164
2,3,0.505036,0.465767,0.029197,-0.475839,2024-01-27T08:00:40+0000,Democrats Say Biden Hasn’t ‘Made the Case’ on ...,The president likes to talk about his climate ...,President Biden has done more than any preside...,By Lisa Friedman,Climate,https://www.nytimes.com/2024/01/27/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/26/mult...,0.690003,0.280284,0.029713,-0.660291
3,4,0.418735,0.560968,0.020298,-0.398437,2024-01-24T19:59:53+0000,Shipping Costs Soar in Wake of Red Sea Attacks,Ships risking Houthi attacks must pay high ins...,"For about two months, a barrage of missile and...",By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/24/business/re...,The New York Times,https://www.nytimes.com/images/2024/01/23/mult...,0.638661,0.350388,0.010952,-0.627709
4,5,0.19728,0.782257,0.020464,-0.176816,2024-01-24T17:59:40+0000,White House Said to Delay Decision on Enormous...,"Before deciding whether to approve it, the Ene...",The Biden administration is pausing a decision...,By Coral Davenport,Climate,https://www.nytimes.com/2024/01/24/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/24/mult...,0.096726,0.87878,0.024494,-0.072232


In [10]:
results_df_final.head()

Unnamed: 0,article_count,negative(sentiment_score_headline),neutral(sentiment_score_headline),positive(sentiment_score_headline),compound(sentiment_score_headline),pub_date,headline,abstract,lead_paragraph,byline,section_name,web_url,source,multimedia_url,negative(sentiment_score_abstract),neutral(sentiment_score_abstract),positive(sentiment_score_abstract),compound(sentiment_score_abstract)
0,1,0.368058,0.601618,0.030324,-0.337734,2024-01-30T13:40:11+0000,Saudi Aramco Abruptly Drops Plans to Expand Oi...,"The pullback, at the behest of the Saudi gover...",Saudi Aramco said Tuesday that it would call o...,By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/30/business/sa...,The New York Times,https://www.nytimes.com/images/2024/01/30/mult...,0.425521,0.555502,0.018976,-0.406545
1,2,0.166938,0.778636,0.054426,-0.112513,2024-01-29T10:01:56+0000,Where Southerners Go to Fill the Tank and Feed...,Are they gas stations that serve food or resta...,New York City has its bodegas. The South has i...,By Kim Severson,Food,https://www.nytimes.com/2024/01/29/dining/sout...,The New York Times,https://www.nytimes.com/images/2024/01/31/mult...,0.037574,0.847687,0.114738,0.077164
2,3,0.505036,0.465767,0.029197,-0.475839,2024-01-27T08:00:40+0000,Democrats Say Biden Hasn’t ‘Made the Case’ on ...,The president likes to talk about his climate ...,President Biden has done more than any preside...,By Lisa Friedman,Climate,https://www.nytimes.com/2024/01/27/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/26/mult...,0.690003,0.280284,0.029713,-0.660291
3,4,0.418735,0.560968,0.020298,-0.398437,2024-01-24T19:59:53+0000,Shipping Costs Soar in Wake of Red Sea Attacks,Ships risking Houthi attacks must pay high ins...,"For about two months, a barrage of missile and...",By Stanley Reed,Business Day,https://www.nytimes.com/2024/01/24/business/re...,The New York Times,https://www.nytimes.com/images/2024/01/23/mult...,0.638661,0.350388,0.010952,-0.627709
4,5,0.19728,0.782257,0.020464,-0.176816,2024-01-24T17:59:40+0000,White House Said to Delay Decision on Enormous...,"Before deciding whether to approve it, the Ene...",The Biden administration is pausing a decision...,By Coral Davenport,Climate,https://www.nytimes.com/2024/01/24/climate/bid...,The New York Times,https://www.nytimes.com/images/2024/01/24/mult...,0.096726,0.87878,0.024494,-0.072232


In [11]:
NYT_articles_results = results_df_final

# Save the merged DataFrame to a new CSV file
output_path = 'NYT_articles_results.csv'
NYT_articles_results.to_csv(output_path, index=False)

print(f"The merged DataFrame has been saved to {output_path}")

The merged DataFrame has been saved to NYT_articles_results.csv


In [12]:
NYT_articles_results["multimedia_url"][8]

'https://www.nytimes.com/images/2024/01/19/multimedia/19ukraine-oil-mhbj/19ukraine-oil-mhbj-articleLarge.jpg'

In [13]:
file_path = 'Final_X_articles.csv'

# Read the CSV files
X_articles = pd.read_csv(file_path)

X_articles.tail()


Unnamed: 0,tweet_count,tweet.created_at,tweet.user.name,tweet.user.location,tweet.user.followers_count,tweet.text,tweet.view_count,tweet.favorite_count,tweet.retweet_count,tweet.reply_count,tweet.media
1726,1727,2024-07-09 18:17:04+00:00,Asia Fact Check Lab,,148,A claim emerged among Chinese netizens in June...,66,3,1,0,"[{'display_url': 'pic.x.com/znday8bmoj', 'expa..."
1727,1728,2024-07-09 18:28:45+00:00,House Appropriations,"Washington, DC",22922,The FY25 Interior and Environment Appropriatio...,1787,9,2,4,"[{'display_url': 'pic.x.com/hwidzcew02', 'expa..."
1728,1729,2024-07-09 18:45:17+00:00,EIA,"Washington, DC",181421,☝️Correction: #Cryptocurrency mining. Apologie...,2245,1,0,0,
1729,1730,2024-07-09 19:46:22+00:00,GetWireless,Minnesota,693,💡100% of the top 20 U.S. oil and gas producers...,26,0,0,0,"[{'display_url': 'pic.x.com/ezsk6nc95w', 'expa..."
1730,1731,2024-07-09 19:50:02+00:00,EIA,"Washington, DC",181421,As we continue to highlight the #STEO forecast...,9149,8,3,1,


In [14]:
example = X_articles["tweet.text"][13]
print(f"Example: {example}")
def polarity_scores_roberta(example):
    encoded_text = tokenizer(example, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'negative(sentiment_score)' : scores[0],
        'neutral(sentiment_score)' : scores[1],
        'positive(sentiment_score)' : scores[2],
        'compound(sentiment_score)' : np.dot(scores, [-1, 0, 1]) 
    }
    return scores_dict

polarity_scores_roberta(example)

Example: #TodayInEnergy - U.S. #HenryHub #naturalgasprices in 2023 were the lowest since mid-2020   https://t.co/EGSbAKi2jP https://t.co/rObAaluhMQ


{'negative(sentiment_score)': 0.43668902,
 'neutral(sentiment_score)': 0.5049686,
 'positive(sentiment_score)': 0.058342475,
 'compound(sentiment_score)': -0.3783465437591076}

In [15]:
res = {}
for i, row in tqdm(X_articles.iterrows(), total=len(X_articles)):
    try:
        text = row['tweet.text']
        myid = row['tweet_count']
        res[myid] = polarity_scores_roberta(text)
    except RuntimeError:
        print(f'Broke for id {myid}')

 34%|███▍      | 595/1731 [00:33<01:03, 17.80it/s]


KeyboardInterrupt: 

In [None]:
X_articles_results = pd.DataFrame(res).T
X_articles_results = X_articles_results.reset_index().rename(columns={'index': 'tweet_count'})
X_articles_results = X_articles_results.merge(X_articles, how='left')
X_articles_results.head()

Unnamed: 0,tweet_count,negative(sentiment_score),neutral(sentiment_score),positive(sentiment_score),compound(sentiment_score),tweet.created_at,tweet.user.name,tweet.user.location,tweet.user.followers_count,tweet.text,tweet.view_count,tweet.favorite_count,tweet.retweet_count,tweet.reply_count,tweet.media
0,1,0.224576,0.714257,0.061168,-0.163408,2024-01-02 13:10:06+00:00,EIA,"Washington, DC",181422,#TodayInEnergy - #Brent crude #oilprices avera...,20679,30,17,0,"[{'display_url': 'pic.x.com/gexe4zlndm', 'expa..."
1,2,0.004042,0.304035,0.691923,0.687881,2024-01-02 18:11:37+00:00,EIA,"Washington, DC",181422,Reminder: #TodayInEnergy will see some big cha...,6858,15,10,1,
2,3,0.177734,0.775157,0.047109,-0.130626,2024-01-02 19:04:55+00:00,EIA,"Washington, DC",181422,Annual average price of Brent crude #oil: \n\n...,11759,27,9,1,"[{'display_url': 'pic.x.com/iaqlwlrdst', 'expa..."
3,4,0.036057,0.877669,0.086274,0.050218,2024-01-02 21:09:05+00:00,EIA,"Washington, DC",181422,We're adjusting our data release schedule due ...,3671,7,1,1,"[{'display_url': 'pic.x.com/nnvwtlnraj', 'expa..."
4,5,0.194202,0.72231,0.083488,-0.110714,2024-01-02 21:30:01+00:00,EIA,"Washington, DC",181422,Comparing September 2023 to September 2022:\n\...,3956,15,3,1,


In [16]:
# Save the merged DataFrame to a new CSV file
output_path = 'X_articles_results.csv'
X_articles_results.to_csv(output_path, index=False)

print(f"The merged DataFrame has been saved to {output_path}")

NameError: name 'X_articles_results' is not defined

In [None]:
X_articles_results["tweet.media"][8]

NameError: name 'X_articles_results' is not defined

In [20]:
import pandas as pd
import json

# Load the CSV file into a DataFrame
file_path = 'C:\\Users\\aerok\\OneDrive\\Рабочий стол\\Socioeconomic Dynamics\\Sentiment Analysis\\X_articles_results.csv'
df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame
print(df.head())

# Function to extract media_url_https
def extract_media_url(media_column):
    if pd.isna(media_column):
        return ""
    try:
        # Ensure the input is a string
        media_column = str(media_column)
        # Parse the JSON-like structure
        media_items = json.loads(media_column.replace("'", '"'))  # Replace single quotes with double quotes for valid JSON
        if isinstance(media_items, list):
            # Extract media_url_https from each media item
            media_urls = [item['media_url_https'] for item in media_items if 'media_url_https' in item]
            return ', '.join(media_urls)  # Join multiple URLs with a comma
        else:
            return ""
    except (json.JSONDecodeError, TypeError):
        return ""

# Apply the extraction function to the tweet.media column
df['media_url_https'] = df['tweet.media'].apply(extract_media_url)

# Display the first few rows of the updated DataFrame
df.tail()




   tweet_count  negative(sentiment_score)  neutral(sentiment_score)  \
0            1                   0.224576                  0.714257   
1            2                   0.004042                  0.304035   
2            3                   0.177734                  0.775157   
3            4                   0.036057                  0.877669   
4            5                   0.194202                  0.722310   

   positive(sentiment_score)  compound(sentiment_score)  \
0                   0.061168                  -0.163408   
1                   0.691923                   0.687881   
2                   0.047109                  -0.130626   
3                   0.086274                   0.050218   
4                   0.083488                  -0.110714   

            tweet.created_at tweet.user.name tweet.user.location  \
0  2024-01-02 13:10:06+00:00             EIA      Washington, DC   
1  2024-01-02 18:11:37+00:00             EIA      Washington, DC   
2  2024-01-02 

Unnamed: 0,tweet_count,negative(sentiment_score),neutral(sentiment_score),positive(sentiment_score),compound(sentiment_score),tweet.created_at,tweet.user.name,tweet.user.location,tweet.user.followers_count,tweet.text,tweet.view_count,tweet.favorite_count,tweet.retweet_count,tweet.reply_count,tweet.media,media_url_https
1726,1727,0.272725,0.700648,0.026627,-0.246098,2024-07-09 18:17:04+00:00,Asia Fact Check Lab,,148,A claim emerged among Chinese netizens in June...,66,3,1,0,"[{'display_url': 'pic.x.com/znday8bmoj', 'expa...",https://pbs.twimg.com/media/GSEKZmcaUAEbSeh.jpg
1727,1728,0.018073,0.553974,0.427953,0.409881,2024-07-09 18:28:45+00:00,House Appropriations,"Washington, DC",22922,The FY25 Interior and Environment Appropriatio...,1787,9,2,4,"[{'display_url': 'pic.x.com/hwidzcew02', 'expa...",
1728,1729,0.462649,0.478608,0.058743,-0.403906,2024-07-09 18:45:17+00:00,EIA,"Washington, DC",181421,☝️Correction: #Cryptocurrency mining. Apologie...,2245,1,0,0,,
1729,1730,0.004067,0.266414,0.729519,0.725453,2024-07-09 19:46:22+00:00,GetWireless,Minnesota,693,💡100% of the top 20 U.S. oil and gas producers...,26,0,0,0,"[{'display_url': 'pic.x.com/ezsk6nc95w', 'expa...",https://pbs.twimg.com/tweet_video_thumb/GSEbrZ...
1730,1731,0.048669,0.794752,0.156578,0.107909,2024-07-09 19:50:02+00:00,EIA,"Washington, DC",181421,As we continue to highlight the #STEO forecast...,9149,8,3,1,,


In [21]:
# Save the merged DataFrame to a new CSV file
output_path = 'X_articles_results_2.csv'
df.to_csv(output_path, index=False)

print(f"The merged DataFrame has been saved to {output_path}")

The merged DataFrame has been saved to X_articles_results_2.csv
