In [2]:
import pandas as pd

# Source CSV and destination for its chronologically ordered copy
input_file = "RAG_news.csv"
output_file = "RAG_news_sorted.csv"

# Load the file, parse the timestamp column, order rows oldest-first,
# and renumber the index so it follows the new order.
df = (
    pd.read_csv(input_file)
    .assign(newsDatetime=lambda frame: pd.to_datetime(frame['newsDatetime']))
    .sort_values(by='newsDatetime')
    .reset_index(drop=True)
)

# Persist the ordered rows; the positional index is not written out
df.to_csv(output_file, index=False)

print(f"Saved sorted file to {output_file}")

Saved sorted file to RAG_news_sorted.csv


In [3]:
import pandas as pd

# Source CSV and destination for the 2025-only subset
input_file = "RAG_news.csv"
output_file = "RAG_news_2025.csv"

# Load the file and parse the timestamp column into real datetimes
df = pd.read_csv(input_file).assign(
    newsDatetime=lambda frame: pd.to_datetime(frame['newsDatetime'])
)

# Keep only rows whose timestamp falls in calendar year 2025
published_in_2025 = df['newsDatetime'].dt.year.eq(2025)
df_2025 = df.loc[published_in_2025]

# Show the subset, then write it out without the index column
print(df_2025)
df_2025.to_csv(output_file, index=False)
print(f"Saved 2025 news to {output_file}")

           id                                              title  \
22166  174627  Ripple’s XRP takes second place in trade in So...   
22167  174628  Andrew Tate Launches BRUV Party, Eyes Bitcoin ...   
22168  174629  Cardano's Price Might Reclaim $1.20 in Days, H...   
22169  174630  Cardano’s 2025 Strategy: Wallet Innovation and...   
22170  174631  MicroStrategy Plans $2B Stock Offering to Boos...   
...       ...                                                ...   
24253  225824  Trump Coin Whales Dump $6.4M Ahead Of VIP Meme...   
24254  225825  Over 760K lost money on Donald Trump's memecoi...   
24255  225826  Trump to host memecoin gala dinner amid backla...   
24256  226047  Vitalik Buterin praises real-time ZK proving m...   
24257  226185  Cardano to Directly Feature Blockchain and Ass...   

                                             description        newsDatetime  \
22166  A recent survey by South Korea’s News1 has rev... 2025-01-07 15:13:49   
22167  Tate has launche

In [5]:
import pandas as pd

# Source CSV and destination for the 2024-2025 subset in chronological order
input_file = "RAG_news.csv"
output_file = "RAG_news_2024_2025_sorted.csv"

# Load the file and parse the timestamp column into real datetimes
df = pd.read_csv(input_file).assign(
    newsDatetime=lambda frame: pd.to_datetime(frame['newsDatetime'])
)

# Keep rows whose calendar year is one of the wanted years
wanted_years = [2024, 2025]
in_wanted_years = df['newsDatetime'].dt.year.isin(wanted_years)
df_filtered = df.loc[in_wanted_years]

# Oldest first (ascending is the default), with a fresh 0..n-1 index
df_sorted = df_filtered.sort_values('newsDatetime').reset_index(drop=True)

# Write the result without the index column
df_sorted.to_csv(output_file, index=False)
print(f"Saved sorted 2024 & 2025 news to {output_file}")

Saved sorted 2024 & 2025 news to RAG_news_2024_2025_sorted.csv


In [6]:
import pandas as pd

# Move every article dated 2024-05-24 .. 2024-07-31 (inclusive calendar dates)
# one year forward, re-sort the file chronologically and save it.
#
# NOTE(review): this rewrites the recorded year of 2024 articles to 2025, so the
# resulting timestamps no longer match the values in the input file — confirm
# that downstream consumers expect these shifted dates.

# File paths
input_file = "RAG_news_2024_2025_sorted.csv"  # <- only 2024 and 2025 news
output_file = "RAG_news_2024_2025_updated_dates.csv"

# Read the CSV
df = pd.read_csv(input_file)

# Ensure newsDatetime is datetime
df['newsDatetime'] = pd.to_datetime(df['newsDatetime'])

# First and last calendar day of the window to move from 2024 to 2025
start = pd.to_datetime('2024-05-24')
end   = pd.to_datetime('2024-07-31')

# `end` parses to 2024-07-31 00:00:00, so `<= end` would drop every article
# published later that day. Compare against the start of the following day to
# make the last calendar date fully inclusive. The window lies entirely inside
# 2024, so no separate year check is needed; NaT rows compare False and are
# left untouched.
mask = (df['newsDatetime'] >= start) & (df['newsDatetime'] < end + pd.Timedelta(days=1))

# Shift the selected timestamps by exactly one calendar year (vectorized).
# The window contains no Feb 29, so this equals replacing the year with 2025.
df.loc[mask, 'newsDatetime'] = df.loc[mask, 'newsDatetime'] + pd.DateOffset(years=1)

# Sort by newsDatetime so the shifted rows land in their new chronological place
df = df.sort_values(by='newsDatetime', ascending=True).reset_index(drop=True)

# Save to new CSV
df.to_csv(output_file, index=False)
print(f"Updated dates and saved to {output_file}")


Updated dates and saved to RAG_news_2024_2025_updated_dates.csv


In [7]:
import pandas as pd

# Within rows dated 2025-05-24 .. 2025-07-31 (inclusive calendar dates), replace
# the substring '2024' with '2025' in the text/URL columns and save the result.

# File paths
input_file = "RAG_new_2024-2025.csv"
output_file = "RAG_new_2024-2025_switched.csv"

# Read the CSV
df = pd.read_csv(input_file)

# Ensure newsDatetime is datetime
df['newsDatetime'] = pd.to_datetime(df['newsDatetime'])

# First and last calendar day of the window to edit
start = pd.to_datetime('2025-05-24')
end   = pd.to_datetime('2025-07-31')

# `end` parses to midnight, so `<= end` would skip articles published later on
# 2025-07-31. Compare against the start of the next day to include that date.
mask = (df['newsDatetime'] >= start) & (df['newsDatetime'] < end + pd.Timedelta(days=1))

# Columns to check/replace
# NOTE(review): this is a plain substring replacement, so a '2024' that is part
# of a numeric ID or slug inside `url` / `sourceUrl` is rewritten as well, which
# may break those links — confirm this is acceptable.
cols = ['title', 'description', 'url', 'sourceUrl']

# For each column, replace '2024' with '2025' in the masked rows
for col in cols:
    original = df.loc[mask, col]
    replaced = original.astype(str).str.replace('2024', '2025', regex=False)
    # astype(str) turns missing values into the literal text 'nan'; put the
    # original missing values back so they stay missing.
    df.loc[mask, col] = replaced.where(original.notna(), original)

# Save to new CSV
df.to_csv(output_file, index=False)
print(f"Switched '2024' to '2025' in date range and saved to {output_file}")


Switched '2024' to '2025' in date range and saved to RAG_new_2024-2025_switched.csv


In [9]:
import pandas as pd

# Read back the file produced by the '2024' -> '2025' replacement step
df = pd.read_csv("RAG_new_2024-2025_switched.csv")

# Columns to inspect; the bare expression on the last line is rendered by the notebook
columns_to_show = ['positiveVotes', 'negativeVotes', 'sourceUrl', 'currencies']
df[columns_to_show]



Unnamed: 0,positiveVotes,negativeVotes,sourceUrl,currencies
0,4,7,https://u.today/cardano-ada-skyrockets-in-this...,ADA
1,1,1,https://finbold.com/4-altcoins-to-buy-under-1-...,"BTC,ADA,XRP,MATIC,ALGO"
2,0,4,https://www.cryptopolitan.com/five-bitcoin-min...,BTC
3,2,0,https://www.newsbtc.com/news/ethereum/ethereum...,"BTC,ETH,GRT"
4,3,0,https://www.benzinga.com/markets/cryptocurrenc...,"BTC,ETH,DOGE"
...,...,...,...,...
4573,2,3,https://u.today/cardano-inching-closer-to-chan...,ADA
4574,6,5,https://u.today/cardano-ada-prediction-for-jul...,ADA
4575,3,1,https://finbold.com/rich-dad-r-kiyosaki-reveal...,"BTC,ETH,SOL"
4576,1,4,https://ambcrypto.com/will-solana-become-a-tru...,"ETH,SOL"


In [10]:
import pandas as pd

# Turn the comma-separated `currencies` column into one row per currency and
# save the result. All other columns are repeated on each produced row.

# Load your file
df = pd.read_csv("RAG_new_2024-2025.csv")

# Split the 'currencies' column by comma into lists.
# astype(str) would turn a missing value into the text 'nan', which would then
# be exported as if it were a currency ticker; `.where(...)` restores missing
# values so those rows keep an empty `currencies` field.
has_currencies = df['currencies'].notna()
df['currencies'] = df['currencies'].astype(str).str.split(',').where(has_currencies)

# Explode so each currency gets its own row (rows without currencies stay as one row)
df_exploded = df.explode('currencies').reset_index(drop=True)

# Clean up whitespace around currency names (missing values pass through unchanged)
df_exploded['currencies'] = df_exploded['currencies'].str.strip()

# Save the exploded DataFrame
df_exploded.to_csv("RAG_new_2024-2025_exploded.csv", index=False)

print("Done! Each row now has only one currency. Saved as RAG_new_2024-2025_exploded.csv")


Done! Each row now has only one currency. Saved as RAG_new_2024-2025_exploded.csv


In [12]:
import pandas as pd

# Load the CSV and let the notebook render it as a table:
# a bare variable name on the last line of a cell is displayed automatically.
source_csv = "RAG_new_2024-2025.csv"
df = pd.read_csv(source_csv)
df

Unnamed: 0,id,title,description,newsDatetime,url,positiveVotes,negativeVotes,sourceUrl,currencies
0,148257,Cardano (ADA) Skyrockets in This Bullish Metri...,Majority of growth occurred in mid-December,2024-01-01 12:14:00,https://cryptopanic.com/news/19143576/Cardano-...,4,7,https://u.today/cardano-ada-skyrockets-in-this...,ADA
1,152291,4 altcoins to buy under $1 for start of 2024,After altcoins took a breather in the final we...,2024-01-01 14:00:00,https://cryptopanic.com/news/19143677/4-altcoi...,1,1,https://finbold.com/4-altcoins-to-buy-under-1-...,BTC
2,152291,4 altcoins to buy under $1 for start of 2024,After altcoins took a breather in the final we...,2024-01-01 14:00:00,https://cryptopanic.com/news/19143677/4-altcoi...,1,1,https://finbold.com/4-altcoins-to-buy-under-1-...,ADA
3,152291,4 altcoins to buy under $1 for start of 2024,After altcoins took a breather in the final we...,2024-01-01 14:00:00,https://cryptopanic.com/news/19143677/4-altcoi...,1,1,https://finbold.com/4-altcoins-to-buy-under-1-...,XRP
4,152291,4 altcoins to buy under $1 for start of 2024,After altcoins took a breather in the final we...,2024-01-01 14:00:00,https://cryptopanic.com/news/19143677/4-altcoi...,1,1,https://finbold.com/4-altcoins-to-buy-under-1-...,MATIC
...,...,...,...,...,...,...,...,...,...
7666,164101,‘Rich Dad’ R. Kiyosaki reveals the only 3 cryp...,"Though often viewed as controversial, Robert K...",2025-07-30 15:24:51,https://cryptopanic.com/news/19741440/Rich-Dad...,3,1,https://finbold.com/rich-dad-r-kiyosaki-reveal...,SOL
7667,164438,Will Solana become a true ‘Ethereum killer’ in...,"Solana's total value locked has surged by 25%,...",2025-07-30 22:00:25,https://cryptopanic.com/news/19742703/Will-Sol...,1,4,https://ambcrypto.com/will-solana-become-a-tru...,ETH
7668,164438,Will Solana become a true ‘Ethereum killer’ in...,"Solana's total value locked has surged by 25%,...",2025-07-30 22:00:25,https://cryptopanic.com/news/19742703/Will-Sol...,1,4,https://ambcrypto.com/will-solana-become-a-tru...,SOL
7669,164437,Polygon (MATIC) Gets MoveVM Boost in Goal to B...,MoveVM Chains are coming to the AggLayer in a ...,2025-07-30 22:35:00,https://cryptopanic.com/news/19742767/Polygon-...,3,0,https://dailycoin.com/polygon-matic-gets-movev...,ETH
