In [None]:
pip install requests praw


Collecting praw
  Downloading praw-7.7.1-py3-none-any.whl.metadata (9.8 kB)
Collecting prawcore<3,>=2.1 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update-checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.7.1-py3-none-any.whl (191 kB)
   ---------------------------------------- 0.0/191.0 kB ? eta -:--:--
   ------ -------------------------------- 30.7/191.0 kB 660.6 kB/s eta 0:00:01
   ------------------------------------ --- 174.1/191.0 kB 2.1 MB/s eta 0:00:01
   ---------------------------------------- 191.0/191.0 kB 1.7 MB/s eta 0:00:00
Downloading prawcore-2.4.0-py3-none-any.whl (17 kB)
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: update-checker, prawcore, praw
Successfully installed praw-7.7.1 prawcore-2.4.0 update-checker-0.18.0


In [None]:
import os
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
import praw
import requests

### Retrieval using the GDELT API

In [None]:
# Collect up to 20 "Ethereum" articles per calendar day for the past year from
# the GDELT DOC 2.0 API, then save the combined list to CSV.
url = "https://api.gdeltproject.org/api/v2/doc/doc"
end_date = datetime.now()
start_date = end_date - timedelta(days=365)
all_articles = []

current_date = start_date
backoff_time = 3
max_retries_per_day = 5  # give up on a day instead of retrying a 429 forever
retries = 0

while current_date <= end_date:
    day_label = current_date.strftime('%Y-%m-%d')
    params = {
        'query': 'Ethereum',
        'mode': 'ArtList',
        'maxrecords': 20,
        'sort': 'DateDesc',
        'format': 'json',
        # The DOC API names its window parameters startdatetime/enddatetime.
        # With any other name the window is ignored and every request returns
        # the newest articles regardless of the day being asked for.
        'startdatetime': current_date.strftime('%Y%m%d000000'),
        'enddatetime': current_date.strftime('%Y%m%d235959')
    }

    try:
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Network failure or timeout: report it and move on to the next day.
        print(f"Error: {exc} on date {day_label}")
        response = None

    if response is None:
        pass
    elif response.status_code == 200:
        backoff_time = 3
        try:
            data = response.json().get('articles', [])
        except ValueError:
            # A 200 response is not guaranteed to carry JSON (e.g. a plain-text
            # error message); report it instead of crashing the whole run.
            print(f"Error: non-JSON response on date {day_label}: {response.text[:200]!r}")
            data = None

        if data:
            for article in data:
                all_articles.append({
                    'Title': article.get('title'),
                    'SeenDate': article.get('seendate'),
                    'URL': article.get('url'),
                    'Domain': article.get('domain'),
                    'Date': day_label
                })
        elif data is not None:
            print(f"No articles found for date {day_label}")

    elif response.status_code == 429 and retries < max_retries_per_day:
        try:
            retry_after = int(response.headers.get('Retry-After', backoff_time))
        except ValueError:
            # Retry-After may be an HTTP date rather than a number of seconds.
            retry_after = backoff_time
        print(f"Rate limited on {day_label}. Retrying after {retry_after} seconds.")
        time.sleep(retry_after)
        backoff_time *= 2
        retries += 1
        continue  # retry the same day

    else:
        print(f"Error: {response.status_code} on date {day_label}")

    retries = 0
    current_date += timedelta(days=1)
    time.sleep(backoff_time)

df = pd.DataFrame(all_articles)
# An empty result has no columns at all, so only parse the timestamp when present.
if 'SeenDate' in df.columns:
    df['SeenDate'] = pd.to_datetime(df['SeenDate'], format='%Y%m%dT%H%M%SZ')

print("Columns in DataFrame:", df.columns)
print("Number of articles collected:", len(df))
df.to_csv('gdelt_ethereum_news_last_1_year.csv', index=False)

Columns in DataFrame: Index(['Title', 'SeenDate', 'URL', 'Domain', 'Date'], dtype='object')
Number of articles collected: 7320


In [None]:
# Display the `df` DataFrame currently in the kernel (pandas truncates long frames).
df

Unnamed: 0,Title,SeenDate,URL,Domain,Date
0,Cheating 9 to 5 : 13 Better Income Options,2024-10-06 10:00:00,https://bmmagazine.co.uk/business/cheating-9-t...,bmmagazine.co.uk,2023-10-07
1,JP Morgan Predicts Major Revenue Boost from Ba...,2024-10-06 09:15:00,https://insidebitcoins.com/news/jp-morgan-pred...,insidebitcoins.com,2023-10-07
2,US Spot Bitcoin ETFs Record $54 Million in Out...,2024-10-06 09:15:00,https://techreport.com/crypto-news/us-spot-bit...,techreport.com,2023-10-07
3,Evaluating the Risks and Rewards of Tezos Inve...,2024-10-06 08:30:00,https://gisuser.com/2024/10/evaluating-the-ris...,gisuser.com,2023-10-07
4,Kava : Top 3 Innovations in DeFi Lending Platf...,2024-10-06 08:30:00,https://gisuser.com/2024/10/kava-top-3-innovat...,gisuser.com,2023-10-07
...,...,...,...,...,...
7315,Decentraland Price Tops $0 . 29 on Top Exchang...,2024-10-05 23:15:00,https://www.tickerreport.com/banking-finance/1...,tickerreport.com,2024-10-06
7316,Taiko Hits Self Reported Market Cap of $124 . ...,2024-10-05 22:30:00,https://www.tickerreport.com/banking-finance/1...,tickerreport.com,2024-10-06
7317,Lido Staked Matic ( STMATIC ) Self Reported Ma...,2024-10-05 22:30:00,https://www.tickerreport.com/banking-finance/1...,tickerreport.com,2024-10-06
7318,MANEKI Price Down 31 . 7 % Over Last Week ( MA...,2024-10-05 22:30:00,https://www.tickerreport.com/banking-finance/1...,tickerreport.com,2024-10-06


### Retrieval of Ethereum price data using the Kraken API

In [None]:
def get_daily_eth_price_kraken(start_date, end_date):
    """Download daily ETH/USD OHLC candles from Kraken's public OHLC endpoint.

    Args:
        start_date: First day wanted, as a 'YYYY-MM-DD' string (UTC midnight).
        end_date: Last day wanted, as a 'YYYY-MM-DD' string (UTC midnight).

    Returns:
        pandas.DataFrame with one row per daily candle whose timestamp lies in
        [start_date, end_date], sorted by date, with the columns 'Date',
        'Open Price (USD)', 'High Price (USD)', 'Low Price (USD)',
        'Close Price (USD)' and 'Volume'. The frame is empty (but still has
        these columns) when nothing could be retrieved.

    Request or API failures are printed and end the download early; whatever
    was collected up to that point is still returned.

    NOTE(review): Kraken's documentation says the endpoint only serves a
    limited number of the most recent candles, so very old start dates may be
    silently clamped — confirm against the API reference.
    """
    url = "https://api.kraken.com/0/public/OHLC"
    pair = 'XETHZUSD'
    interval = 1440  # candle width in minutes, i.e. one day
    columns = [
        'Date',
        'Open Price (USD)',
        'High Price (USD)',
        'Low Price (USD)',
        'Close Price (USD)',
        'Volume',
    ]

    # Candle timestamps are UNIX epoch seconds, so interpret the requested days
    # in UTC rather than in the machine's local timezone.
    start_timestamp = int(
        datetime.strptime(start_date, '%Y-%m-%d').replace(tzinfo=timezone.utc).timestamp()
    )
    end_timestamp = int(
        datetime.strptime(end_date, '%Y-%m-%d').replace(tzinfo=timezone.utc).timestamp()
    )

    # Keyed by candle timestamp so a candle repeated across pages is stored once.
    rows_by_timestamp = {}
    current_timestamp = start_timestamp

    while current_timestamp < end_timestamp:
        params = {
            'pair': pair,
            'interval': interval,
            'since': current_timestamp
        }
        try:
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve data: {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data. Status code: {response.status_code}")
            break

        data = response.json()
        # Kraken reports API-level failures in an 'error' list on a 200 response.
        if data.get('error'):
            print(f"Kraken API error: {data['error']}")
            break

        result = data.get('result') or {}
        if pair not in result:
            print(f"No {pair} data available in the response.")
            break

        for item in result[pair]:
            # item layout used here: [time, open, high, low, close, <unused>, volume, ...]
            timestamp = int(item[0])
            if timestamp < start_timestamp or timestamp > end_timestamp:
                continue  # outside the requested window
            rows_by_timestamp[timestamp] = {
                'Date': datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d'),
                'Open Price (USD)': float(item[1]),
                'High Price (USD)': float(item[2]),
                'Low Price (USD)': float(item[3]),
                'Close Price (USD)': float(item[4]),
                'Volume': float(item[6])
            }

        # 'last' is the cursor for the next page. If it does not move forward
        # there is nothing newer to fetch (e.g. end_date lies in the future),
        # and asking again would loop forever.
        next_timestamp = int(result.get('last', current_timestamp))
        if next_timestamp <= current_timestamp:
            break
        current_timestamp = next_timestamp

    df = pd.DataFrame(
        [rows_by_timestamp[ts] for ts in sorted(rows_by_timestamp)],
        columns=columns,
    )
    print(df.head())

    #save to a CSV file
    #df.to_csv('eth_daily_prices_sept_2023_to_oct_2024.csv', index=False)

    return df

# Fetch daily ETH/USD candles for 2023-09-01 .. 2024-10-01 and print the result.
df_eth_prices = get_daily_eth_price_kraken('2023-09-01', '2024-10-01')
print(df_eth_prices)


         Date  Open Price (USD)  High Price (USD)  Low Price (USD)  \
0  2023-09-02           1628.67           1644.50          1627.69   
1  2023-09-03           1636.89           1646.97          1625.11   
2  2023-09-04           1635.80           1643.37          1616.21   
3  2023-09-05           1629.17           1646.81          1608.13   
4  2023-09-06           1633.46           1668.43          1609.06   

   Close Price (USD)        Volume  
0            1636.89   3243.675721  
1            1635.79   4577.329291  
2            1629.17   5476.966673  
3            1633.46  16519.124389  
4            1632.29  18376.350432  
           Date  Open Price (USD)  High Price (USD)  Low Price (USD)  \
0    2023-09-02           1628.67           1644.50          1627.69   
1    2023-09-03           1636.89           1646.97          1625.11   
2    2023-09-04           1635.80           1643.37          1616.21   
3    2023-09-05           1629.17           1646.81          1608.13 

### Retrieval using the MediaStack API

In [None]:
# Page through mediastack's news endpoint for English "ethereum" articles from
# the past year, stopping at `total_results` articles or when the API runs out.
#
# The access key is read from the environment so it never lands in the notebook.
# NOTE(review): the key that was previously hard-coded in this cell has been
# exposed and should be revoked.
API_KEY = os.environ['MEDIASTACK_API_KEY']
BASE_URL = "http://api.mediastack.com/v1/news"
today = datetime.now().strftime('%Y-%m-%d')
one_year_ago = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
offset = 0
limit = 100
total_results = 1000
articles_list = []

while len(articles_list) < total_results:
    params = {
        'access_key': API_KEY,
        'keywords': 'ethereum',
        'date': f"{one_year_ago},{today}",
        'languages': 'en',
        'sort': 'published_desc',
        'limit': limit,
        'offset': offset
    }

    response = requests.get(BASE_URL, params=params, timeout=30)
    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code} - {response.text}")
        break

    page = response.json().get('data') or []
    if not page:
        # An empty page means the result set is exhausted; without this check
        # the loop would keep requesting ever-larger offsets forever.
        print("No more news found.")
        break

    for article in page:
        articles_list.append({
            'Title': article['title'],
            'Published At': article['published_at'],
            'Source': article['source'],
            'URL': article['url']
        })
    offset += limit
    print(f"Fetched {len(articles_list)} articles so far...")

    if len(page) < limit:
        # A short page is the last one; no need for another round trip.
        break

df = pd.DataFrame(articles_list)
print(df)

# save the DataFrame to a CSV file
# df.to_csv('ethereum_news_large.csv', index=False)

print(f"Total articles fetched: {len(articles_list)}")


Fetched 100 articles so far...
Fetched 200 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 257 articles so far...
Fetched 

In [None]:
# Collect the newest r/ethereum submissions from the past year via PRAW.
#
# Reddit API credentials are read from the environment so they never land in
# the notebook.
# NOTE(review): the client id/secret that were previously hard-coded in this
# cell have been exposed and should be revoked.
CLIENT_ID = os.environ['REDDIT_CLIENT_ID']
CLIENT_SECRET = os.environ['REDDIT_CLIENT_SECRET']
USER_AGENT = 'EthereumAnalyzer/1.0 by Nareshkumarsatish'
reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=USER_AGENT
)
subreddit = reddit.subreddit('ethereum')

# `created_utc` is a UTC epoch timestamp, so the cutoff is kept timezone-aware
# in UTC as well; comparing it with a naive local "now" would shift the
# one-year window by the machine's UTC offset.
today = datetime.now(timezone.utc)
one_year_ago = today - timedelta(days=365)
articles_list = []

# NOTE(review): the listing may return fewer posts than a full year contains
# (the recorded run stopped at 574 rows, reaching back only to 2024-05-20) —
# confirm the listing limits in the PRAW/Reddit documentation.
for submission in subreddit.new(limit=None):
    created_at = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
    # The listing is newest-first, so the first post older than the cutoff ends the scan.
    if created_at < one_year_ago:
        break

    articles_list.append({
        'Title': submission.title,
        'Published At': created_at.strftime('%Y-%m-%d'),
        'URL': submission.url,
        'Source': 'Reddit'
    })

df = pd.DataFrame(articles_list)
print(df)

#save the DataFrame to a CSV file
#df.to_csv('ethereum_reddit_news_last_year.csv', index=False)


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

                                                 Title Published At  \
0                                    Can’t withdrawal    2024-10-06   
1    When using Groth16 on Ethereum through ᴇɪᴘ‒197...   2024-10-05   
2    What is the Ethereum Virtual Machine (EVM)? Ba...   2024-10-05   
3        Staking or not staking? That is the question…   2024-10-05   
4                         Latest Week in Ethereum News   2024-10-05   
..                                                 ...          ...   
569                      Eth code complexity as a flaw   2024-05-21   
570                        What do i do with my WBETH?   2024-05-21   
571               Staking Ethereum (ETH) Now on Trezor   2024-05-20   
572            Sent ETH to my ETC address on Robinhood   2024-05-20   
573                                 EIP-7684 explained   2024-05-20   

                                                   URL  Source  
0    https://www.reddit.com/r/ethereum/comments/1fx...  Reddit  
1    https://www.