In [None]:
import os
from datetime import datetime, timedelta

import requests

# Alpha Vantage credentials and endpoint.
# The key is read from the ALPHAVANTAGE_API_KEY environment variable so it never
# has to be committed with the notebook; it falls back to an empty string when
# the variable is not set.
api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "")
base_url = "https://www.alphavantage.co/query"

def get_company_news(ticker, start, timeout=30):
    """Fetch one page of Alpha Vantage NEWS_SENTIMENT articles for a ticker.

    Args:
        ticker: Ticker symbol sent as the ``tickers`` query parameter.
        start: ``datetime`` lower bound, sent as ``time_from`` in
            ``YYYYMMDDTHHMM`` form (seconds are dropped by the format).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list stored under the response's ``"feed"`` key (``[]`` when the
        key is absent), or ``None`` when the request fails, the HTTP status is
        not 200, or the body is not valid JSON.
    """
    start_formatted = start.strftime('%Y%m%dT%H%M')

    params = {
        "function": "NEWS_SENTIMENT",
        "apikey": api_key,
        "tickers": ticker,
        "time_from": start_formatted,
        "limit": 1000,
        "sort": "EARLIEST"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Ошибка: {exc}")
        return None

    if response.status_code != 200:
        print(f"Ошибка: {response.text}")
        return None

    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page served with status 200).
        print(f"Ошибка: {response.text}")
        return None

    if "feed" not in payload:
        # NOTE(review): the API appears to report rate limits / invalid calls
        # with HTTP 200 and a message under one of these keys - confirm against
        # the Alpha Vantage documentation. Surface it instead of hiding it.
        for message_key in ("Error Message", "Information", "Note"):
            if message_key in payload:
                print(f"Alpha Vantage: {payload[message_key]}")
                break

    return payload.get("feed", [])

In [None]:
def fetch_news_for_last_year(ticker, fetch_page=None):
    """Collect news for ``ticker`` from 365 days ago until now, page by page.

    Each page is requested starting at the minute of the last article already
    received. The page fetcher formats its start time without seconds, so
    resuming at "last article + 1 minute" would skip articles published later
    in that same minute; re-reading the minute and dropping duplicates avoids
    that loss.

    Args:
        ticker: Ticker symbol passed through to the page fetcher.
        fetch_page: Optional callable ``(ticker, start_datetime) -> list | None``
            used to fetch one page. Defaults to ``get_company_news``.

    Returns:
        List of article dicts in the order received, without duplicates
        (articles are identified by their ``url`` and ``time_published``).
    """
    fetch = get_company_news if fetch_page is None else fetch_page

    all_news = []
    seen = set()
    # NOTE(review): datetime.now() is local time - confirm it matches the
    # timezone of the API's time_from / time_published values.
    current_date = datetime.now() - timedelta(days=365)

    while True:
        news_data = fetch(ticker, current_date)
        if not news_data:
            print("Больше нет новостей или произошла ошибка.")
            break

        added = 0
        for article in news_data:
            key = (article.get("url"), article.get("time_published"))
            if key in seen:
                continue
            seen.add(key)
            all_news.append(article)
            added += 1

        last_news_datetime = datetime.strptime(
            news_data[-1]["time_published"], "%Y%m%dT%H%M%S"
        )
        # Resume from the start of the last article's minute so nothing
        # published later in that minute is skipped.
        resume_from = last_news_datetime.replace(second=0, microsecond=0)
        if added == 0 or resume_from <= current_date:
            # Nothing new, or we are still in the minute we just asked for:
            # step one minute forward so the loop always makes progress.
            current_minute = current_date.replace(second=0, microsecond=0)
            resume_from = max(resume_from, current_minute) + timedelta(minutes=1)
        current_date = resume_from

        if current_date >= datetime.now():
            break

    return all_news

In [None]:
import pandas as pd
def create_dataframe(news_list, ticker="AAPL"):
    """Flatten news articles into a DataFrame with one row per matching article.

    An article contributes a row only if its ``ticker_sentiment`` list contains
    an entry whose ``"ticker"`` equals ``ticker``; the first such entry supplies
    the ticker-specific columns. Fields missing from an article or entry become
    ``None``.

    Args:
        news_list: Iterable of article dicts.
        ticker: Ticker symbol to look for in each article's ``ticker_sentiment``.

    Returns:
        ``pandas.DataFrame`` built from the collected rows (empty when nothing
        matches).
    """
    article_fields = (
        "title",
        "url",
        "time_published",
        "summary",
        "overall_sentiment_score",
        "overall_sentiment_label",
    )
    sentiment_fields = (
        "relevance_score",
        "ticker_sentiment_score",
        "ticker_sentiment_label",
    )

    rows = []
    for article in news_list:
        # First sentiment entry for the requested ticker, if any.
        matched = next(
            (
                entry
                for entry in article.get("ticker_sentiment", [])
                if entry["ticker"] == ticker
            ),
            None,
        )
        if matched is None:
            continue

        row = {field: article.get(field) for field in article_fields}
        row.update({field: matched.get(field) for field in sentiment_fields})
        rows.append(row)

    return pd.DataFrame(rows)

In [None]:
# Download a year of news for one ticker and save the flattened table as CSV.
ticker = "AAPL"
news_last_year = fetch_news_for_last_year(ticker)
news = create_dataframe(news_last_year, ticker=ticker)
# The file name follows the ticker, so changing `ticker` above does not write
# another company's news into a file labelled AAPL.
news.to_csv(f"{ticker}News.csv", encoding='utf-8')

In [None]:
!pip install investpy

In [None]:
import investpy
def getEconomicNews(countries, start, end):
    """Fetch high- and medium-importance economic calendar events via investpy.

    Args:
        countries: Forwarded unchanged as investpy's ``countries`` argument.
        start: Forwarded as ``from_date`` (this notebook passes ``dd/mm/yyyy`` strings).
        end: Forwarded as ``to_date``.

    Returns:
        The calendar rows in which both ``importance`` and ``previous`` are
        non-null, re-indexed from 0.
    """
    calendar = investpy.economic_calendar(
        countries=countries,
        importances=['high', 'medium'],
        from_date=start,
        to_date=end,
    )
    # Keep only events that carry both an importance level and a previous value.
    has_required_fields = calendar[['importance', 'previous']].notnull().all(axis=1)
    return calendar[has_required_fields].reset_index(drop=True)


In [None]:
# Fixed one-year window; no country filter is passed (countries=None).
economicNews = getEconomicNews(countries=None, start="28/10/2023", end="28/10/2024")
economicNews

Unnamed: 0,id,date,time,zone,currency,importance,event,actual,forecast,previous
0,484285,30/10/2023,00:30,australia,AUD,medium,Retail Sales (MoM) (Sep),0.9%,0.3%,0.3%
1,484300,30/10/2023,07:00,sweden,SEK,medium,GDP (YoY) (Q3),-1.2%,,-1.0%
2,484299,30/10/2023,07:00,sweden,SEK,medium,GDP (QoQ) (Q3),0.0%,0.4%,-0.8%
3,484309,30/10/2023,08:00,switzerland,CHF,medium,KOF Leading Indicators (Oct),95.8,95.0,95.9
4,484305,30/10/2023,08:00,spain,EUR,medium,Spanish CPI (YoY) (Oct),3.5%,3.8%,3.5%
...,...,...,...,...,...,...,...,...,...,...
4925,510038,25/10/2024,19:30,brazil,BRL,medium,CFTC BRL speculative net positions,-2.8K,,-5.1K
4926,510031,25/10/2024,19:30,japan,JPY,medium,CFTC JPY speculative net positions,12.8K,,34.1K
4927,510042,25/10/2024,19:30,euro zone,EUR,medium,CFTC EUR speculative net positions,-28.5K,,17.1K
4928,510197,28/10/2024,15:30,united states,USD,medium,2-Year Note Auction,4.130%,,3.520%


In [None]:
# Save the filtered economic calendar next to the notebook.
economicNews.to_csv(
    path_or_buf="investingEconomicCalendarNews.csv",
    encoding="utf-8",
)

In [None]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta


# Browser-like User-Agent sent with every investing.com request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"
}

# URL slugs of the currency pairs whose news listings are scraped.
currency_pairs = ['eur-usd', 'jpy-usd', 'gbp-usd', 'aud-usd', 'cad-usd', 'chf-usd', 'nzd-usd', 'sek-usd',
                  'nok-usd', 'dkk-usd', 'sgd-usd', 'hkd-usd', 'krw-usd', 'inr-usd', 'cny-usd', 'thb-usd',
                  'mxn-usd', 'brl-usd', 'rub-usd', 'zar-usd', 'twd-usd', 'try-usd', 'pln-usd', 'huf-usd',
                  'czk-usd', 'ils-usd', 'php-usd', 'myr-usd', 'idr-usd', 'sar-usd']

# Articles published before this moment are not collected.
one_year_ago = datetime.now() - timedelta(days=365)

MAX_PAGES_PER_PAIR = 100  # listing pages 1..100 are tried for each pair
REQUEST_TIMEOUT = 30      # seconds to wait for each listing page


def _stripped_text(node):
    """Return the stripped text of a parsed tag, or None when the tag is missing."""
    return node.text.strip() if node is not None else None


def _parse_article(item, pair):
    """Build one news record from an <article> tag; None if it lacks a title link or publish time."""
    title_link = item.find("a", {"data-test": "article-title-link"})
    time_tag = item.find("time", {"data-test": "article-publish-date"})
    if title_link is None or time_tag is None:
        return None

    return {
        "pair": pair,
        "title": title_link.text.strip(),
        # Looked up per article: a missing description becomes None instead of
        # silently reusing the previous article's text.
        "description": _stripped_text(item.find("p", {"data-test": "article-description"})),
        "link": title_link["href"],
        "provider": _stripped_text(item.find("span", {"data-test": "news-provider-name"})),
        "time_published": datetime.strptime(time_tag["datetime"], "%Y-%m-%d %H:%M:%S"),
    }


all_news_data = []

for pair in currency_pairs:
    print(f"Парсинг новостей для валютной пары: {pair}")
    pair_news = []
    reached_cutoff = False

    for page_number in range(1, MAX_PAGES_PER_PAIR + 1):
        url = f"https://investing.com/currencies/{pair}-news/{page_number}"
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            print(f"Не удалось получить данные для {pair}, страница {page_number}")
            break

        if response.status_code != 200:
            print(f"Не удалось получить данные для {pair}, страница {page_number}")
            break

        soup = BeautifulSoup(response.content, "html.parser")

        news_items = soup.find_all("article", {"data-test": "article-item"})
        if not news_items:
            break

        for item in news_items:
            record = _parse_article(item, pair)
            if record is None:
                # Entry without a title link or timestamp - nothing usable to store.
                continue

            # NOTE(review): stopping at the first old article assumes the listing
            # is ordered newest-first - confirm against the site.
            if record["time_published"] < one_year_ago:
                reached_cutoff = True
                break

            pair_news.append(record)

        if reached_cutoff:
            break

    all_news_data.extend(pair_news)

# A dedicated loop name keeps the `news` DataFrame from the earlier cell intact.
for news_record in all_news_data:
    print(news_record)

In [None]:
# Turn the scraped records into a table and save it next to the notebook.
currency_news_frame = pd.DataFrame(all_news_data)
currency_news_frame.to_csv("topCurrenciesUSDLastInvestingComNews.csv", encoding="utf-8")