In [17]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# URL of the website to scrape
url = "https://news.metal.com/list/industry/aluminium"

# Send a GET request to the website. requests applies no timeout unless one is
# given, so without it an unresponsive server would block this cell forever.
response = requests.get(url, timeout=30)

# Stop here on a 4xx/5xx reply: parsing an error page would otherwise surface
# later as a misleading "No articles container found" message.
response.raise_for_status()

# Parse the HTML content of the webpage
soup = BeautifulSoup(response.content, 'html.parser')

# Find the container that holds the news articles (None when it is absent)
articles_container = soup.find('ul', class_='ant-list-items')

# Function to parse the date string
def parse_date(date_str):
    """Parse a timestamp written like ``'Aug 02, 2024 17:11'``.

    Args:
        date_str: Text in the ``'%b %d, %Y %H:%M'`` layout. Leading and
            trailing whitespace is ignored.

    Returns:
        A naive ``datetime`` on success, or ``None`` when the input is not a
        string or does not match the layout (for example the
        ``'No date available'`` placeholder).
    """
    if not isinstance(date_str, str):
        # strptime raises TypeError for non-strings, which the ValueError
        # handler below would not catch.
        return None
    try:
        # strptime rejects surrounding whitespace, so trim before parsing.
        return datetime.strptime(date_str.strip(), '%b %d, %Y %H:%M')
    except ValueError:
        return None

# Oldest publication time that still counts as recent: 45 days before now
cutoff_date = datetime.now() - timedelta(days=45)

# Without the list container there is nothing to iterate over
if not articles_container:
    print("No articles container found. Please check the HTML structure of the website.")
else:
    # Each news entry is a div carrying the site's newsItem class
    articles = articles_container.find_all('div', class_='newsItem___wZtKx')

    for article in articles:
        # Headline, falling back to a placeholder when the element is missing
        title_div = article.find('div', class_='title___1baLV')
        if title_div:
            title = title_div.text.strip()
        else:
            title = 'No title available'

        # Short description, with the same placeholder fallback
        summary_div = article.find('div', class_='description___z7ktb')
        if summary_div:
            summary = summary_div.text.strip()
        else:
            summary = 'No summary available'

        # Publication timestamp; the placeholder text will not parse and
        # therefore yields None from parse_date
        date_div = article.find('div', class_='date___3dzkE')
        if date_div:
            date_str = date_div.text.strip()
        else:
            date_str = 'No date available'
        article_date = parse_date(date_str)

        # Skip entries with an unparseable date or one older than the cutoff
        if not article_date or article_date < cutoff_date:
            continue

        print(
            f"Title: {title}",
            f"Summary: {summary}",
            f"Date: {article_date.strftime('%b %d, %Y %H:%M')}",
            "-" * 40,
            sep="\n",
        )


Title: Vedanta Aluminium receives BIS certification for high-quality 12 mm aluminium wire rods
Summary: Vedanta Aluminium has received a certification from the Bureau of Indian Standards (BIS) for its 12 mm aluminium wire rods manufactured at the BALCO facility in Korba, India. In addition, the company has acquired six renewed certifications for other products.
Date: Aug 02, 2024 17:11
----------------------------------------
Title: Bauxite-rich Guinea redrafts political landscape with new constitution
Summary: Guinea's military leadership has introduced a draft constitution to reduce presidential terms and limit the number of terms a president can serve the West African nation with the world's largest bauxite reserves on record.
Date: Aug 02, 2024 17:07
----------------------------------------
Title: Aluminum billets : This week's performance was relatively stable, and the off-season market atmosphere did not see significant improvement
Summary: Although aluminum billet inventory slig

In [19]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

# Example scraped data: a hand-copied sample of the listing, one dict per
# article. Every "date" value uses the '%b %d, %Y %H:%M' layout with no
# surrounding whitespace so that all entries are formatted consistently.
scraped_articles = [
    {"title": "Vedanta Aluminium receives BIS certification for high-quality 12 mm aluminium wire rods", "summary": "Vedanta Aluminium has received a certification from the Bureau of Indian Standards (BIS) for its 12 mm aluminium wire rods manufactured at the BALCO facility in Korba, India. In addition, the company has acquired six renewed certifications for other products.", "date": "Aug 02, 2024 17:11"},
    {"title": "Bauxite-rich Guinea redrafts political landscape with new constitution", "summary": "Guinea's military leadership has introduced a draft constitution to reduce presidential terms and limit the number of terms a president can serve the West African nation with the world's largest bauxite reserves on record.", "date": "Aug 02, 2024 17:07"},
    {"title":"Aluminum billets : This week's performance was relatively stable, and the off-season market atmosphere did not see significant improvement","summary": "Although aluminum billet inventory slightly decreased mid-week, the inventory was affected by low domestic inflow.","date": "Aug 02, 2024 17:02"},
    {"title":"Aluminum ingot inventory built-up showed signs of accelerating, surpassing 800,000 mt this week","summary": "As of August 1, 2024, SMM reported a total social inventory of aluminum ingots at 814,000 mt, (the amount for sale stood at 688,000 mt), an increase of 24,000 mt WoW and 289,000 mt higher YoY.","date": "Aug 02, 2024 17:01"},
    {"title":"In July, domestic alumina costs increased by 1.77% MoM, with strong support from mining costs","summary": "According to the SMM alumina daily cost model, as of July 31, the weighted average total cost of the domestic alumina industry was 2,867.2 yuan/mt, up 49.7 yuan/mt from the end of June, up 1.77% MoM, and up 7.34% YoY.","date": "Aug 02, 2024 16:59"},
    {"title":"Aluminum prices to move rangebound due to positive macro front but weak fundamentals","summary": "This week, the macro front improved.","date": "Aug 02, 2024 16:57"},
    {"title":"SMM Morning Comment For SHFE Base Metals On August 2","summary": "Overnight, LME copper opened at $9,112/mt.","date": "Aug 02, 2024 09:40"},
    {"title":"SMM Aluminum Market Morning Comment (Aug 2)","summary": "The most-traded SHFE 2409 aluminum contract opened at 19,230 yuan/mt overnight, reaching a high of 19,330 yuan/mt and a low of 19,225 yuan/mt, and closed at 19,225 yuan/mt, up 50 yuan/mt, an increase of 0.26%.","date": "Aug 02, 2024 09:30"},
    {"title":"The decline in aluminum prices makes it difficult to drive downstream demand. In July, the domestic aluminum processing sector PMI remained below 50% [SMM Downstream In-depth Analysis]","summary": "According to SMM data, the comprehensive PMI index of the domestic aluminum processing industry recorded 41.80% in July 2024, up 0.2 percentage points MoM, remaining below 50% for three consecutive months.","date": "Aug 01, 2024 15:45"},
    {"title":"Canyon Resources secures mining convention for Minim-Martap Bauxite Project in Cameroon","summary": "Canyon Resources Limited has formally signed a mining convention for the Minim-Martap Bauxite Project in Cameroon.","date": "Aug 01, 2024 09:42"}
]

# Combine titles and summaries for embedding: one text document per article
documents = [article['title'] + " " + article['summary'] for article in scraped_articles]

# Convert text to TF-IDF vectors; toarray() turns the sparse result into a
# dense 2-D array with one row per document
vectorizer = TfidfVectorizer()
embeddings = vectorizer.fit_transform(documents).toarray()

# Store the embeddings with the corresponding articles. Rows come back in the
# same order as `documents`, so pairing them positionally is safe.
for article, embedding in zip(scraped_articles, embeddings):
    article['embedding'] = embedding
