## Acousort

In [2]:
import requests
from bs4 import BeautifulSoup

url = 'https://www.acousort.com/'

# GET request
response = requests.get(url)
response.raise_for_status() 

#  Parse  HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

rss_links = soup.find_all('link', {'type': 'application/rss+xml'})

if rss_links:
    for link in rss_links:
        print(f"Found RSS Feed: {link['href']}")
else:
    print("No RSS feed found")

# searching for links containing press
keywords = ['press', 'news', 'investor']
links = soup.find_all('a', href=True)

for link in links:
    if any(keyword in link['href'].lower() for keyword in keywords):
        print(f"Possible Press Release Link: {link['href']}")


Found RSS Feed: https://acousort.com/feed/
Found RSS Feed: https://acousort.com/comments/feed/
Possible Press Release Link: https://acousort.com/company/news-page/
Possible Press Release Link: /investors
Possible Press Release Link: https://acousort.com/mfn_news/acousort-in-new-collaboration-to-improve-sepsis-treatment/
Possible Press Release Link: https://acousort.com/mfn_news/acousort-to-explore-south-korean-opportunities-at-the-kimes-exhibition/
Possible Press Release Link: https://acousort.com/mfn_news/the-exercise-period-for-warrants-of-series-to-2-in-acousort-begins-today/
Possible Press Release Link: https://acousort.com/mfn_news/notice-of-extraordinary-general-meeting-in-acousort-ab-publ/
Possible Press Release Link: https://acousort.com/company/news-page/
Possible Press Release Link: https://acousort.com/investors/
Possible Press Release Link: https://acousort.com/company/news-page/
Possible Press Release Link: /investors/


###  Scraper for the press Release feed

In [101]:
import requests
from bs4 import BeautifulSoup

url = 'https://acousort.com/investors/press-releases/'
headers = {'User-Agent': 'Mozilla/5.0'}

response = requests.get(url, headers=headers)

# Parse content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

press_releases = [
    {
        'title': link.text.strip(),
        'link': link['href'] if link['href'].startswith('http') else f"https://acousort.com{link['href']}"
    }
    for link in soup.find_all('a', href=True) if '/mfn_news/' in link['href']
]

import pandas as pd
df = pd.DataFrame(press_releases)

print(df)


                                                     title                                                      \
0                                    AcouSort announces the outcome of the exercise of warrants of series TO 2   
1                                                    AcouSort in new collaboration to improve sepsis treatment   
2                                       AcouSort to explore South Korean opportunities at the KIMES exhibition   
3                                     The exercise period for warrants of series TO 2 in AcouSort begins today   
4                                                NOTICE OF EXTRAORDINARY GENERAL MEETING IN ACOUSORT AB (PUBL)   
5  The Board of Directors’ of AcouSort resolves on a partially secured rights issue of approximately MSEK 34.1   
6                                                                  AcouSort and Bio-ReCell enter collaboration   
7                                                 Year end report for AcouSort AB 1 Janu

#### Filtering for Quarterly and Annual Reports

In [102]:
# keywords to filter press releases that contain quarterly or annual reports
keywords = ['q1', 'q2', 'q3', 'q4', 'year-end', 'financial']

# keep relevant press releases
filtered_reports = df[df['title'].str.contains('|'.join(keywords), case=False, na=False)].copy()

print(filtered_reports)


                                 title                                 \
8  AcouSort reschedules the 2024 year-end report to February 14, 2025   

                                                 link                                                
8  https://acousort.com/mfn_news/acousort-reschedules-the-2024-year-end-report-to-february-14-2025/  


#### Extract Year and Quarter from Titles

In [103]:
import re

def extract_year_and_quarter(title):
    year = re.search(r'\b(20\d{2})\b', title)  
    quarter_match = re.search(r'\b(Q[1-4])\b', title)  
    quarter = quarter_match.group(0) if quarter_match else ('Q4' if 'year-end' in title.lower() else None)
    return year.group(0) if year else None, quarter

# function to extract year and quarter for each press release
filtered_reports[['year', 'quarter']] = filtered_reports['title'].apply(
    lambda x: pd.Series(extract_year_and_quarter(x))
)

print(filtered_reports)


                                 title                                 \
8  AcouSort reschedules the 2024 year-end report to February 14, 2025   

                                                 link                                                \
8  https://acousort.com/mfn_news/acousort-reschedules-the-2024-year-end-report-to-february-14-2025/   

   year quarter  
8  2024    Q4    


#### Track Links to Financial Reports Page

In [108]:
# Function to check if the press release contains a financial report reference
def check_for_financial_report_link(title, link):
    financial_link = None
    # check if the title mentions 'financial' or 'year-end' reports
    if 'financial' in title.lower() or 'year-end' in title.lower():
        financial_link = 'https://acousort.com/investors/reports/financial-reports/'
    return financial_link

# function to add the financial report link
filtered_reports['financial_report_link'] = filtered_reports.apply(
    lambda row: check_for_financial_report_link(row['title'], row['link']), axis=1
)

print(filtered_reports)


                                 title                                 \
8  AcouSort reschedules the 2024 year-end report to February 14, 2025   

                                                 link                                                \
8  https://acousort.com/mfn_news/acousort-reschedules-the-2024-year-end-report-to-february-14-2025/   

   year quarter                   financial_report_link                     
8  2024    Q4    https://acousort.com/investors/reports/financial-reports/  


In [217]:
# checking page Elements 
print(soup.prettify()) 


<!DOCTYPE html>
<html class="no-js" itemscope="" itemtype="https://schema.org/WebPage" lang="en-US">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1, minimum-scale=1" name="viewport"/>
  <meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots">
   <style>
    img:is([sizes="auto" i], [sizes^="auto," i]) { contain-intrinsic-size: 3000px 1500px }
   </style>
   <script data-cookieconsent="ignore">
    window.dataLayer = window.dataLayer || [];
            function gtag() {
                dataLayer.push(arguments);
            }
            gtag("consent", "default", {
                ad_personalization: "denied",
                ad_storage: "denied",
                ad_user_data: "denied",
                analytics_storage: "denied",
                functionality_storage: "denied",
                personalization_storage: "denied",
                security_storage: "granted",
                wait_fo

In [218]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

url = 'https://acousort.com/investors/reports/financial-reports/'
driver = webdriver.Chrome() 
driver.get(url)

WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.mfn-archive'))
)

# Extracting PDF links for 2024 reports
pdf_links_2024 = []

pdf_elements = driver.find_elements(By.XPATH, "//a[contains(@href, '.pdf')]")

# Loop through each element and filter links for 2024
for element in pdf_elements:
    link = element.get_attribute('href')
    if '2024' in link:  
        pdf_links_2024.append(link)

for link in pdf_links_2024:
    print(link)

driver.quit()


https://storage.mfn.se/e6f249f5-6986-4c4f-8e01-a8ee123c8096/acousort-ab-q4-2024-eng.pdf
https://storage.mfn.se/095a74ce-e30e-43e8-a5ba-d2b2227a631d/acousort-ab-q3-2024-eng.pdf
https://storage.mfn.se/ff719ce8-8b13-4acc-9b1e-93a16d30cd9d/acousort-ab-q2-2024-eng.pdf
https://storage.mfn.se/6df319fa-5038-4c9c-ab16-1e2d633e154c/acousort-ab-q1-2024-eng.pdf


## Carlsberg

In [138]:
import requests
from bs4 import BeautifulSoup
import re

base_url = "https://www.carlsberggroup.com"

url = base_url + '/investor-relations/investor-home/company-announcements/'
response = requests.get(url)

if response.status_code == 200:
    print("Page fetched successfully!")
    page_content = response.content
else:
    print(f"Failed to fetch page. Status code: {response.status_code}")

Page fetched successfully!


In [144]:
soup = BeautifulSoup(page_content, 'html.parser')

def is_relevant_link(link):
    relevant_keywords = ['fy-', 'q1-', 'q2-', 'q3-', 'q4-', 'annual-report', 'financial-statement', 'trading-statement', 'press-release', 'announcement']
    
    for keyword in relevant_keywords:
        if keyword.lower() in link.lower():
            return True
    return False

# Find all the links on the page
links = soup.find_all('a', href=True)

# Initialize a list to store relevant links
relevant_links = []

# Loop through links
for link in links:
    href = link['href']
    if is_relevant_link(href):
        full_url = base_url + href if not href.startswith('http') else href
        relevant_links.append(full_url)
        print(f"Found relevant link: {full_url}")

if relevant_links:
    print("Refined Relevant links found:")
    for url in relevant_links:
        print(url)
else:
    print("No relevant links found.")


Found relevant link: https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/
Found relevant link: https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/
Found relevant link: https://www.carlsberggroup.com/investor-relations/investor-home/company-announcements/
Found relevant link: https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/
Found relevant link: https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/
Found relevant link: https://www.carlsberggroup.com/investor-relations/investor-home/company-announcements/
Refined Relevant links found:
https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/
https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/
https://www.carlsberggroup.com/investor-relations/investor-home/company-announcements/
https://www.carlsberggroup.com/sustainability/report-polic

In [145]:
# Remove duplicate links
relevant_links = list(set(relevant_links))

print("Unique Relevant links found:")
for url in relevant_links:
    print(url)


Unique Relevant links found:
https://www.carlsberggroup.com/investor-relations/investor-home/company-announcements/
https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/
https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/


##  Stockwik

In [162]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

# Set up driver 
chrome_service = Service(ChromeDriverManager().install())
chrome_options = Options()

try:
    driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

    driver.get("https://www.stockwik.se/")

    # Extract all links from the page
    links = driver.find_elements(By.XPATH, "//a[@href]")

    print("All Links on Stockwik Main Page:")
    for link in links:
        href = link.get_attribute('href')
        print(href)

except Exception as e:
    print(f"Error occurred: {e}")

finally:
    driver.quit()


All Links on Stockwik Main Page:
https://www.stockwik.se/
https://www.stockwik.se/om-oss-eng
https://www.stockwik.se/
https://www.stockwik.se/om-oss
https://www.stockwik.se/var-process
https://www.stockwik.se/bolag-och-segment
https://www.stockwik.se/bolagsstyrning
https://www.stockwik.se/karriar
https://www.stockwik.se/finansiellt
https://www.stockwik.se/aktien
https://www.stockwik.se/press
https://www.stockwik.se/om-oss
https://www.stockwik.se/
mailto:info@stockwik.se
https://www.linkedin.com/company/stockwik-f%C3%B6rvaltning-ab/
https://www.facebook.com/stockwik/
https://www.instagram.com/stockwik/
https://cns.omxgroup.com/cdsPublic/createSubscriptionsWizard.action?request_locale=sv&css=http://www.omxgroup.com/static/css/externPop.css


#### Press release page

In [163]:
# Start Chrome browser
try:
    driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

    # Open Stockwik's press page
    driver.get("https://www.stockwik.se/press")

    # Extract all links from the press page
    links = driver.find_elements(By.XPATH, "//a[@href]")

    print("All Links on Stockwik Press Page:")
    for link in links:
        href = link.get_attribute('href')
        print(href)

except Exception as e:
    print(f"Error occurred: {e}")

finally:
    driver.quit()

All Links on Stockwik Press Page:
https://www.stockwik.se/
https://www.stockwik.se/om-oss-eng
https://www.stockwik.se/
https://www.stockwik.se/om-oss
https://www.stockwik.se/var-process
https://www.stockwik.se/bolag-och-segment
https://www.stockwik.se/bolagsstyrning
https://www.stockwik.se/karriar
https://www.stockwik.se/finansiellt
https://www.stockwik.se/aktien
https://www.stockwik.se/press
https://www.stockwik.se/presentationer
https://www.stockwik.se/pressmeddelanden
https://www.stockwik.se/bilder-och-grafik
https://www.stockwik.se/
mailto:info@stockwik.se
https://www.linkedin.com/company/stockwik-f%C3%B6rvaltning-ab/
https://www.facebook.com/stockwik/
https://www.instagram.com/stockwik/
https://cns.omxgroup.com/cdsPublic/createSubscriptionsWizard.action?request_locale=sv&css=http://www.omxgroup.com/static/css/externPop.css


In [198]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import re

driver = webdriver.Chrome()

driver.get('https://www.stockwik.se/pressmeddelanden')

time.sleep(5)

# Find all press release links on the page
links = driver.find_elements(By.TAG_NAME, 'a')

financial_keywords = ['quarter', 'Q1', 'Q2', 'Q3', 'Q4', 'year-end', 'financial statement', 'annual report', '(ENG)']
stockwik_relevant_links = []

# Loop through all the links and filter based on relevant keywords
for link in links:
    title = link.text.lower()
    if any(keyword.lower() in title for keyword in financial_keywords):
        stockwik_relevant_links.append(link.get_attribute('href'))

for link in relevant_links:
    print(link)

driver.quit()


https://www.stockwik.se/_files/ugd/5fbe47_66958287e4df444bb4f521b04c65f474.pdf
https://www.stockwik.se/_files/ugd/5fbe47_5a886f08b2c8442dab8fa88933aa7a7d.pdf
https://www.stockwik.se/_files/ugd/5fbe47_81335e69a71b43bfb0e31b24171da7db.pdf
https://www.stockwik.se/_files/ugd/5fbe47_e10e78b3a2f54ba3a7d56373b19a2045.pdf
https://www.stockwik.se/_files/ugd/5fbe47_6eccb0172c3346ab9896be77596071e1.pdf
https://www.stockwik.se/_files/ugd/5fbe47_b2ac0f4598e24c088aa1b9ce93398ddb.pdf
https://www.stockwik.se/_files/ugd/5fbe47_425b13672aa64555b81100fb509c97a4.pdf
https://www.stockwik.se/_files/ugd/5fbe47_62d6829b8e15476e8d73168a6e140274.pdf
https://www.stockwik.se/_files/ugd/5fbe47_5d4a03c356504887894e1f34f8113e58.pdf
https://www.stockwik.se/_files/ugd/5fbe47_ed56bd3527644c2c97709c66e1e2b14a.pdf
https://www.stockwik.se/_files/ugd/5fbe47_3e3b35a06327437d8aa68a174a611553.pdf
https://www.stockwik.se/_files/ugd/5fbe47_2158ad8aa5a04586882f4ec35225d41f.pdf
https://www.stockwik.se/_files/ugd/5fbe47_3d62c07940

In [202]:
from bs4 import BeautifulSoup
import requests

url = 'https://www.stockwik.se/pressmeddelanden'

# Send request
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

links = soup.find_all('a', href=True)

# Inspect all the links' href and text
for link in links:
    href = link['href']
    link_text = link.text.strip()
    print(f"Link text: {link_text}")
    print(f"Link href: {href}")
    print("-" * 50)

# Filter links
filtered_links = []
for link in links:
    href = link['href']
    link_text = link.text.strip()

    if '2024' in href and '(ENG)' in link_text:
        if href.startswith('/'):
            full_url = f"https://www.stockwik.se{href}"
        else:
            full_url = href
        filtered_links.append(full_url)

if filtered_links:
    print("\nFiltered links:")
    for link in filtered_links:
        print(link)
else:
    print("No matching links found.")


Link text: 
Link href: https://www.stockwik.se
--------------------------------------------------
Link text: IN ENGLISH
Link href: https://www.stockwik.se/om-oss-eng
--------------------------------------------------
Link text: START
Link href: https://www.stockwik.se
--------------------------------------------------
Link text: OM OSS
Link href: https://www.stockwik.se/om-oss
--------------------------------------------------
Link text: VÅR PROCESS
Link href: https://www.stockwik.se/var-process
--------------------------------------------------
Link text: BOLAG OCH SEGMENT
Link href: https://www.stockwik.se/bolag-och-segment
--------------------------------------------------
Link text: BOLAGSSTYRNING
Link href: https://www.stockwik.se/bolagsstyrning
--------------------------------------------------
Link text: KARRIÄR
Link href: https://www.stockwik.se/karriar
--------------------------------------------------
Link text: FINANSIELLT
Link href: https://www.stockwik.se/finansiellt
-----

In [203]:
# filter words
filtered_links = []
for link in links:
    href = link['href']
    link_text = link.text.strip()

    if '(ENG)' in link_text and ('4Q24' in link_text or '3Q24' in link_text or '2Q24' in link_text or '1Q24' in link_text or '2024' in link_text):
        if href.startswith('/'):
            full_url = f"https://www.stockwik.se{href}"
        else:
            full_url = href
        filtered_links.append(full_url)

if filtered_links:
    print("\nFiltered links:")
    for link in filtered_links:
        print(link)
else:
    print("No matching links found.")



Filtered links:
https://www.stockwik.se/_files/ugd/5fbe47_5a886f08b2c8442dab8fa88933aa7a7d.pdf
https://www.stockwik.se/_files/ugd/5fbe47_e10e78b3a2f54ba3a7d56373b19a2045.pdf
https://www.stockwik.se/_files/ugd/5fbe47_b2ac0f4598e24c088aa1b9ce93398ddb.pdf
https://www.stockwik.se/_files/ugd/5fbe47_5d4a03c356504887894e1f34f8113e58.pdf


In [222]:
import pandas as pd

data = [
    {'Company': 'Acousort', 'Quarter': 'FY/Q4', 'Report Type': 'Year-End Report', 'Link': 'https://storage.mfn.se/e6f249f5-6986-4c4f-8e01-a8ee123c8096/acousort-ab-q4-2024-eng.pdf', 'Year': 2024},
    {'Company': 'Acousort', 'Quarter': 'Q3', 'Report Type': 'Interim Report', 'Link': 'https://storage.mfn.se/095a74ce-e30e-43e8-a5ba-d2b2227a631d/acousort-ab-q3-2024-eng.pdf', 'Year': 2024},
    {'Company': 'Acousort', 'Quarter': 'Q2', 'Report Type': 'Interim Report', 'Link': 'https://storage.mfn.se/ff719ce8-8b13-4acc-9b1e-93a16d30cd9d/acousort-ab-q2-2024-eng.pdf', 'Year': 2024},
    {'Company': 'Acousort', 'Quarter': 'Q1', 'Report Type': 'Interim Report', 'Link': 'https://storage.mfn.se/6df319fa-5038-4c9c-ab16-1e2d633e154c/acousort-ab-q1-2024-eng.pdf', 'Year': 2024},
    {'Company': 'Carlsberg', 'Quarter': 'FY', 'Report Type': 'Annual Report', 'Link': 'https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/', 'Year': 2024},
    {'Company': 'Carlsberg', 'Quarter': 'FY', 'Report Type': 'Financial Statement', 'Link': 'https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/', 'Year': 2024},
    {'Company': 'Stockwik', 'Quarter': 'Q4', 'Report Type': 'Quarterly Report', 'Link': 'https://www.stockwik.se/_files/ugd/5fbe47_5a886f08b2c8442dab8fa88933aa7a7d.pdf', 'Year': 2024},
    {'Company': 'Stockwik', 'Quarter': 'Q3', 'Report Type': 'Quarterly Report', 'Link': 'https://www.stockwik.se/_files/ugd/5fbe47_e10e78b3a2f54ba3a7d56373b19a2045.pdf', 'Year': 2024},
    {'Company': 'Stockwik', 'Quarter': 'Q2', 'Report Type': 'Quarterly Report', 'Link': 'https://www.stockwik.se/_files/ugd/5fbe47_b2ac0f4598e24c088aa1b9ce93398ddb.pdf', 'Year': 2024},
    {'Company': 'Stockwik', 'Quarter': 'Q1', 'Report Type': 'Quarterly Report', 'Link': 'https://www.stockwik.se/_files/ugd/5fbe47_5d4a03c356504887894e1f34f8113e58.pdf', 'Year': 2024}
]

df = pd.DataFrame(data)

pd.set_option('display.max_colwidth', None) 
pd.set_option('display.expand_frame_repr', False)

print(df.to_string(index=False))


 Company  Quarter     Report Type                                                 Link                                              Year
 Acousort  FY/Q4      Year-End Report      https://storage.mfn.se/e6f249f5-6986-4c4f-8e01-a8ee123c8096/acousort-ab-q4-2024-eng.pdf  2024
 Acousort     Q3       Interim Report      https://storage.mfn.se/095a74ce-e30e-43e8-a5ba-d2b2227a631d/acousort-ab-q3-2024-eng.pdf  2024
 Acousort     Q2       Interim Report      https://storage.mfn.se/ff719ce8-8b13-4acc-9b1e-93a16d30cd9d/acousort-ab-q2-2024-eng.pdf  2024
 Acousort     Q1       Interim Report      https://storage.mfn.se/6df319fa-5038-4c9c-ab16-1e2d633e154c/acousort-ab-q1-2024-eng.pdf  2024
Carlsberg     FY        Annual Report            https://www.carlsberggroup.com/sustainability/report-policies/2024-annual-report/  2024
Carlsberg     FY  Financial Statement https://www.carlsberggroup.com/investor-relations/investor-home/fy-2024-financial-statement/  2024
 Stockwik     Q4     Quarterly Report    

In [223]:
df.to_csv('financial_reports.csv', index=False)

print("CSV file saved as 'financial_reports.csv'")

CSV file saved as 'financial_reports.csv'
