In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

# Read variables from the .env file in the current working directory.
env_path = Path('.env')
load_dotenv(dotenv_path=env_path)

# Indexing os.environ raises KeyError when the variable is not set.
signing_secret = os.environ['SLACK_SIGNING_SECRET']
# os.getenv yields None when the variable is not set.
slack_bot_token = os.getenv("SLACK_BOT_TOKEN")


In [3]:
import os
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError


# Initialize a WebClient with the bot token read from the environment (None if it was unset)
client = WebClient(token=slack_bot_token)

try:
    # Call the emoji.list method
    response = client.emoji_list()
    # 'emoji' is iterated below as a mapping of emoji name -> value; in the recorded
    # output the value is either an image URL or an "alias:<other name>" reference
    emojis = response['emoji']

    # Print one ":name: - value" line per emoji
    for emoji, url in emojis.items():
        print(f":{emoji}: - {url}")

except SlackApiError as e:
    # Report the error code carried by the Slack response instead of re-raising
    print(f"Error fetching emojis: {e.response['error']}")



:bowtie: - https://emoji.slack-edge.com/T077B9HK6BZ/bowtie/f3ec6f2bb0.png
:squirrel: - https://emoji.slack-edge.com/T077B9HK6BZ/squirrel/465f40c0e0.png
:glitch_crab: - https://emoji.slack-edge.com/T077B9HK6BZ/glitch_crab/db049f1f9c.png
:piggy: - https://emoji.slack-edge.com/T077B9HK6BZ/piggy/b7762ee8cd.png
:cubimal_chick: - https://emoji.slack-edge.com/T077B9HK6BZ/cubimal_chick/85961c43d7.png
:dusty_stick: - https://emoji.slack-edge.com/T077B9HK6BZ/dusty_stick/6177a62312.png
:slack: - https://emoji.slack-edge.com/T077B9HK6BZ/slack/7d462d2443.png
:pride: - https://emoji.slack-edge.com/T077B9HK6BZ/pride/56b1bd3388.png
:thumbsup_all: - https://emoji.slack-edge.com/T077B9HK6BZ/thumbsup_all/50096a1020.png
:slack_call: - https://emoji.slack-edge.com/T077B9HK6BZ/slack_call/b81fffd6dd.png
:shipit: - alias:squirrel
:white_square: - alias:white_large_square
:black_square: - alias:black_large_square
:simple_smile: - https://a.slack-edge.com/80588/img/emoji_2017_12_06/apple/simple_smile.png


In [1]:
import json

def read_json_from_file(file_path):
    """
    Loads and returns the JSON document stored in a file.

    Args:
    file_path (str): Path of the JSON file to read.

    Returns:
    The decoded JSON content, as produced by json.load.

    Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the platform default encoding, so
    # non-ASCII text in the file loads the same way on every machine.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Example usage: load the parsed content of messages.json into `messages`
file_path = 'messages.json'
messages = read_json_from_file(file_path)
# NOTE(review): this prints the entire parsed file, which makes for a very large
# cell output -- consider printing a count or a small slice instead.
print(messages)

[{'user': 'U077NUWPN9K', 'type': 'message', 'ts': '1718575120.205999', 'client_msg_id': '9ecbc7bb-838c-4315-8cda-eac43b338262', 'text': '<https://www.biorxiv.org/content/10.1101/2024.05.20.594820v1?rss=1&amp;utm_source=dlvr.it&amp;utm_medium=twitter>', 'team': 'T077K7ED4SJ', 'attachments': [{'from_url': 'https://www.biorxiv.org/content/10.1101/2024.05.20.594820v1?rss=1&utm_source=dlvr.it&utm_medium=twitter', 'ts': 1716274800, 'thumb_url': 'https://www.biorxiv.org/sites/default/files/images/biorxiv_logo_homepage7-5-small.png', 'thumb_width': 252, 'thumb_height': 252, 'service_icon': 'https://www.biorxiv.org/sites/default/files/images/favicon.ico', 'id': 1, 'original_url': 'https://www.biorxiv.org/content/10.1101/2024.05.20.594820v1?rss=1&amp;utm_source=dlvr.it&amp;utm_medium=twitter', 'fallback': 'bioRxiv: Germline status and micronutrient availability regulate a somatic mitochondrial quality control pathway via short-chain fatty acid metabolism', 'text': 'Reproductive status, such as p

In [12]:
import re

def process_messages(messages, output_file="urls.json"):
    """
    Collects the URLs found in the text of the given messages and writes them to a JSON file.

    Args:
    messages (list): Message dicts. Only the optional 'text' entry of each one is read.
    output_file (str): Path of the JSON file to write. Defaults to "urls.json".

    Returns:
    None. The URLs are written to output_file as a JSON list, in message order.
    """
    urls = []
    for message in messages:
        # Only 'text' is read, so messages that lack other keys (such as 'user')
        # or have no text at all are handled without raising KeyError.
        text = message.get('text')
        if text:
            urls.extend(extract_urls(text))

    write_json_to_file(urls, output_file)

import re
from urllib.parse import quote, unquote

def extract_urls(text):
    """
    Extracts all URLs starting with http://, https:// or www. from the given text.

    The text is treated as Slack message text: the label of a link written as
    <url|label> is dropped, and the '&amp;' escape is turned back into '&' in
    each extracted URL. URLs are otherwise returned exactly as they appear in
    the text; no percent-encoding is applied.

    Args:
    text (str): The input text containing URLs.

    Returns:
    list: The extracted URLs, in order of appearance.
    """
    # Slack writes labelled links as <target|label>. Keep only <target> so the
    # label neither ends up inside the URL nor gets extracted as a second URL.
    text = re.sub(r'(<[^<>|\s]+)\|[^<>]*>', r'\1>', text)
    url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
    # Slack escapes '&' as '&amp;' in message text; undo that so query strings
    # such as '?a=1&b=2' come out usable.
    return [url.replace('&amp;', '&') for url in url_pattern.findall(text)]

def write_json_to_file(json_data,file_nm="messages.json"):
    """
    Serializes json_data as JSON with a 4-space indent and writes it to file_nm.

    Args:
    json_data: Any value json.dump can serialize.
    file_nm (str): Path of the file to create or overwrite. Defaults to "messages.json".
    """
    with open(file_nm, mode="w") as out_handle:
        json.dump(json_data, out_handle, indent=4)
        

In [13]:
# Collect the URLs found in the loaded messages; process_messages writes them to urls.json
process_messages(messages)

In [11]:
import re
from urllib.parse import quote, unquote

def extract_and_encode_urls(text):
    """
    Finds URL-like substrings in text and returns them as a list.

    A substring matches when it starts with http://, https:// or www. and runs
    until whitespace or one of the characters < > ". Each match is
    percent-encoded and then immediately decoded again, so the returned
    strings are not percent-encoded.

    Args:
    text (str): The input text to scan.

    Returns:
    list: The matched URLs, in order of appearance.
    """
    matches = re.findall(r'https?://[^\s<>"]+|www\.[^\s<>"]+', text)
    # Round trip each match through quote() and unquote().
    return [unquote(quote(match, safe=':/')) for match in matches]

# Example usage: sample text mixing well-formed URLs with malformed ones
text = """
Here are some URLs:
https://www.example1.com>
http://example.org
https://sub.example.com/path?query=param#fragment
Invalid URLs:
htp://invalid.com
www.example.com
http://example.com/greater>than
"""

# Matches stop at '>', so trailing '>' / '>than' is cut off; the bare www. entry is
# still returned because the pattern also accepts a 'www.' prefix.
urls = extract_and_encode_urls(text)
print(urls)


['https://www.example1.com', 'http://example.org', 'https://sub.example.com/path?query=param#fragment', 'www.example.com', 'http://example.com/greater']


In [23]:
import requests
from urllib.parse import urlparse, parse_qs

def convert_to_download_url(pmid, timeout=10):
    """
    Looks up the PMC article URL for a PubMed ID via the NCBI ID converter API.

    Args:
    pmid (int or str): The PubMed ID to convert.
    timeout (float): Seconds to wait for the API before requests raises a
        timeout error. Defaults to 10.

    Returns:
    str or None: The PMC article URL when the first returned record has a
    'pmcid'; None when there is no such record or PMC ID; the literal string
    "Error querying PubMed API" when the API answers with a non-200 status
    (kept for backward compatibility -- callers must check for it).
    """
    # Query the PubMed API to get the PMC ID. Passing the query as `params`
    # lets requests URL-encode the values; the timeout keeps a stalled
    # connection from blocking the notebook indefinitely.
    response = requests.get(
        "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/",
        params={
            "tool": "slack_download",
            "email": "daniel.higgins@umassmed.edu",
            "ids": pmid,
            "format": "json",
        },
        timeout=timeout,
    )

    if response.status_code != 200:
        return "Error querying PubMed API"

    # A missing or empty 'records' list means there is nothing to convert;
    # guard it so indexing the first record cannot raise IndexError.
    records = response.json().get('records') or []
    pmc_id = records[0].get('pmcid') if records else None
    if not pmc_id:
        return None

    return f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/"


# Example: look up the PMC article URL for PMID 38187566
pmc_url = convert_to_download_url(38187566)
print(pmc_url)


https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10769218/


In [16]:
import re

def extract_pmid_from_pubmed_url(url):
    """
    Extracts the PMID from a PubMed URL if it starts with 'https://pubmed.ncbi.nlm.nih.gov/'.

    Args:
    url (str): The input URL.

    Returns:
    str: The extracted PMID (a string of digits), or None if the URL format
    is not recognized or url is not a string.
    """
    if not isinstance(url, str):
        return None

    # The dots are escaped so they match only a literal '.'; an unescaped '.'
    # matches any character and would accept look-alike host names.
    pattern = r"https://pubmed\.ncbi\.nlm\.nih\.gov/(\d+)"

    # re.match anchors at the start of the string, so the prefix must come first.
    match = re.match(pattern, url)
    return match.group(1) if match else None

# Example usage: a PubMed URL without a trailing slash still yields its PMID
url = "https://pubmed.ncbi.nlm.nih.gov/38187566"
pmid = extract_pmid_from_pubmed_url(url)
print(pmid)


38187566
