In [1]:
import nest_asyncio
nest_asyncio.apply()
from pathlib import Path

import sys
sys.path.append("../")
from typing import Optional, Union
import re
import requests
from datetime import datetime

from desci_sense.shared_functions.dataloaders import (
    scrape_post,
    convert_text_to_ref_post,
)
from desci_sense.shared_functions.configs import MetadataExtractionType
from desci_sense.shared_functions.web_extractors.metadata_extractors import extract_all_metadata_to_dict
from desci_sense.shared_functions.dataloaders.twitter.twitter_utils import convert_vxtweet_to_quote_ref_post, convert_vxtweet_to_ref_post
from desci_sense.shared_functions.schema.post import QuoteRefPost
from desci_sense.shared_functions.utils import remove_dups_ordered

In [2]:
# Example post used throughout this notebook. The cells below read `content`,
# `quoted_url`, `quoted_post` and `has_quote_post` from the scraped result, so
# it is expected to be a post that quotes another post.
tweet_url = "https://x.com/StephensonJones/status/1799035911042482210"
# NOTE(review): presumably performs a network request — confirm before running offline.
quote_ref_post = scrape_post(tweet_url)

In [3]:
# Reference URLs of the post, with the quoted post's own reference URLs
# included. The position of a URL in this list is the number used for its
# <ref_N> token further down.
ordered_refs = quote_ref_post.md_ref_urls(include_quoted_ref_urls=True)

# Separate working list, so URLs can be dropped from it later without
# disturbing the positions in `ordered_refs`.
refs_to_process = list(ordered_refs)

ordered_refs


['https://x.com/biorxiv_neursci/status/1798962015148576815',
 'https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1']

In [4]:
# Map each reference URL to its extracted metadata (looked up by URL below).
md_dict = extract_all_metadata_to_dict(
    ordered_refs,
    MetadataExtractionType.CITOID,
    500,  # presumably the max summary length in characters (printed summaries are cut off) — TODO confirm
)

[32m2024-06-12 13:17:56.505[0m | [34m[1mDEBUG   [0m | [36mdesci_sense.shared_functions.web_extractors.citoid[0m:[36mfetch_citation_async_retry[0m:[36m104[0m - [34m[1mskipping citoid for https://x.com/biorxiv_neursci/status/1798962015148576815[0m
[32m2024-06-12 13:17:56.506[0m | [34m[1mDEBUG   [0m | [36mdesci_sense.shared_functions.web_extractors.citoid[0m:[36mfetch_citation_async_retry[0m:[36m111[0m - [34m[1mtarget_url=https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1[0m


In [5]:
# Start from the raw text of the post; this cell always rebuilds
# `processed_content` from scratch, so it can be re-run safely.
processed_content = quote_ref_post.content

# Append the quoted post's URL when the text does not already mention it, so
# that the replacement step further down has something to substitute.
# The truthiness check guards posts without a quoted URL: `None not in <str>`
# raises TypeError, and an empty URL would only append a stray space.
quoted_url = quote_ref_post.quoted_url
if quoted_url and quoted_url not in processed_content:
    processed_content += f" {quoted_url}"

print(processed_content)

New preprint from the lab! 🚨

Replay of procedural experience occurs in the striatum and is independent of the hippocampus.

Heroic effort by @EmmettJThompson & the rest of the team @_JasvinKaur, @_GeorginaMills, @dorrell_will, @ClementineDomi6, @TomNotGeorge 

🧵👇1/13 https://x.com/biorxiv_neursci/status/1798962015148576815


In [6]:
# Render the quoted post inline, but only when the scraped post actually
# carries one.
if quote_ref_post.has_quote_post:
    # Position of the quoted post's URL among the ordered references; it
    # becomes the N in "ref_N".
    quoted_url_idx = ordered_refs.index(quote_ref_post.quoted_url)
    quoted_post = quote_ref_post.quoted_post

    # NOTE(review): the opening tag is "<quoted ...>" while the closing tag is
    # "</quote>" — confirm the mismatch is intended before relying on it.
    rendered_quoted_post = f"<quoted ref_{quoted_url_idx}>{quoted_post.content}</quote>"
    

In [7]:
# `quoted_post`, `quoted_url_idx` and `rendered_quoted_post` are already built
# by the guarded `if quote_ref_post.has_quote_post:` cell directly above.
# Recomputing them here without that guard only duplicated the logic, so this
# cell just displays the result.
rendered_quoted_post

'<quoted ref_0>Replay of procedural experience is independent of the hippocampus  https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1 #biorxiv_neursci</quote>'

In [8]:
# Substitute every occurrence of the quoted post's URL in the text with the
# rendered quoted-post block.
processed_content = processed_content.replace(
    quote_ref_post.quoted_url,
    rendered_quoted_post,
)
print(processed_content)

New preprint from the lab! 🚨

Replay of procedural experience occurs in the striatum and is independent of the hippocampus.

Heroic effort by @EmmettJThompson & the rest of the team @_JasvinKaur, @_GeorginaMills, @dorrell_will, @ClementineDomi6, @TomNotGeorge 

🧵👇1/13 <quoted ref_0>Replay of procedural experience is independent of the hippocampus  https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1 #biorxiv_neursci</quote>


In [9]:
# URLs that still need a <ref_N> token: every reference except the quoted
# post's own URL, which was already replaced by its rendered form.
# Rebuilding the list (instead of calling `.remove()` on the shared one) keeps
# this cell re-runnable: a second `.remove()` would raise ValueError because
# the URL is already gone.
refs_to_process = [url for url in ordered_refs if url != quote_ref_post.quoted_url]

# Replace the remaining URLs with <ref_N> tokens, N being the URL's position in
# `ordered_refs`. Longest URLs go first so that a URL which is a prefix of
# another one cannot clobber part of the longer URL.
for url in sorted(refs_to_process, key=len, reverse=True):
    url_idx = ordered_refs.index(url)
    ref_token = f"<ref_{url_idx}>"
    processed_content = processed_content.replace(url, ref_token)

print(processed_content)

New preprint from the lab! 🚨

Replay of procedural experience occurs in the striatum and is independent of the hippocampus.

Heroic effort by @EmmettJThompson & the rest of the team @_JasvinKaur, @_GeorginaMills, @dorrell_will, @ClementineDomi6, @TomNotGeorge 

🧵👇1/13 <quoted ref_0>Replay of procedural experience is independent of the hippocampus  <ref_1> #biorxiv_neursci</quote>


In [12]:
# Preview the metadata entry of a single reference. Index 1 is hard-coded; for
# the example post it is the bioRxiv preprint URL (see `ordered_refs` above).
md = md_dict[ordered_refs[1]]
print(md.to_str())


url: https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1
item_type: preprint
title: Replay of procedural experience is independent of the hippocampus
summary: Sleep is critical for consolidating all forms of memory1-3, from episodic experience to the development of motor skills4-6. A core feature of the consolidation process is offline replay of neuronal firing patterns that occur during experience7,8. This replay is thought to originate in the hippocampus and trigger the reactivation of ensembles of cortical and subcortical neurons1,3,9-18. However, non-declarative memories do not require the hippocampus for learning or for sleep-dependent consolidat


In [15]:
# Assemble the metadata section: for each reference, a "<ref_i>" header line,
# the reference's metadata as text, and a "==========" separator line.
# NOTE(review): the saved output under this cell shows no separator lines, so
# it appears to predate this code — re-run the cell to refresh it.
metadata_str = "".join(
    f"<ref_{i}> \n{md_dict[url].to_str()}\n==========\n"
    for i, url in enumerate(ordered_refs)
)
print(metadata_str)

<ref_0> 
 url: https://x.com/biorxiv_neursci/status/1798962015148576815
item_type: forumPost
title: Twitter post
summary: None
<ref_1> 
 url: https://www.biorxiv.org/content/10.1101/2024.06.05.597547v1
item_type: preprint
title: Replay of procedural experience is independent of the hippocampus
summary: Sleep is critical for consolidating all forms of memory1-3, from episodic experience to the development of motor skills4-6. A core feature of the consolidation process is offline replay of neuronal firing patterns that occur during experience7,8. This replay is thought to originate in the hippocampus and trigger the reactivation of ensembles of cortical and subcortical neurons1,3,9-18. However, non-declarative memories do not require the hippocampus for learning or for sleep-dependent consolidat

