In [1]:
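# One-time setup: uncomment to install the notebook's dependencies.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install feedparser==6.0.10 pandas==2.0.3 python-dateutil pytz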

Collecting feedparser
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas
  Downloading pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl (10.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting tzdata>=2022.1 (from pandas)
  Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)
Collecting numpy>=1.21.0 (from pandas)
  Downloading numpy-1.25.0-cp311-cp311-macosx_11_0_arm64.whl (14.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
Buil

In [26]:
import feedparser
import pandas as pd
import csv
import os
from dateutil.parser import parse
import pytz

In [30]:
# Fetch the BizToc Bitcoin RSS feed, merge items whose title is not yet stored
# into biztoc.csv, and rewrite the file sorted newest-first.
# Each row is [title, paragraph, date]; dates are persisted as naive
# Hong Kong wall-clock strings in DATE_FORMAT.
FEED_URL = 'https://biztoc.com/feed/t/bitcoin'
CSV_PATH = 'biztoc.csv'
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
HK_TZ = pytz.timezone('Asia/Hong_Kong')

# Parse the RSS feed
feed = feedparser.parse(FEED_URL)

entries = []

# If the file exists, read the existing entries
if os.path.isfile(CSV_PATH):
    # newline='' lets the csv module handle line endings embedded in quoted
    # fields; the explicit encoding keeps reads and writes platform-independent.
    with open(CSV_PATH, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (no-op on an empty file)
        for row in reader:
            if len(row) < 3:
                continue  # Ignore blank/truncated rows instead of raising IndexError
            stored = parse(row[2])
            # pytz zones must be attached with localize(): datetime.replace(tzinfo=...)
            # silently uses the zone's historical LMT offset (about +07:37 for
            # Hong Kong) instead of +08:00, which skews comparisons with feed dates.
            if stored.tzinfo is None:
                stored = HK_TZ.localize(stored)
            else:
                stored = stored.astimezone(HK_TZ)
            entries.append([row[0], row[1], stored])

# Titles already stored; a set makes the duplicate check O(1) per feed item
seen_titles = {row[0] for row in entries}

# Parse each item in the feed
for entry in feed.entries:
    title = entry.title

    # Skip entries whose title is already stored (also de-duplicates within the feed)
    if title in seen_titles:
        continue

    try:
        # Assumes the wanted text is the second <p> element of the description
        paragraph = entry.description.split('</p>')[1].split('<p>')[1]
    except IndexError:
        # If there is no second <p> or </p>, set paragraph to a default value
        paragraph = "No second paragraph found"

    # Convert date to Hong Kong Time zone
    date = parse(entry.published).astimezone(HK_TZ)

    entries.append([title, paragraph, date])
    seen_titles.add(title)

# Sort entries by date in descending order
entries.sort(key=lambda row: row[2], reverse=True)

# Write sorted entries to the file
with open(CSV_PATH, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Title", "Paragraph", "Date"])  # Write header
    for row in entries:
        # Convert datetime object back to string format for CSV. This rewrites
        # the row in place, so `entries` holds date strings after this cell runs.
        row[2] = row[2].strftime(DATE_FORMAT)
        writer.writerow(row)  # Write sorted entries


In [31]:
# Truncate long text cells to 40 characters when frames are rendered.
pd.set_option('display.max_colwidth', 40)

# Load the saved feed entries (columns: Title, Paragraph, Date).
# Decode as UTF-8 rather than latin1: latin1 never raises, but it turns every
# multi-byte UTF-8 character (curly quotes, dashes, ...) into mojibake.
# Assumes biztoc.csv is UTF-8 encoded, which is what Python writes by default
# on macOS/Linux. TODO confirm if the file was produced on another platform.
df = pd.read_csv('biztoc.csv', encoding='utf-8')

In [32]:
# Show the loaded table as the cell's result (pandas elides the middle rows of long frames).
df

Unnamed: 0,Title,Paragraph,Date
0,Bitcoins Best Asset (Fixed Supply...,Bitcoin recently reached a 1-year hi...,2023-07-02 22:44:04
1,Bitcoin Gains Unexpected Allies As F...,"Bitcoin BTC/USD, the world's largest...",2023-07-02 22:42:04
2,Analyst Who Called May 2021 Collapse...,Widely followed crypto analyst Dave ...,2023-07-02 22:32:03
3,What Will You Use As Money If The Do...,What Will You Use As Money If The Do...,2023-07-02 21:38:03
4,Jumps 30% in Three Days as One Month...,The price of LTC has surged over 27%...,2023-07-02 21:18:05
...,...,...,...
125,Applied Digital Stock Surges 12% Aft...,Shares of Applied Digital Corporatio...,2023-06-30 19:30:04
126,EOS price outlook: EOS lags as Bitco...,EOS price is down 13% year-to-date e...,2023-06-30 19:14:03
127,Bitcoins breakup with gold may be a...,The U.S. economic landscape has been...,2023-06-30 19:02:07
128,How to Buy Bitcoin with ACH Transfer,How to Buy Bitcoin Crypto with an AC...,2023-06-30 18:42:13
