In [1]:
import pandas as pd
import numpy as np 
import feedparser
import time
from datetime import datetime, timezone, timedelta

In [2]:
# Raise the pandas display limits so that up to 999 rows/columns are shown and
# long cell values (the alert text) are never truncated.

pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", None)

In [3]:
# Feed URL pre-filtered through its query string (country=US, cat=PI5). May change this in
# the future to a more generic feed URL and filter the entries afterwards instead.

url_rpi5 = 'https://rpilocator.com/feed/?country=US&cat=PI5'

In [4]:
# function to read the RSS feed and convert its entries to a data frame

def read_rss_convert(url: str) -> object:
    """Parse an RSS feed and return the title/published columns of its entries.

    Parameters
    ----------
    url : str
        Address of the feed handed to ``feedparser.parse``.

    Returns
    -------
    pandas.DataFrame or None
        A frame holding only the 'title' and 'published' columns of the feed
        entries, or ``None`` when the feed contains no entries.
    """
    parsed_feed = feedparser.parse(url)

    # one row per feed entry
    entries_df = pd.DataFrame(parsed_feed.entries)

    # guard clause: nothing to return for an empty feed (helpful when using
    # pre-filtered URLs, which may legitimately match no entries)
    if len(entries_df) == 0:
        return None

    return entries_df[['title', 'published']]


In [5]:
feed_df = read_rss_convert(url_rpi5)

# read_rss_convert returns None when the feed has no entries. Fall back to an
# empty frame with the expected columns so the remaining cells still run and
# simply produce an empty result instead of failing on None.
if feed_df is None:
    feed_df = pd.DataFrame(columns=['title', 'published'])

feed_df.head()


Unnamed: 0,title,published
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,"Thu, 11 Jan 2024 16:30:24 GMT"
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,"Thu, 11 Jan 2024 04:24:03 GMT"
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,"Wed, 10 Jan 2024 23:56:10 GMT"
3,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 99 units in stock.,"Wed, 10 Jan 2024 21:00:08 GMT"
4,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,"Wed, 10 Jan 2024 17:16:38 GMT"


In [6]:
# Rename 'title' to the more descriptive 'product_alert'. The renamed frame is
# bound back to feed_df rather than mutating the existing object in place.

feed_df = feed_df.rename(columns={'title': 'product_alert'})




In [7]:
# Convert the 'published' strings (e.g. "Thu, 11 Jan 2024 16:30:24 GMT") to timestamps.
# Parsing with utc=True and then dropping the timezone yields naive UTC values
# explicitly, instead of depending on how the parser treats the trailing "GMT".
# Naive values are required because 'current time' is later built from a
# timezone-less string and the two columns are subtracted from each other.

feed_df['published'] = pd.to_datetime(feed_df['published'], utc=True).dt.tz_localize(None)


In [8]:
# check that 'published' now displays as a timestamp rather than the raw feed string
feed_df.head()

Unnamed: 0,product_alert,published
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-11 16:30:24
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,2024-01-11 04:24:03
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,2024-01-10 23:56:10
3,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 99 units in stock.,2024-01-10 21:00:08
4,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-10 17:16:38


In [9]:
# NOTE(review): this builds a fixed UTC-8 offset, not GMT/UTC like the original data,
# and it does not appear to be referenced by any other cell shown here - confirm
# whether it should be timezone.utc or can be removed.
tzinfo = timezone(timedelta(hours=-8))

In [10]:
# capture "now" as a timezone-aware UTC (GMT) timestamp, matching the feed's GMT times

x = datetime.now(tz=timezone.utc)

In [11]:
# format the timestamp as "YYYY-MM-DD HH:MM:SS"; the pattern has no timezone
# directive, so the resulting string carries no UTC offset
current_date_time = x.strftime("%Y-%m-%d %H:%M:%S")

In [12]:
# check the formatted current-time string

current_date_time

'2024-01-11 22:49:27'

In [13]:
# add the current date time string to the data frame; assigning a single value
# gives every row the same 'current time'
feed_df['current time'] = current_date_time




In [14]:
# check that the data frame now has the 'current time' column

feed_df.head()

Unnamed: 0,product_alert,published,current time
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-11 16:30:24,2024-01-11 22:49:27
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,2024-01-11 04:24:03,2024-01-11 22:49:27
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,2024-01-10 23:56:10,2024-01-11 22:49:27
3,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 99 units in stock.,2024-01-10 21:00:08,2024-01-11 22:49:27
4,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-10 17:16:38,2024-01-11 22:49:27


In [15]:
# check the column dtypes; 'current time' was assigned from a string, so it
# has to be converted before it can be used in date arithmetic

feed_df.dtypes

product_alert            object
published        datetime64[ns]
current time             object
dtype: object

In [16]:
# convert the 'current time' strings to actual datetime values so they can be
# subtracted against 'published'
feed_df['current time']= pd.to_datetime(feed_df['current time'])




In [17]:
# calculate each alert's age in hours: dividing the time difference by a
# one-hour Timedelta gives a plain (fractional) number of hours
feed_df['alert_age'] = (feed_df['current time'] - feed_df['published']) / pd.Timedelta(hours=1)

In [18]:
# check the data frame/validate that 'alert_age' holds the age in fractional hours
feed_df.head()

Unnamed: 0,product_alert,published,current time,alert_age
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-11 16:30:24,2024-01-11 22:49:27,6.3175
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,2024-01-11 04:24:03,2024-01-11 22:49:27,18.423333
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,2024-01-10 23:56:10,2024-01-11 22:49:27,22.888056
3,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 99 units in stock.,2024-01-10 21:00:08,2024-01-11 22:49:27,25.821944
4,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-10 17:16:38,2024-01-11 22:49:27,29.546944


In [20]:
# keep only the newest stock alerts (less than MAX_ALERT_AGE_HOURS old), as the
# devices sell out so quickly that there isn't much value in older alerts
MAX_ALERT_AGE_HOURS = 24

feed_df = feed_df[feed_df['alert_age'] < MAX_ALERT_AGE_HOURS]


In [21]:
# validate the data frame - see if anything is left after the age filter.
# If this comes back empty there are no recent alerts, and that isn't
# something we'd want to drive alerts off of.

feed_df.head()

Unnamed: 0,product_alert,published,current time,alert_age
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,2024-01-11 16:30:24,2024-01-11 22:49:27,6.3175
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,2024-01-11 04:24:03,2024-01-11 22:49:27,18.423333
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,2024-01-10 23:56:10,2024-01-11 22:49:27,22.888056


In [22]:
# project the filtered feed down to the two columns the alert payload needs:
# the alert text and its age in hours

alert_df = feed_df.loc[:, ['product_alert', 'alert_age']]

alert_df.head()


Unnamed: 0,product_alert,alert_age
0,Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.,6.3175
1,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.,18.423333
2,Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.,22.888056


In [26]:
# convert the dataframe to a JSON string; orient="values" emits only the row
# values as nested arrays ([product_alert, alert_age] per row), with no column
# names or index

alert_json = alert_df.to_json(orient="values")

In [27]:
# validate the json - each inner array should be the alert text followed by its age in hours

print(alert_json)

[["Stock Alert (US): RPi 5 - 4GB RAM is In Stock at Adafruit 100 units in stock.",6.3175],["Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 7 units in stock.",18.4233333333],["Stock Alert (US): RPi 5 - 8GB RAM is In Stock at Pishop 27 units in stock.",22.8880555556]]
