In [1]:
import requests
import pandas as pd
import json
import pprint
import os
import time
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

API documentation (interactive console): https://data.bs.ch/api/v2/console

In [None]:
# def call_newest_data(url):
#     '''
#     Requests the newest data from the data.bs.ch api and returns it as pandas dataframe.
#     '''
#     try:
#         r = requests.get(url)

#         df = pd.DataFrame(r.json())
#         df = df.dropna(axis="rows")
#         return df
#     except requests.exceptions.RequestException as e:  # This is the correct syntax
#         raise (f"An exception occured: {e}")
#         return None
    
# url = "https://data.bs.ch/api/v2/catalog/datasets/100088/exports/json?limit=-1&offset=0&timezone=Europe%2FBerlin"
# df = call_newest_data(url)
# df

In [2]:
class LiveDataCollector():
    """Fetch JSON records from a URL at most once per minute and append them to a CSV file.

    Typical use is to call ``collect_data`` repeatedly from a polling loop; the
    collector itself decides whether a new fetch is due.
    """

    def __init__(self):
        # Minute-resolution timestamp of the last successful collection.
        # It starts one minute in the past so the first collect_data() call
        # is never skipped by the "once per minute" check.
        self.update_time = datetime.now().replace(second=0, microsecond=0) - timedelta(minutes=1)

    def open_csv(self, file_path):
        """Load ``file_path`` as a DataFrame.

        Returns an empty DataFrame when the file does not exist or contains no
        parseable columns (e.g. it was written from an empty frame).
        """
        if not os.path.exists(file_path):
            return pd.DataFrame()
        try:
            return pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # A file without any header/columns cannot be parsed; treat it as empty
            # instead of crashing the collector on every subsequent call.
            return pd.DataFrame()

    def save_csv(self, df, file_path):
        """Write ``df`` to ``file_path`` as CSV without the index column."""
        df.to_csv(file_path, index=False)

    def call_newest_data(self, url, timeout=30):
        '''
        Requests the newest data from ``url`` and returns it as pandas dataframe.

        Parameters
        ----------
        url : str
            Endpoint that returns JSON records.
        timeout : float, optional
            Seconds to wait for the server before giving up (default 30).

        Returns
        -------
        pandas.DataFrame or None
            The records with every row containing a missing value dropped, or
            ``None`` when the request fails, the server answers with an HTTP
            error status, or the payload cannot be turned into a DataFrame.
        '''
        try:
            # Without a timeout a stalled connection would block the caller forever.
            r = requests.get(url, timeout=timeout)
            # Do not try to parse error pages (4xx/5xx) as data.
            r.raise_for_status()
            df = pd.DataFrame(r.json())
        except (requests.exceptions.RequestException, ValueError) as e:
            # Report and signal failure with None (the caller checks for it)
            # rather than raising, so a transient outage does not stop collection.
            print(f"An exception occured: {e}")
            return None
        return df.dropna(axis="rows")

    def collect_data(self, url, file_path):
        """Fetch new records and append them to the CSV at ``file_path``.

        Does nothing if a collection already succeeded during the current
        minute. When the fetch fails, the CSV and ``update_time`` are left
        untouched so the next call retries.
        """
        # Compare at minute resolution.
        now = datetime.now().replace(second=0, microsecond=0)

        # Already collected during this minute: nothing to do.
        if self.update_time >= now:
            return

        print("Reading data...")
        df_new = self.call_newest_data(url)
        if df_new is None:
            return

        df = self.open_csv(file_path)
        if df.empty:
            # Nothing stored yet: the new records become the whole table.
            df = df_new.reset_index(drop=True)
        else:
            # DataFrame.append was removed in pandas 2.0; concat is the replacement.
            df = pd.concat([df, df_new], ignore_index=True)

        print(f"Saving: {file_path} at time: {now}")
        self.save_csv(df, file_path)
        print("Complete")

        # Remember the minute of this successful collection.
        self.update_time = now

In [3]:
# JSON export endpoint that is polled, and the directory the daily CSV files go to.
url = "https://data.bs.ch/api/v2/catalog/datasets/100088/exports/json?limit=-1&offset=0&timezone=Europe%2FBerlin"
path = "../data/"

collector = LiveDataCollector()

# Runs until the kernel is interrupted.
while True:
    # One CSV per calendar day, named day_month_year.csv; recomputed on every
    # pass so the target file switches over at midnight.
    file = datetime.now().strftime("%d_%m_%Y") + ".csv"
    # The collector decides on its own whether a new fetch is due.
    collector.collect_data(url, f"{path}{file}")
    # Wait 10 seconds before checking again.
    time.sleep(10)

Reading data...
Saving: ../data/20_10_2022.csv at time: 2022-10-20 10:51:00
Complete
Reading data...
Saving: ../data/20_10_2022.csv at time: 2022-10-20 10:52:00
Complete


KeyboardInterrupt: 