In [40]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Get data from CIM api

In [52]:
def get_cim_tv_data(date_diff="2025-5-10"):
    """
    Fetch data from CIM TV API and convert it to a pandas DataFrame.
    
    Args:
        date_diff (str): Date difference in format YYYY-M-D
    
    Returns:
        pandas.DataFrame: The API data in DataFrame format
    """
    # Construct the API URL
    api_url = f"https://api.cim.be/api/cim_tv_public_results_daily_views?dateDiff={date_diff}&reportType=north"
    
    try:
        # Make the API request
        response = requests.get(api_url)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()
        
        # Get the ratings data from the response
        ratings_data = data['hydra:member']
        
        # Convert to DataFrame
        df = pd.DataFrame(ratings_data)

        # Keep relevant columns
        #cols = ["id", "description", "category", "channel", "dateDiff", "startTime", "rLength", "ratePerc", "rateInK", "shr", "rateInKAll", "live"]
        #df = df[cols]
        
        return df
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

In [56]:
# Init df
df = pd.DataFrame()

# Init dates
start_date = "2016-10-1" # oldest working date on website
#start_date = "2025-5-14" # test
end_date = (datetime.now() - timedelta(days=2)).strftime("%Y-%-m-%-d")

# Convert string dates to datetime objects (used for loop condition)
start = datetime.strptime(start_date, "%Y-%m-%d")
end = datetime.strptime(end_date, "%Y-%m-%d")

# Loop through dates, get data and add to df
current_date = start
while current_date <= end:
    # Format date back to string
    date_str = current_date.strftime("%Y-%-m-%-d")
    print(date_str)

    # Get data
    df_current_day = get_cim_tv_data(date_str)

    # Add to df
    if not df_current_day.empty:
        df = pd.concat([df, df_current_day], ignore_index=True)
    else:
        print(f"No data for date: {date_str}")

    # Move to next day
    current_date += timedelta(days=1)

# Save dataframe as csv
csv = df.to_csv("data/ratings.csv", index=False)

2016-10-1
2016-10-2
2016-10-3
2016-10-4
2016-10-5
2016-10-6
2016-10-7
2016-10-8
2016-10-9
2016-10-10
2016-10-11
2016-10-12
2016-10-13
2016-10-14
2016-10-15
2016-10-16
2016-10-17
2016-10-18
2016-10-19
2016-10-20
2016-10-21
2016-10-22
2016-10-23
2016-10-24
2016-10-25
2016-10-26
2016-10-27
2016-10-28
2016-10-29
2016-10-30
2016-10-31
No data for date: 2016-10-31
2016-11-1
2016-11-2
2016-11-3
2016-11-4
2016-11-5
2016-11-6
2016-11-7
2016-11-8
2016-11-9
2016-11-10
2016-11-11
2016-11-12
2016-11-13
2016-11-14
2016-11-15
2016-11-16
2016-11-17
2016-11-18
2016-11-19
2016-11-20
2016-11-21
2016-11-22
2016-11-23
2016-11-24
2016-11-25
2016-11-26
2016-11-27
2016-11-28
2016-11-29
2016-11-30
2016-12-1
2016-12-2
2016-12-3
2016-12-4
2016-12-5
2016-12-6
2016-12-7
2016-12-8
2016-12-9
2016-12-10
2016-12-11
2016-12-12
2016-12-13
2016-12-14
2016-12-15
2016-12-16
2016-12-17
2016-12-18
2016-12-19
2016-12-20
2016-12-21
2016-12-22
2016-12-23
2016-12-24
2016-12-25
2016-12-26
2016-12-27
2016-12-28
2016-12-29
2016-12-

AttributeError: 'NoneType' object has no attribute 'empty'

In [49]:
# Load csv
data = pd.read_csv("data/ratings.csv")

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           40 non-null     int64  
 1   description  40 non-null     object 
 2   category     0 non-null      float64
 3   channel      40 non-null     object 
 4   dateDiff     40 non-null     object 
 5   startTime    40 non-null     object 
 6   rLength      40 non-null     object 
 7   ratePerc     0 non-null      float64
 8   rateInK      40 non-null     float64
 9   shr          0 non-null      float64
 10  rateInKAll   0 non-null      float64
 11  live         40 non-null     int64  
dtypes: float64(5), int64(2), object(5)
memory usage: 3.9+ KB


In [55]:
test = get_cim_tv_data(2016-10-31)
test.head()