In [8]:
import os

import numpy as np
import pandas as pd

import funolympics_data_processing as fdp

In [2]:
def process_data(api_token, url):
    """Delegate to ``fdp.process_data`` and return its result unchanged.

    Notebook-level convenience wrapper: both arguments are forwarded
    positionally, in the same order, with no extra processing here.

    Args:
        api_token: Passed through as the first argument of
            ``fdp.process_data``.
        url: Passed through as the second argument of ``fdp.process_data``.

    Returns:
        Whatever ``fdp.process_data(api_token, url)`` returns.
    """
    return fdp.process_data(api_token, url)


## 1. Functional Testing
### FR01: Ingestion of Web Server Log Data

In [10]:
# Read the API token from the environment rather than committing it to the
# notebook. Export FUNOLYMPICS_API_TOKEN before starting the kernel; when it is
# unset the token is an empty string and authenticated requests will fail.
# The token that was previously embedded here should be treated as leaked and
# rotated.
api_token = os.environ.get("FUNOLYMPICS_API_TOKEN", "")
url = "http://localhost:5000/api/olympicdata"

def test_ingest_data(mock_api_call):
    # Mock API call to return sample data
    mock_api_call.return_value = [{"timestamp": "2022-01-01 12:00:00", "resource": "/events", "status_code": 200}]
    data = process_data("api_token", "url")
    assert len(data) == 1
    assert data[0]["timestamp"] == "2022-01-01 12:00:00"
    assert data[0]["resource"] == "/events"
    assert data[0]["status_code"] == 200

In [13]:
# Run process_data against the configured endpoint, then display the
# timestamp of the first row of the result.
data = process_data(api_token, url)
data['timestamp'].head(1)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['session_duration'].fillna(0, inplace=True)


5946   2024-05-13 01:00:27
Name: timestamp, dtype: datetime64[ns]

In [6]:
# NOTE(review): test_web_server_log_ingestion is not defined in any visible
# cell of this notebook (the FR01 test defined earlier is test_ingest_data) —
# confirm it still exists in the kernel or update this call.
test_web_server_log_ingestion()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['session_duration'].fillna(0, inplace=True)


### FR02: Handling of Missing or Corrupt Data

In [9]:
from funolympics_data_processing import get_olympic_data, parse_log_data

# Read the API token from the environment rather than committing it to the
# notebook. Export FUNOLYMPICS_API_TOKEN before starting the kernel; when it is
# unset the token is an empty string and authenticated requests will fail.
# The token that was previously embedded here should be treated as leaked and
# rotated.
api_token = os.environ.get("FUNOLYMPICS_API_TOKEN", "")
url = "http://localhost:5000/api/olympicdata"

# Call get_olympic_data with a limit of 12000, pass the result through
# parse_log_data, and wrap the parsed output in a DataFrame.
data = get_olympic_data(api_token, url, limit=12000)
parsed_data = parse_log_data(data)
df = pd.DataFrame(parsed_data)

# Simulate missing or corrupt data on a copy so `df` itself stays untouched.
corrupt_data = df.copy()
corrupt_data.at[0, 'Device'] = None  # Introduce a missing value at index label 0
# NOTE(review): process_data is defined in this notebook as
# process_data(api_token, url); calling it with a single DataFrame argument
# raises TypeError (missing 'url'). Confirm which processing step was intended.
corrupt_data = process_data(corrupt_data)  # Process corrupt data

# Check if missing data is handled correctly
# NOTE(review): this asserts that exactly one null remains in 'Device' after
# processing, i.e. that the injected gap is preserved rather than filled or
# dropped — confirm that this is the behavior FR02 requires.
assert corrupt_data['Device'].isnull().sum() == 1  # Ensure missing data is identified and handled

