In [1]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
%pip install pyspark

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
%pip install flask





[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import pandas as pd
from datetime import datetime, timedelta
from pyspark.sql import SparkSession
from pyspark.sql.functions import concat_ws, to_timestamp, col, to_date, struct, collect_list
from flask import Flask, request, jsonify
import threading

In [5]:
def load_csv(file_path):
    """Read a CSV file and add a combined ``timestamp`` column.

    The file must contain ``date`` and ``time`` columns; they are joined with
    a single space and parsed with ``pd.to_datetime``.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        A pandas DataFrame holding every column of the file plus ``timestamp``.
    """
    frame = pd.read_csv(file_path)
    stamp_text = frame['date'] + ' ' + frame['time']
    return frame.assign(timestamp=pd.to_datetime(stamp_text))

In [6]:
def load_from_db(db_url, query):
    """Load a query result over JDBC into Spark and add a ``timestamp`` column.

    Args:
        db_url: JDBC URL passed to the reader's ``url`` option.
        query: SQL text passed to the reader's ``query`` option. The result
            must expose ``date`` and ``time`` columns.

    Returns:
        A Spark DataFrame with an extra ``timestamp`` column built from
        ``date`` and ``time`` joined by a space.
    """
    session = SparkSession.builder.appName("Compliance Analysis").getOrCreate()
    reader = (
        session.read.format("jdbc")
        .option("url", db_url)
        .option("query", query)
    )
    rows = reader.load()
    stamp_text = concat_ws(' ', rows['date'], rows['time'])
    return rows.withColumn("timestamp", to_timestamp(stamp_text))

In [7]:
import logging
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(level=logging.DEBUG)

def calculate_compliance_logic(user_groups, time_ranges, daily_compliance_hours, overall_compliance_percentage):
    """Compute per-user daily and overall compliance from heart-rate readings.

    Every row whose ``heart_rate`` is non-zero and whose ``timestamp`` lies
    inside one of the daily windows (both ends inclusive) is counted as one
    active minute. This presumes roughly one reading per minute -- TODO
    confirm against the data source.

    Args:
        user_groups: Iterable of ``(user_id, data)`` pairs, for example the
            result of ``DataFrame.groupby('user_id')``. ``data`` is a pandas
            DataFrame, or any object exposing ``toPandas()``, with ``date``,
            ``timestamp`` and ``heart_rate`` columns.
        time_ranges: Iterable of ``(from, to)`` pairs of ``'HH:MM'`` strings.
        daily_compliance_hours: Active hours a day needs to count as compliant.
        overall_compliance_percentage: Percentage of compliant days a user
            needs to be flagged ``is_compliant``.

    Returns:
        Dict keyed by user id. Each value holds ``compliance_report`` (the
        per-day records, the overall percentage and the ``is_compliant``
        flag) and ``compliant_data`` (the rows of that user's compliant days
        as a list of records).
    """
    # Parse the windows once. strptime puts every time on the same dummy
    # date, so subtracting the pair gives the window length.
    parsed_ranges = [
        (datetime.strptime(start, '%H:%M'), datetime.strptime(end, '%H:%M'))
        for start, end in time_ranges
    ]
    window_span = sum((end - start for start, end in parsed_ranges), timedelta())
    total_window_minutes = window_span.total_seconds() / 60
    daily_compliance_minutes = daily_compliance_hours * 60

    user_compliance = {}

    for user_id, user_data in user_groups:
        if not isinstance(user_data, pd.DataFrame):
            user_data = user_data.toPandas()

        daily_compliance = []
        compliant_days = []
        # Must be reset for every user; a shared list would leak the rows of
        # previously processed users into this user's 'compliant_data'.
        compliant_frames = []

        for date, group in user_data.groupby('date'):
            has_reading = group['heart_rate'] != 0
            total_active_minutes = 0
            for start, end in parsed_ranges:
                window_start = datetime.combine(date, start.time())
                window_end = datetime.combine(date, end.time())
                in_window = (group['timestamp'] >= window_start) & (group['timestamp'] <= window_end)
                total_active_minutes += int((in_window & has_reading).sum())

            # A zero-length total window has no meaningful percentage; report
            # 0.0 instead of raising ZeroDivisionError.
            if total_window_minutes:
                daily_compliance_percentage = (total_active_minutes / total_window_minutes) * 100
            else:
                daily_compliance_percentage = 0.0

            daily_compliance.append({
                'date': date,
                'compliance_hours': total_active_minutes / 60,
                'compliance_percentage': daily_compliance_percentage
            })

            if total_active_minutes >= daily_compliance_minutes:
                compliant_days.append(date)
                # `group` already contains only this date's rows.
                compliant_frames.append(group)

        results_df = pd.DataFrame(daily_compliance)
        total_days = len(results_df)
        compliant_days_count = len(compliant_days)
        overall_compliance_percentage_calc = (compliant_days_count / total_days) * 100 if total_days > 0 else 0

        user_compliance[user_id] = {
            'compliance_report': {
                'daily_compliance': results_df.to_dict(orient='records'),
                'overall_compliance_percentage': overall_compliance_percentage_calc,
                'is_compliant': overall_compliance_percentage_calc >= overall_compliance_percentage
            },
            'compliant_data': pd.concat(compliant_frames).to_dict(orient='records') if compliant_frames else []
        }

        logging.debug(f"Processed user_id: {user_id}, compliance_report: {user_compliance[user_id]['compliance_report']}")

    return user_compliance


In [8]:
def calculate_compliance_from_csv(file_path, time_ranges, daily_compliance_hours, overall_compliance_percentage):
    """Run the compliance calculation on readings stored in a CSV file.

    The file is loaded with ``load_csv``, a ``date`` column is derived from
    the ``timestamp`` column, and the rows are grouped by ``user_id`` before
    being handed to ``calculate_compliance_logic``.

    Args:
        file_path: Path of the CSV file to read.
        time_ranges: Daily windows, forwarded unchanged.
        daily_compliance_hours: Daily threshold in hours, forwarded unchanged.
        overall_compliance_percentage: Overall threshold, forwarded unchanged.

    Returns:
        Whatever ``calculate_compliance_logic`` returns for the grouped data.
    """
    readings = load_csv(file_path)
    readings['date'] = readings['timestamp'].dt.date
    return calculate_compliance_logic(
        readings.groupby('user_id'),
        time_ranges,
        daily_compliance_hours,
        overall_compliance_percentage,
    )

In [9]:
def calculate_compliance_from_db(db_url, query, time_ranges, daily_compliance_hours, overall_compliance_percentage):
    """Run the compliance calculation on readings loaded through JDBC.

    The query result is loaded with ``load_from_db``, a ``date`` column is
    derived from ``timestamp``, and the readings are collected per user into
    pandas DataFrames for ``calculate_compliance_logic``.

    Args:
        db_url: JDBC URL forwarded to ``load_from_db``.
        query: SQL text forwarded to ``load_from_db``. The result must expose
            ``user_id`` and ``heart_rate`` in addition to the columns
            ``load_from_db`` needs.
        time_ranges: Daily windows, forwarded unchanged.
        daily_compliance_hours: Daily threshold in hours, forwarded unchanged.
        overall_compliance_percentage: Overall threshold, forwarded unchanged.

    Returns:
        Whatever ``calculate_compliance_logic`` returns for the grouped data.
    """
    reading_columns = ['timestamp', 'date', 'heart_rate']

    df = load_from_db(db_url, query)
    df = df.withColumn('date', to_date('timestamp'))
    grouped_rows = (
        df.groupBy('user_id')
        .agg(collect_list(struct(*reading_columns)).alias('data'))
        .collect()
    )

    # Each collected row carries the user's readings in its 'data' field as a
    # list of struct Rows. Convert those (not the outer row) to dicts so the
    # resulting frame has the named columns the compliance logic reads.
    user_groups = [
        (
            row['user_id'],
            pd.DataFrame([reading.asDict() for reading in row['data']], columns=reading_columns),
        )
        for row in grouped_rows
    ]
    return calculate_compliance_logic(user_groups, time_ranges, daily_compliance_hours, overall_compliance_percentage)

In [10]:
from flask import Flask, request, jsonify
import threading

app = Flask(__name__)

@app.route('/calculate_compliance', methods=['POST'])
def calculate_compliance_api():
    """HTTP endpoint that runs the compliance calculation for a JSON request.

    Expected JSON fields: ``source_type`` (``'csv'`` or ``'db'``),
    ``time_ranges``, ``daily_compliance_hours`` and
    ``overall_compliance_percentage``, plus ``file_path`` for ``'csv'`` or
    ``db_url`` and ``query`` for ``'db'``.

    Returns:
        The calculation result as JSON, or a JSON ``{'error': ...}`` body
        with status 400 when the body is not a JSON object, the source type
        is unknown, or a required field is missing.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the client gets a uniform 400 response below.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    source_type = body.get('source_type')
    time_ranges = body.get('time_ranges')
    daily_compliance_hours = body.get('daily_compliance_hours')
    overall_compliance_percentage = body.get('overall_compliance_percentage')

    logging.debug(f"Received request with source_type: {source_type}, time_ranges: {time_ranges}, "
                  f"daily_compliance_hours: {daily_compliance_hours}, overall_compliance_percentage: {overall_compliance_percentage}")

    if source_type == 'csv':
        source_fields = ('file_path',)
    elif source_type == 'db':
        source_fields = ('db_url', 'query')
    else:
        return jsonify({'error': 'Invalid source type'}), 400

    # Reject incomplete requests here; otherwise a None value only fails deep
    # inside the calculation and surfaces as an opaque server error.
    required_fields = ('time_ranges', 'daily_compliance_hours', 'overall_compliance_percentage') + source_fields
    missing_fields = [name for name in required_fields if body.get(name) is None]
    if missing_fields:
        return jsonify({'error': 'Missing required field(s): ' + ', '.join(missing_fields)}), 400

    # NOTE(review): file_path, db_url and query are taken verbatim from the
    # request, so any client that can reach this endpoint chooses which file
    # is read or which SQL is run. Restrict these before exposing the
    # service beyond a trusted local environment.
    if source_type == 'csv':
        results = calculate_compliance_from_csv(
            body['file_path'], time_ranges, daily_compliance_hours, overall_compliance_percentage)
    else:
        results = calculate_compliance_from_db(
            body['db_url'], body['query'], time_ranges, daily_compliance_hours, overall_compliance_percentage)

    logging.debug(f"Results: {results}")
    return jsonify(results)

# Function to run the Flask app in a separate thread
def run_app():
    """Start the Flask development server; blocks until the server stops."""
    # NOTE(review): debug=True turns on Flask's debug mode. That is suitable
    # for local experimentation only -- confirm before running anywhere the
    # port is reachable by others.
    app.run(debug=True, use_reloader=False)

# Start the Flask app
# daemon=True keeps the blocking server thread from holding the process
# open when the interpreter or kernel shuts down.
thread = threading.Thread(target=run_app, daemon=True)
thread.start()


In [12]:
import requests
import json
import logging

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# Endpoint served by the Flask app started in this notebook
url = 'http://127.0.0.1:5000/calculate_compliance'

# Example request: evaluate a CSV file against two daily windows
payload = dict(
    source_type="csv",
    file_path="sampleHeartRateData.csv",
    time_ranges=[["08:00", "12:00"], ["14:00", "18:00"]],
    daily_compliance_hours=4,
    overall_compliance_percentage=80,
)

logging.debug(f"Sending request to {url} with payload: {payload}")

# Serialize the payload ourselves and declare it as JSON in the headers
request_headers = {'Content-Type': 'application/json'}
request_body = json.dumps(payload)
response = requests.post(url, data=request_body, headers=request_headers)

# Decode the JSON response once, then log and print it
response_payload = response.json()
logging.debug(f"Response: {response_payload}")
print(response_payload)


DEBUG:root:Sending request to http://127.0.0.1:5000/calculate_compliance with payload: {'source_type': 'csv', 'file_path': 'sampleHeartRateData.csv', 'time_ranges': [['08:00', '12:00'], ['14:00', '18:00']], 'daily_compliance_hours': 4, 'overall_compliance_percentage': 80}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.0.1:5000
DEBUG:root:Received request with source_type: csv, time_ranges: [['08:00', '12:00'], ['14:00', '18:00']], daily_compliance_hours: 4, overall_compliance_percentage: 80
DEBUG:root:Processed user_id: 02f77d2, compliance_report: {'daily_compliance': [{'date': datetime.date(2018, 7, 12), 'compliance_hours': 2.05, 'compliance_percentage': 25.624999999999996}, {'date': datetime.date(2018, 7, 13), 'compliance_hours': 7.25, 'compliance_percentage': 90.625}, {'date': datetime.date(2018, 7, 14), 'compliance_hours': 3.65, 'compliance_percentage': 45.625}, {'date': datetime.date(2018, 7, 20), 'compliance_hours': 0.03333333333333333, 'compliance_percentag