In [17]:
import pandas as pd
import sys
import os

microbiology = pd.read_csv("../ressources/microbiology.csv")
transfers = pd.read_csv("../ressources/transfers.csv")

micro_data = [{"filename": "test_micro.csv", "content": microbiology.to_csv()}]
transfer_data = [
    {"filename": "test_transfers.csv", "content": transfers.to_csv()}]

print(microbiology.head())
print(transfers.head())

%load_ext autoreload
%autoreload 2

     test_id patient_id collection_date infection    result
0  MWF90KM3T      P0001      2025-04-21       CRE  negative
1  MOHSWQ40Z      P0002      2025-04-10       CRE  negative
2  M8KK791ZY      P0003      2025-06-04       CRE  negative
3  MIDLRYE89      P0004      2025-05-05       CRE  negative
4  MUFLSEHUD      P0010      2025-05-01       CRE  negative
  transfer_id patient_id ward_in_time ward_out_time          location
0   T9V20YAR8      P0001   2025-04-03    2025-04-09            Ward-2
1   TSBELRQVS      P0001   2025-04-19    2025-04-23            Ward-7
2   T1N3IUZGP      P0002   2025-04-07    2025-04-15  Surgical-Theatre
3   TYN9382KK      P0003   2025-05-15    2025-05-20  Surgical-Theatre
4   TQTTY5Y2J      P0003   2025-06-01    2025-06-08            Ward-3
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# TEST WITH MOCK ANALYSIS

sys.path.append(os.path.join(os.getcwd(), '..', 'src'))
from modules.analysis_orchestrator import AnalysisType
from modules import analysis_orchestrator

print("Available analysis types:")
for analysis_type in AnalysisType:
    print(f"  {analysis_type.display_name}: {analysis_type.description}")

result = analysis_orchestrator.run_analysis_workflow(
    micro_data,
    transfer_data,
    AnalysisType.MOCK
)

if result['success']:
    print(f"Analysis type: {result['analysis_type'].display_name}")
    print("\nFormatted results:")
    print(result['formatted_results'])

Available analysis types:
  🧬 Episode-Based Cluster Detection: Advanced infection cluster detection using episode-based graph analysis
  🧪 Mock Analysis: Mock data analysis for testing and development purposes
Analysis type: 🧪 Mock Analysis

Formatted results:
🔬 DATA ANALYSIS RESULTS
📊 Total files processed: 2
✅ Successfully analyzed: 2

📈 SUMMARY STATISTICS
------------------------------
Total rows across all files: 1081
Average columns per file: 6.0

Most common numeric columns:
   • Unnamed: 0: appears in 2 file(s)

📋 DETAILED FILE ANALYSIS
------------------------------

📄 File: microbiology_test_micro.csv
   Rows: 384, Columns: 6
   Column sums:
      Unnamed: 0: 73,536.00
   Text columns: test_id, patient_id, collection_date, infection, result

📄 File: transfers_test_transfers.csv
   Rows: 697, Columns: 6
   Column sums:
      Unnamed: 0: 242,556.00
   Text columns: transfer_id, patient_id, ward_in_time, ward_out_time, location


In [19]:
# TEST WITH STANDARD ANALYSIS
result = analysis_orchestrator.run_analysis_workflow(
    micro_data,
    transfer_data,
    AnalysisType.STANDARD
)

# Print cluster results
print(result["raw_results"]["clusters"])

[{'cluster_id': 'cluster_cre_2025-03-22_3F2B', 'display_name': 'CRE — Wards Ward-2,Ward-3,Ward-8 — 2025-03-22 → 2025-05-04 — 11 patients', 'infection_type': 'CRE', 'patient_count': 11, 'episode_count': 11, 'locations': ['Ward-3', 'Ward-2', 'Ward-8'], 'date_range': {'start': '2025-03-22', 'end': '2025-05-04'}, 'duration_days': 43, 'risk_score': 12.636363636363637, 'contacts_count': 139}, {'cluster_id': 'cluster_cre_2025-06-13_DE38', 'display_name': 'CRE — Wards Ward-3 — 2025-06-13 → 2025-07-14 — 4 patients', 'infection_type': 'CRE', 'patient_count': 4, 'episode_count': 4, 'locations': ['Ward-3'], 'date_range': {'start': '2025-06-13', 'end': '2025-07-14'}, 'duration_days': 31, 'risk_score': 2.5, 'contacts_count': 10}, {'cluster_id': 'cluster_cre_2025-05-27_5C46', 'display_name': 'CRE — Wards ICU,Ward-6 — 2025-05-27 → 2025-07-09 — 8 patients', 'infection_type': 'CRE', 'patient_count': 8, 'episode_count': 8, 'locations': ['Ward-6', 'ICU'], 'date_range': {'start': '2025-05-27', 'end': '2025