In [19]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
import pickle
import os

# Selected features for anomaly detection
selected_features = ['Age', 'Dosage', 'Quantity']
medication_col = 'Medication_Name'

# Function to extract medication combinations
def get_medication_combination(data):
    """
    Extract a sorted tuple of medications for each transaction by grouping rows with the same Transaction_ID.
    Assumes a 'Transaction_ID' column exists in the dataset.
    """
    try:
        # Group by Transaction_ID and collect medications as a set for each transaction
        grouped = data.groupby('Transaction_ID')[medication_col].apply(
            lambda meds: tuple(sorted(set(meds.dropna().astype(str).str.strip())))
        )
        return grouped
    except Exception as e:
        print(f"Error processing medication combinations: {e}")
        return pd.Series(dtype=object)


# Train an SVM model for anomaly detection on pharmacy data
def train_svm_model(data, selected_features):
    scaler = StandardScaler()
    features = scaler.fit_transform(data[selected_features])
    model = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale').fit(features)
    return model, scaler

# Test the SVM model and detect anomalies
def test_svm_model_with_combinations(model, scaler, test_data, output_file):
    # Transform selected features
    features = scaler.transform(test_data[selected_features])
    predictions = model.predict(features)
    anomalies = test_data[predictions == -1]
    
    # Get medication combinations grouped by Transaction_ID
    medication_combinations = get_medication_combination(test_data)
    combination_counts = medication_combinations.value_counts()
    rare_combinations = combination_counts[combination_counts < 3].index

    # Add a cause for anomalies based on rare medication combinations
    anomalies['Cause'] = anomalies['Transaction_ID'].map(
        lambda tid: "Abnormal medication combination" 
        if medication_combinations.get(tid, ()) in rare_combinations else "Feature anomaly"
    )

    # Save anomalies to CSV
    anomalies.to_csv(output_file, index=False)
    return anomalies


# Aggregation function: averaging models and scalers
def aggregate_models(models, scalers):
    # Averaging the SVM models - using a weighted average based on support vectors
    # Placeholder logic: A more sophisticated approach is required for actual model averaging
    # For simplicity, we'll average the scalers and use the first model as a placeholder
    
    # Averaging the scalers
    avg_mean = np.mean([scaler.mean_ for scaler in scalers], axis=0)
    avg_scale = np.mean([scaler.scale_ for scaler in scalers], axis=0)
    avg_scaler = StandardScaler()
    avg_scaler.mean_ = avg_mean
    avg_scaler.scale_ = avg_scale
    
    # Placeholder: return the first model (advanced averaging is needed for actual models)
    # A full model averaging might involve custom logic for combining support vectors
    avg_model = models[0]  # Simple placeholder, implement a better aggregation for OneClassSVM if needed
    
    return avg_model, avg_scaler

# Directory to save intermediate and final results
output_dir = "anomaly_results"
os.makedirs(output_dir, exist_ok=True)

# Train models for each pharmacy and save them
pharmacies = []
zones = []
cities = []
national = []

# Load and train for each pharmacy
for city in range(1, 4):  # 3 cities
    city_models = []
    city_scalers = []  # Collect scalers for city aggregation
    for zone in range(1, 4):  # 3 zones per city
        zone_models = []
        zone_scalers = []  # Collect scalers for zone aggregation
        for pharmacy in range(1, 5):  # 4 pharmacies per zone
            pharmacy_name = f"Ph{pharmacy:02d}_Z{zone:02d}_C{city:02d}"
            train_file = f"{pharmacy_name}_train.csv"
            train_data = pd.read_csv(train_file)
            
            # Train the SVM model
            model, scaler = train_svm_model(train_data, selected_features)
            pharmacy_model = {'model': model, 'scaler': scaler}
            zone_models.append(pharmacy_model)
            zone_scalers.append(scaler)  # Collect scaler
            print(f"Trained model for {pharmacy_name}")
            
            # Test the model on the TEST dataset
            test_data = pd.read_csv("TEST.csv")
            test_results_file = os.path.join(output_dir, f"{pharmacy_name}_test_results.csv")
            test_svm_model_with_combinations(model, scaler, test_data, test_results_file)
        
        # Aggregate zone models
        zone_model, zone_scaler = aggregate_models([m['model'] for m in zone_models], zone_scalers)
        zone_models = {'model': zone_model, 'scaler': zone_scaler}
        city_models.append(zone_models)
        city_scalers.append(zone_scaler)  # Collect scalers for city aggregation
        print(f"Aggregated model for Zone {zone} of City {city}")
        
        # Test zone model
        test_results_file = os.path.join(output_dir, f"Zone{zone:02d}_City{city:02d}_test_results.csv")
        test_svm_model_with_combinations(zone_model, zone_scaler, test_data, test_results_file)
    
    # Aggregate city models
    city_model, city_scaler = aggregate_models([m['model'] for m in city_models], city_scalers)
    cities.append({'model': city_model, 'scaler': city_scaler})
    print(f"Aggregated model for City {city}")
    
    # Test city model
    test_results_file = os.path.join(output_dir, f"City{city:02d}_test_results.csv")
    test_svm_model_with_combinations(city_model, city_scaler, test_data, test_results_file)

# Aggregate national model
national_model, national_scaler = aggregate_models([c['model'] for c in cities], [c['scaler'] for c in cities])
national = {'model': national_model, 'scaler': national_scaler}
print("Aggregated National model")

# Test national model
test_results_file = os.path.join(output_dir, f"National_test_results.csv")
test_svm_model_with_combinations(national_model, national_scaler, test_data, test_results_file)

print("All models trained, tested, and results saved.")


Trained model for Ph01_Z01_C01
Trained model for Ph02_Z01_C01
Trained model for Ph03_Z01_C01


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph04_Z01_C01
Aggregated model for Zone 1 of City 1
Trained model for Ph01_Z02_C01


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph02_Z02_C01
Trained model for Ph03_Z02_C01
Trained model for Ph04_Z02_C01


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a 

Aggregated model for Zone 2 of City 1
Trained model for Ph01_Z03_C01
Trained model for Ph02_Z03_C01


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph03_Z03_C01
Trained model for Ph04_Z03_C01
Aggregated model for Zone 3 of City 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a 

Aggregated model for City 1
Trained model for Ph01_Z01_C02
Trained model for Ph02_Z01_C02


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph03_Z01_C02
Trained model for Ph04_Z01_C02
Aggregated model for Zone 1 of City 2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph01_Z02_C02
Trained model for Ph02_Z02_C02
Trained model for Ph03_Z02_C02


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a 

Trained model for Ph04_Z02_C02
Aggregated model for Zone 2 of City 2
Trained model for Ph01_Z03_C02


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph02_Z03_C02
Trained model for Ph03_Z03_C02
Trained model for Ph04_Z03_C02


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a 

Aggregated model for Zone 3 of City 2
Aggregated model for City 2
Trained model for Ph01_Z01_C03
Trained model for Ph02_Z01_C03
Trained model for Ph03_Z01_C03
Trained model for Ph04_Z01_C03


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a 

Aggregated model for Zone 1 of City 3
Trained model for Ph01_Z02_C03
Trained model for Ph02_Z02_C03


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph03_Z02_C03
Trained model for Ph04_Z02_C03
Aggregated model for Zone 2 of City 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph01_Z03_C03
Trained model for Ph02_Z03_C03
Trained model for Ph03_Z03_C03


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Trained model for Ph04_Z03_C03
Aggregated model for Zone 3 of City 3
Aggregated model for City 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(


Aggregated National model
All models trained, tested, and results saved.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anomalies['Cause'] = anomalies['Transaction_ID'].map(
