In [None]:
# 📦 Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot settings
# Apply seaborn's white-grid theme to every figure drawn later in the notebook.
# `set_theme` is seaborn's preferred entry point; `sns.set` is only an alias
# for it. `set_theme` exists in every seaborn release that also provides
# `sns.histplot`, which this notebook already relies on.
sns.set_theme(style='whitegrid')

In [None]:
# 📂 Load the dataset
# `csv_path` is a placeholder: point it at the real CSV file before running.
csv_path = 'your_file.csv'
df = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of the load.
df.head()

In [None]:
# 🛠 Feature Engineering
# Adds row-level rate columns to `df`:
#   mr_rate           = total_records_received / total_records_requested
#   low_mr_rate       = True where mr_rate is below 0.4
#   cancellation_rate = cancelled_claims / total_records_requested
#
# Rows with zero requested records have no defined rate, so the denominator is
# masked to NaN first. Masking (rather than replacing 0 with pd.NA) keeps the
# denominator numeric: putting pd.NA into a NumPy-backed integer column turns it
# into an object column, and the resulting object-dtype rates cannot be binned,
# plotted or averaged reliably further down the notebook.
requested = df['total_records_requested']
requested_nonzero = requested.mask(requested == 0)

df['mr_rate'] = df['total_records_received'] / requested_nonzero
# NaN rates compare as False here, so rows without a defined rate are not flagged.
df['low_mr_rate'] = df['mr_rate'] < 0.4
df['cancellation_rate'] = df['cancelled_claims'] / requested_nonzero

# NOTE(review): astype(bool) maps NaN and any non-empty string to True —
# confirm this column is a clean 0/1 (or True/False) flag in the source data.
df['received_not_cancelled'] = df['received_not_cancelled'].astype(bool)

In [None]:
# 📊 Provider-Level Summary
# One row per `providertaxid`, with every count column summed across that
# provider's rows.
provider_summary = df.groupby('providertaxid').agg({
    'total_records_requested': 'sum',
    'total_records_received': 'sum',
    'cancelled_claims': 'sum',
    'received_not_cancelled': 'sum',
    'add_doc_request_flag': 'sum',
    'add_doc_received_flag': 'sum',
    'number_of_findings': 'sum',
    'no_findings': 'sum'
})

# Provider-level rates are ratios of the summed counts. A provider whose summed
# requests are zero has no defined rate; masking the denominator yields NaN
# instead of inf, matching how the row-level rates treat zero requests.
provider_requested = provider_summary['total_records_requested']
provider_requested = provider_requested.mask(provider_requested == 0)

provider_summary['mr_rate'] = provider_summary['total_records_received'] / provider_requested
provider_summary['cancellation_rate'] = provider_summary['cancelled_claims'] / provider_requested
provider_summary.head()

In [None]:
# 🧮 MR Rate Tiers
# Bucket each row's mr_rate into Low [0, 0.4], Medium (0.4, 0.7], High (0.7, 1.0].
# include_lowest=True is required so that a rate of exactly 0 lands in 'Low';
# with pd.cut's defaults the first interval is open on the left and those rows
# would silently become NaN and vanish from the crosstab below.
# Rates outside [0, 1.0] (and NaN rates) still receive no tier.
# NOTE(review): a rate of exactly 0.4 is 'Low' here, while the `low_mr_rate`
# flag uses a strict `< 0.4` — confirm which boundary is intended.
df['mr_rate_bin'] = pd.cut(
    df['mr_rate'],
    bins=[0, 0.4, 0.7, 1.0],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True,
)

# Count rows per tier, split by the received_not_cancelled flag.
pd.crosstab(df['mr_rate_bin'], df['received_not_cancelled'])

In [None]:
# 📉 Distribution of MR Rate
# Histogram (30 bins) of the row-level mr_rate values, ignoring missing rates.
mr_rate_values = df['mr_rate'].dropna()
hist_ax = sns.histplot(mr_rate_values, bins=30)
hist_ax.set_title('Distribution of MR Rate')
plt.show()

In [None]:
# 📊 MR Rate vs Cancellation Rate
# One point per provider; reset_index turns the group key back into a column
# so the summary can be passed to seaborn as a plain table.
provider_points = provider_summary.reset_index()
scatter_ax = sns.scatterplot(data=provider_points, x='mr_rate', y='cancellation_rate')
scatter_ax.set_title('MR Rate vs Cancellation Rate by Provider')
plt.show()

In [None]:
# 📋 Category-Level Summary
# One row per `sel_category`, with the count columns summed across its rows.
cat_summary = df.groupby('sel_category').agg({
    'total_records_requested': 'sum',
    'total_records_received': 'sum',
    'cancelled_claims': 'sum',
    'number_of_findings': 'sum',
    'no_findings': 'sum'
})

# Category-level rates are ratios of the summed counts. A category with zero
# summed requests has no defined rate; masking the denominator gives NaN rather
# than inf, matching how the row-level rates treat zero requests.
cat_requested = cat_summary['total_records_requested']
cat_requested = cat_requested.mask(cat_requested == 0)

cat_summary['mr_rate'] = cat_summary['total_records_received'] / cat_requested
cat_summary['cancellation_rate'] = cat_summary['cancelled_claims'] / cat_requested

# Lowest mr_rate first; categories without a defined rate sort to the end.
cat_summary.sort_values(by='mr_rate')

In [None]:
# 📌 Pivot Table of MR Rate by Provider and Category
# Rows are providers, columns are categories, and each cell is the mean of the
# row-level mr_rate values for that provider/category pair.
pivot = df.pivot_table(
    values='mr_rate',
    index='providertaxid',
    columns='sel_category',
    aggfunc='mean',
)
pivot.head()

In [None]:
# 🔍 Providers with Low MR Rate by Category
# Keep only the rows whose own mr_rate is below the threshold, then total the
# count columns for each (provider, category) pair.
low_rate_threshold = 0.4  # Adjustable threshold
low_mr_mask = df['mr_rate'] < low_rate_threshold

low_mr_sum_cols = [
    'total_records_requested',
    'total_records_received',
    'cancelled_claims',
    'number_of_findings',
    'no_findings',
]
low_mr_ct = (
    df[low_mr_mask]
    .groupby(['providertaxid', 'sel_category'])
    .agg({col: 'sum' for col in low_mr_sum_cols})
    .reset_index()
    .sort_values(by=['providertaxid', 'sel_category'])
)
low_mr_ct