# Analysis Revenue Reports

In [881]:
import pandas as pd
import numpy as np
import os
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings(action='once')

## Read in CSV and take Sample 

In [826]:
df = pd.read_excel(r'./Section_RVU/RVUdataset.xlsx')
df.head(10)

Unnamed: 0,State,Patient Number,Cpt Code,Cpt Desc,Cpt Status Code,Provider Number,Provider Specialty,Place Of Service Number,Place of Service Desc,F/NF,...,MP RVU,TRVU,wRVU - GPCI,PE RVU - GPCI,MP RVU - GPCI,TRVU - GPCI,Chg,Pmt,Adj,Net
0,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
1,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
3,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
4,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
5,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
7,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,0.0,0.0
8,Alaska,AL83985207,96365,Ther/proph/diag iv inf init,Active Code,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,-126.042589,0.0,-126.042589
9,Alaska,AL83985207,96365,Ther/proph/diag iv inf init,Active Code,Provider 1073,Internal Medicine,11,Office,NF,...,,,,,,,0.0,0.0,-23.499272,-23.499272


In [827]:
df.keys()

Index(['State', 'Patient Number', 'Cpt Code', 'Cpt Desc', 'Cpt Status Code',
       'Provider Number', 'Provider Specialty', 'Place Of Service Number',
       'Place of Service Desc', 'F/NF', 'Payer Name', 'Transactions',
       'CPT Modifier All', 'Units', 'Units w/Charge', 'Unique Patients',
       'wRVU', 'PE RVU', 'MP RVU', 'TRVU', 'wRVU - GPCI', 'PE RVU - GPCI',
       'MP RVU - GPCI', 'TRVU - GPCI', 'Chg', 'Pmt', 'Adj', 'Net'],
      dtype='object')

## COLUMN MANIPULATION

### 1) Create New Dataset

In [828]:
health = df[['State', 'Patient Number', 'Cpt Code', 'Cpt Desc', 'Cpt Status Code',
       'Provider Number', 'Provider Specialty', 
       'Place of Service Desc',  'Payer Name', 'Transactions',
       'Units', 'Units w/Charge', 'Unique Patients','Chg', 'Pmt', 'Adj', 'Net']]
health.head()

Unnamed: 0,State,Patient Number,Cpt Code,Cpt Desc,Cpt Status Code,Provider Number,Provider Specialty,Place of Service Desc,Payer Name,Transactions,Units,Units w/Charge,Unique Patients,Chg,Pmt,Adj,Net
0,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,1,0.0,0.0,0.0,0.0
1,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Blood pressure,1,0,0,0.0,0.0,0.0,0.0
2,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Bill,0,0,0,0.0,0.0,0.0,0.0
3,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Invoice - Transfer,0,0,0,0.0,0.0,0.0,0.0
4,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,0,0.0,0.0,0.0,0.0


### 2) Rename Columns

In [829]:
health.columns

Index(['State', 'Patient Number', 'Cpt Code', 'Cpt Desc', 'Cpt Status Code',
       'Provider Number', 'Provider Specialty', 'Place of Service Desc',
       'Payer Name', 'Transactions', 'Units', 'Units w/Charge',
       'Unique Patients', 'Chg', 'Pmt', 'Adj', 'Net'],
      dtype='object')

In [830]:
columns = ['State', 'PatientNumber', 'CptCode', 'CptDesc', 'CptStatus','ProviderNumber', 'ProviderSpecialty', 'PlaceofService',
           'PayerName', 'Transactions', 'Units', 'UnitsCharge','UniquePatients', 'Charge', 'Payment', 'Adjustment', 'Net']
health.columns = columns
health.head()   

Unnamed: 0,State,PatientNumber,CptCode,CptDesc,CptStatus,ProviderNumber,ProviderSpecialty,PlaceofService,PayerName,Transactions,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
0,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,1,0.0,0.0,0.0,0.0
1,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Blood pressure,1,0,0,0.0,0.0,0.0,0.0
2,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Bill,0,0,0,0.0,0.0,0.0,0.0
3,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Invoice - Transfer,0,0,0,0.0,0.0,0.0,0.0
4,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,0,0.0,0.0,0.0,0.0


## Data Cleaninsing

In [831]:
#health['PatientNumber'].loc[AL83985207] return error

In [832]:
### Identify DataFrame Data Types
health.dtypes

State                 object
PatientNumber         object
CptCode               object
CptDesc               object
CptStatus             object
ProviderNumber        object
ProviderSpecialty     object
PlaceofService        object
PayerName             object
Transactions          object
Units                  int64
UnitsCharge            int64
UniquePatients         int64
Charge               float64
Payment              float64
Adjustment           float64
Net                  float64
dtype: object

In [833]:
### Assess data quality identifyinng the number of rows##
health.count()

State                101880
PatientNumber        101880
CptCode              101880
CptDesc              101880
CptStatus            101880
ProviderNumber       101880
ProviderSpecialty    101880
PlaceofService       101880
PayerName            101880
Transactions         101880
Units                101880
UnitsCharge          101880
UniquePatients       101880
Charge               101880
Payment              101880
Adjustment           101880
Net                  101880
dtype: int64

In [834]:
len(health)

101880

In [835]:
health.shape

(101880, 17)

In [836]:
health.size

1731960

<H4> Confirmed </H4>
<p>I have confrimmed that the dataset has 101880 rows and 17 columns of data</p>

### Assess data quality by identifying the number of times a value occurs

In [837]:
health.head(5)

Unnamed: 0,State,PatientNumber,CptCode,CptDesc,CptStatus,ProviderNumber,ProviderSpecialty,PlaceofService,PayerName,Transactions,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
0,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,1,0.0,0.0,0.0,0.0
1,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Blood pressure,1,0,0,0.0,0.0,0.0,0.0
2,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Bill,0,0,0,0.0,0.0,0.0,0.0
3,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Invoice - Transfer,0,0,0,0.0,0.0,0.0,0.0
4,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,0,0.0,0.0,0.0,0.0


### Assess data quality by checking for nulls

In [838]:
health.isnull()

Unnamed: 0,State,PatientNumber,CptCode,CptDesc,CptStatus,ProviderNumber,ProviderSpecialty,PlaceofService,PayerName,Transactions,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101875,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
101876,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
101877,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
101878,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


### Assess data quality by determining the percentage of nulls

In [839]:
#Determine percentage of nulls
health.isnull().mean()*100 

State                0.0
PatientNumber        0.0
CptCode              0.0
CptDesc              0.0
CptStatus            0.0
ProviderNumber       0.0
ProviderSpecialty    0.0
PlaceofService       0.0
PayerName            0.0
Transactions         0.0
Units                0.0
UnitsCharge          0.0
UniquePatients       0.0
Charge               0.0
Payment              0.0
Adjustment           0.0
Net                  0.0
dtype: float64

### Assess data quality by determining the percentage of nulls

In [840]:
health.isnull().sum()

State                0
PatientNumber        0
CptCode              0
CptDesc              0
CptStatus            0
ProviderNumber       0
ProviderSpecialty    0
PlaceofService       0
PayerName            0
Transactions         0
Units                0
UnitsCharge          0
UniquePatients       0
Charge               0
Payment              0
Adjustment           0
Net                  0
dtype: int64

# Summary
- There is no null values although I expect 0 in the dataset
- Will not check for duplication because each columns has many to many relationship
- This dataset is fairly clean dataset

## Summary Statistics

In [842]:
#health[['Charge','Payment', 'Adjustment', 'Net']] = health[['Charge','Payment', 'Adjustment', 'Net']].abs()

In [843]:
health.describe()

Unnamed: 0,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
count,101880.0,101880.0,101880.0,101880.0,101880.0,101880.0,101880.0
mean,0.227915,0.140852,0.080153,24.219971,10.648543,11.471705,46.340219
std,1.895585,0.347871,0.271531,110.222722,45.862106,68.631274,132.841269
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,1.386863
75%,0.0,0.0,0.0,0.0,0.0,0.837469,47.15
max,200.0,1.0,1.0,5968.740763,2519.271443,5486.994813,5968.740763


In [844]:
health.head(5)

Unnamed: 0,State,PatientNumber,CptCode,CptDesc,CptStatus,ProviderNumber,ProviderSpecialty,PlaceofService,PayerName,Transactions,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
0,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,1,0.0,0.0,0.0,0.0
1,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Blood pressure,1,0,0,0.0,0.0,0.0,0.0
2,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Bill,0,0,0,0.0,0.0,0.0,0.0
3,Alaska,AL83985207,3077F,Syst bp >/= 140 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Invoice - Transfer,0,0,0,0.0,0.0,0.0,0.0
4,Alaska,AL83985207,3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,Provider 1073,Internal Medicine,Office,Commercial,Insurance Payment,0,0,0,0.0,0.0,0.0,0.0


In [845]:
# numerical feature
# centrality measures
print('Mean Net : {0}'.format(health.Net.mean())) # mean
print('Median Net : {0}'.format(health.Net.median())) # median
print('Mean Adjustment : {0}'.format(health.Adjustment.mean())) # mean
print('Median Adjustment : {0}'.format(health.Adjustment.median())) # median
print('Mean Payment : {0}'.format(health.Payment.mean())) # mean
print('Median Payment : {0}'.format(health.Payment.median())) # median
print('Mean Charge : {0}'.format(health.Charge.mean())) # mean
print('Median Charge : {0}'.format(health.Charge.median())) # median

Mean Net : 46.34021853674322
Median Net : 1.3868625187131607
Mean Adjustment : 11.471704504752251
Median Adjustment : 0.0
Mean Payment : 10.648542741890516
Median Payment : 0.0
Mean Charge : 24.219971290100453
Median Charge : 0.0


### Analysis Approach for Quantative feature
<p>AThe median for most of the above features is 0. This is not a good starting point to measure and explore the features and make decisions on. I better approach is to determine what would return a more realistic view of the dataset.  The dataset is predicated on Cpt Codes and some Cpt Codes value is 0 in fact, every visit (encounter) will have at least one Cpt Code representation value 0 or no charge. Thus, your standard deviations will be way off, and we won't know how spread out each feature is. For example, I have decided to represent the minimum to be greater than 1. This won't skew the data much and I believe it will give a better exploring of the dataset. However, I am sure they are a more robust approach, but I believe this be a more complete  
</p>

In [846]:
print('Min Net : {0}'.format(health.Adjustment[health.Adjustment> .9].min()))

Min Net : 0.9012689610142953


In [847]:
health_net = health[health.Net > 1]
# dispersion measures
print('Min Net : {0}'.format(health_net.Net.min())) # minimum
print('Max Net : {0}'.format(health_net.Net.max())) # maximum
print('Net range : {0}'.format(health_net.Net.max()  - health.Net.min())) # range
print('25 percentile : {0}'.format(health_net.Net.quantile(.25))) # 25 percentile
print('50 percentile : {0}'.format(health_net.Net.quantile(.5))) # 50 percentile
print('75 percentile : {0}'.format(health_net.Net.quantile(.75))) # 75 percentile
print('Variance Net : {0}'.format(health_net.Net.var())) # variance
print('Standard deviation Net : {0}'.format(health_net.Net.std())) # standard deviation

Min Net : 1.0005130753454852
Max Net : 5968.740763074681
Net range : 5968.740763074681
25 percentile : 10.76320991410182
50 percentile : 41.61993170379222
75 percentile : 107.11046601935628
Variance Net : 30047.557691796694
Standard deviation Net : 173.34231362191025


In [824]:
#health_net['num'] = range(1, len(health_net) + 1)
#health_net.set_index('num');

In [848]:
health_adj = health[health.Adjustment > 1]
print('Min Adjustment : {0}'.format(health_adj.Adjustment.min())) # minimum
print('Max Adjustment : {0}'.format(health_adj.Adjustment.max())) # maximum
print('Adjustment range : {0}'.format(health_adj.Adjustment.max()  - health.Net.min())) # range
print('25 percentile : {0}'.format(health_adj.Adjustment.quantile(.25))) # 25 percentile
print('50 percentile : {0}'.format(health_adj.Adjustment.quantile(.5))) # 50 percentile
print('75 percentile : {0}'.format(health_adj.Adjustment.quantile(.75))) # 75 percentile
print('Variance Net : {0}'.format(health_adj.Adjustment.var())) # variance
print('Standard deviation Net : {0}'.format(health_adj.Adjustment.std())) # standard deviation

Min Adjustment : 1.0005130753454852
Max Adjustment : 5486.994813104587
Adjustment range : 5486.994813104587
25 percentile : 2.487283434677882
50 percentile : 13.625536502434631
75 percentile : 47.66648175464857
Variance Net : 17896.23865695193
Standard deviation Net : 133.77682406512696


In [850]:
health_chg = health[health.Charge > 1]
print('Min Adjustment : {0}'.format(health_chg.Charge.min())) # minimum
print('Max Adjustment : {0}'.format(health_chg.Charge.max())) # maximum
print('Adjustment range : {0}'.format(health_chg.Charge.max()  - health.Net.min())) # range
print('25 percentile : {0}'.format(health_chg.Charge.quantile(.25))) # 25 percentile
print('50 percentile : {0}'.format(health_chg.Charge.quantile(.5))) # 50 percentile
print('75 percentile : {0}'.format(health_chg.Charge.quantile(.75))) # 75 percentile
print('Variance Net : {0}'.format(health_chg.Charge.var())) # variance
print('Standard deviation Net : {0}'.format(health_chg.Charge.std())) # standard deviation

Min Adjustment : 2.0
Max Adjustment : 5968.740763074681
Adjustment range : 5968.740763074681
25 percentile : 51.53987083817424
50 percentile : 131.77430205606277
75 percentile : 203.73542308840038
Variance Net : 60898.06667172683
Standard deviation Net : 246.77533643321578


In [853]:
health_pay = health[health.Payment > 1]
print('Min Adjustment : {0}'.format(health_pay.Payment.min())) # minimum
print('Max Adjustment : {0}'.format(health_pay.Payment.max())) # maximum
print('Adjustment range : {0}'.format(health_pay.Payment.max()  - health.Net.min())) # range
print('25 percentile : {0}'.format(health_pay.Payment.quantile(.25))) # 25 percentile
print('50 percentile : {0}'.format(health_pay.Payment.quantile(.5))) # 50 percentile
print('75 percentile : {0}'.format(health_pay.Payment.quantile(.75))) # 75 percentile
print('Variance Net : {0}'.format(health_pay.Payment.var())) # variance
print('Standard deviation Net : {0}'.format(health_pay.Payment.std())) # standard deviation

Min Adjustment : 1.0382395661649138
Max Adjustment : 2519.2714429633247
Adjustment range : 2519.2714429633247
25 percentile : 19.0
50 percentile : 54.33768493905726
75 percentile : 102.46272036567312
Variance Net : 10009.656990593452
Standard deviation Net : 100.04827330140911


# DataFrame Basic Analysis (Exploring)

**Anlysis the following features: State, PatientNumber, CptCode, CptDesc, CptStatus, ProviderNumber, ProviderSpecialty, PlaceofService, PayerName,Transactions the following questions:**
* Find Max, Min, Mean
* What is the largest for the first 10 rows
* What is the smallest for the first 10 rows


## 1) Exploring Patient Features

In [677]:
#### Show only two states in this column Alaska and Alabama. I didnt want the dataset to be too complicated
health['State'].value_counts()

Alaska     50940
Alabama    50940
Name: State, dtype: int64

### Exploring Patient Number Columns

In [678]:
health['PatientNumber'].nunique()

8166

In [679]:
health['PatientNumber'].describe()

count         101880
unique          8166
top       AL18475636
freq              75
Name: PatientNumber, dtype: object

In [680]:
health['PatientNumber'].value_counts().nlargest(5).to_frame()

Unnamed: 0,PatientNumber
AL18475636,75
AB18475636,75
AB83670426,71
AL83670426,71
AB82795364,66


In [681]:
health['PatientNumber'].value_counts().nsmallest(3).to_frame()

Unnamed: 0,PatientNumber
AL47685140,1
AL78466586,1
AL93495788,1


In [682]:
health['PatientNumber'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,PatientNumber
median,12.0
max,75
idxmax,AL18475636
mean,12.47612
min,1
idxmin,AL47685140
count,8166


## Combining State and Patient Number Columns

In [23]:
df1 = health[['State','PatientNumber']]
df1 = df1.groupby(['State'])['PatientNumber'].value_counts().to_frame()
df1 = df1.rename(columns={'PatientNumber':'count'})
df1.reset_index(inplace=True)

In [24]:
df1.groupby('State')['PatientNumber','count'].agg({'max','min'})

  df1.groupby('State')['PatientNumber','count'].agg({'max','min'})


Unnamed: 0_level_0,PatientNumber,PatientNumber,count,count
Unnamed: 0_level_1,min,max,min,max
State,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Alabama,AB11114452,AB99970098,1,75
Alaska,AL11114452,AL99970098,1,75


# PatientNumber and State Columns Summary

**Anlysis the following features: State, PatientNumber following questions:**
* In the State columns they are two states Alabama and Alaska
* Each States has a count of 50940 row of Data
* In the Patient Number column there is 101880 total rows of data
## Hypothesis 
* They are 8166 row of data that may represent each patient encounter. Will need to confirm with Cpt Code Visit
* The maximum occurance of the patient number is 75. This could reprerent patient encounters. Will need to confirm with Cpt Code Visit 
* The minimum occurance of the patient number is 1. This could reprerent patient encounters. more than likely this is correct 

## 2) Exploring CPT Columns

In [25]:
health['CptCode'].nunique()

605

In [26]:
health['CptCode'].describe()

count     101880
unique       605
top        99214
freq        8898
Name: CptCode, dtype: object

In [27]:
health['CptCode'].value_counts().nlargest(10).to_frame()

Unnamed: 0,CptCode
99214,8898
99213,6586
3078F,6128
3074F,6004
93010,3682
3079F,3152
99232,2430
99233,2240
90471,2130
3075F,2094


In [28]:
health['CptCode'].value_counts().nsmallest(10).to_frame()

Unnamed: 0,CptCode
950147,2
950897,2
960063,2
950919,2
67042,4
72190,4
93270,4
3052F,4
99255,4
76140,4


In [29]:
health['CptCode'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,CptCode
median,24.0
max,8898.0
idxmax,99214.0
mean,168.396694
min,2.0
idxmin,950147.0
count,605.0


### Exploring CptDesc Column

In [30]:
health['CptDesc'].nunique()

535

In [31]:
health['CptDesc'].value_counts()

Office/outpatient visit est    16804
Diast bp <80 mm hg              6128
Syst bp lt 130 mm hg            6004
Subsequent hospital care        4940
Electrocardiogram report        3682
                               ...  
Mccd,maintenance rate              4
Sbrt management                    4
X-ray exam of pelvis               4
Inpatient consultation             4
Nasopharyngoscopy                  4
Name: CptDesc, Length: 535, dtype: int64

In [32]:
health['CptDesc'].describe()

count                          101880
unique                            535
top       Office/outpatient visit est
freq                            16804
Name: CptDesc, dtype: object

In [33]:
health['CptDesc'].value_counts().nlargest(20).to_frame()

Unnamed: 0,CptDesc
Office/outpatient visit est,16804
Diast bp <80 mm hg,6128
Syst bp lt 130 mm hg,6004
Subsequent hospital care,4940
Electrocardiogram report,3682
Diast bp 80-89 mm hg,3152
Emergency dept visit,3054
Immunization admin,2130
Syst bp ge 130 - 139mm hg,2094
Office/outpatient visit new,2094


In [34]:
health['CptDesc'].value_counts().nsmallest(10).to_frame()

Unnamed: 0,CptDesc
Vit for macular hole,4
Remote 30 day ecg rev/report,4
X-ray consultation,4
"Mccd,maintenance rate",4
Sbrt management,4
X-ray exam of pelvis,4
Inpatient consultation,4
Nasopharyngoscopy,4
Prostate ca screening; dre,6
Mr angiography head w/dye,6


In [35]:
health['CptDesc'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,CptDesc
median,24.0
max,16804
idxmax,Office/outpatient visit est
mean,190.429907
min,4
idxmin,Vit for macular hole
count,535


### Exploring CptStatus Column

In [36]:
health['CptStatus'].nunique()

11

In [37]:
health['CptStatus'].describe()

count          101880
unique             11
top       Active Code
freq            69698
Name: CptStatus, dtype: object

In [38]:
health['CptStatus'].value_counts().nlargest(10).to_frame()

Unnamed: 0,CptStatus
Active Code,69698
Not Valid for Medicare,20070
Statutory Exclusion,6890
Non-Covered Services,2150
Measurement Code,1056
Excluded from PFS,884
Part B,492
Bundled Code,406
Transaction,160
Restricted Coverage,38


In [39]:
health['CptStatus'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,CptStatus
median,884.0
max,69698
idxmax,Active Code
mean,9261.818182
min,36
idxmin,Injections
count,11


### Combine CPT Columns

In [882]:
cpt1 = health[['CptCode','CptDesc','CptStatus']]
cpt1 = cpt1.groupby('CptCode')['CptDesc','CptStatus'].value_counts().to_frame()
cpt1.reset_index(inplace=True)
cpt1.set_index('CptCode',inplace=True)
cpt1 = cpt1.rename(columns={0:'count'})
cpt1.sort_values('count', ascending=False).head(30)

  cpt1 = cpt1.groupby('CptCode')['CptDesc','CptStatus'].value_counts().to_frame()


Unnamed: 0_level_0,CptDesc,CptStatus,count
CptCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
99214,Office/outpatient visit est,Active Code,8898
99213,Office/outpatient visit est,Active Code,6586
3078F,Diast bp <80 mm hg,Not Valid for Medicare,6128
3074F,Syst bp lt 130 mm hg,Not Valid for Medicare,6004
93010,Electrocardiogram report,Active Code,3682
3079F,Diast bp 80-89 mm hg,Not Valid for Medicare,3152
99232,Subsequent hospital care,Active Code,2430
99233,Subsequent hospital care,Active Code,2240
90471,Immunization admin,Active Code,2130
3075F,Syst bp ge 130 - 139mm hg,Not Valid for Medicare,2094


## New Patient Office Visit (E/M
<p> They are much we can explore with Cpt Codes but lets start with the basics
 New Patient Office Visit (E/M) Services (CPT Codes 99201-99205) - Overview of Key Components
The key components of E/M including those services billed for New Patient Office Visit (E/M) Services

* History
* Examination
* Medical decision-making</p>

In [915]:
warnings.filterwarnings(action='ignore')

cpt2 = health[health['CptCode'].between('99201', '99205', inclusive=False)]
cpt2 = cpt2[['CptCode','CptDesc','CptStatus']]
cpt2 = cpt2.groupby('CptCode')['CptDesc','CptStatus'].value_counts().to_frame()
cpt2.reset_index(inplace=True)
cpt2.set_index('CptCode',inplace=True)
cpt2 = cpt2.rename(columns={0:'count'})
cpt2.sort_values('count', ascending=False)

Unnamed: 0_level_0,CptDesc,CptStatus,count
CptCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
99204,Office/outpatient visit new,Active Code,1018
99203,Office/outpatient visit new,Active Code,564
99202,Office/outpatient visit new,Active Code,82


## Establish Patient Visit


In [905]:
warnings.filterwarnings(action='ignore')

cpt3 = health[health['CptCode'].between('99211', '99215', inclusive=False)]
cpt3 = cpt3[['CptCode','CptDesc','CptStatus']]
cpt3 = cpt3.groupby('CptCode')['CptDesc','CptStatus'].value_counts().to_frame()
cpt3.reset_index(inplace=True)
cpt3.set_index('CptCode',inplace=True)
cpt3 = cpt3.rename(columns={0:'count'})
cpt3.sort_values('count', ascending=False)

Unnamed: 0_level_0,CptDesc,CptStatus,count
CptCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
99214,Office/outpatient visit est,Active Code,8898
99213,Office/outpatient visit est,Active Code,6586
99212,Office/outpatient visit est,Active Code,384


In [951]:
warnings.filterwarnings(action='ignore')

cpt4 = health[health['CptCode'].between('99211', '99215', inclusive=False)]
cpt4 = cpt4[['State','CptCode']]
cpt4 = cpt4.groupby(['State'])['CptCode'].value_counts().to_frame()
# cpt4.reset_index(inplace=True)
# #cpt4.set_index('CptCode','PatientNumber',inplace=True)
cpt4 = cpt4.rename(columns={0:'cptcount'})
# cpt4.sort_values('cptcount', ascending=False).head(10)
cpt4

Unnamed: 0_level_0,Unnamed: 1_level_0,CptCode
State,CptCode,Unnamed: 2_level_1
Alabama,99214,4449
Alabama,99213,3293
Alabama,99212,192
Alaska,99214,4449
Alaska,99213,3293
Alaska,99212,192


<p>Above show that count across state are the same. This very unrealistic however I am demonstarting my process</p>

## 3) Exploring Provider Columns

In [73]:
health['ProviderNumber'].describe()

count           101880
unique             855
top       Provider 713
freq               854
Name: ProviderNumber, dtype: object

In [74]:
health['ProviderNumber'].value_counts().nlargest(20).to_frame()

Unnamed: 0,ProviderNumber
Provider 713,854
Provider 372,788
Provider 230,760
Provider 244,714
Provider 13,666
Provider 245,632
Provider 142,628
Provider 572,626
Provider 50,622
Provider 194,598


In [44]:
health['ProviderNumber'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,ProviderNumber
median,72.0
max,854
idxmax,Provider 713
mean,119.157895
min,2
idxmin,Provider 786
count,855


## Exploring Provider Specialty Column

In [45]:
health['ProviderSpecialty'].nunique()

35

In [46]:
health['ProviderSpecialty'].describe()

count              101880
unique                 35
top       Family Medicine
freq                18666
Name: ProviderSpecialty, dtype: object

In [47]:
health['ProviderSpecialty'].value_counts().nlargest(10)

Family Medicine        18666
Radiology              17366
Cardiology             17052
Internal Medicine      11686
Pediatrics              4502
Nurse Practitioner      4364
OB/GYN                  3486
Emergency Medicine      3460
Surgery                 3432
Physician Assistant     2290
Name: ProviderSpecialty, dtype: int64

In [48]:
health['ProviderSpecialty'].value_counts().nsmallest(10)

Optometrist             10
Otorhinolaryngology     74
Allergy                 88
Hospice                104
Sleep Medicine         114
Ophthalmology          174
Podiatry               314
Urology                324
Endocrinology          352
Psychiatry             354
Name: ProviderSpecialty, dtype: int64

In [49]:
health['ProviderSpecialty'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,ProviderSpecialty
median,720.0
max,18666
idxmax,Family Medicine
mean,2910.857143
min,10
idxmin,Optometrist
count,35


### Combine Provider Columns

In [78]:
provider1 = health[['ProviderNumber','ProviderSpecialty']]
provider1 = provider1.groupby('ProviderNumber')['ProviderNumber','ProviderSpecialty'].value_counts().to_frame()
provider1.reset_index(inplace=True)
provider1.set_index('ProviderSpecialty',inplace=True)
provider1 = provider1.rename(columns={0:'count'})
provider1.sort_values('count', ascending=False).head(30)

  provider1 = provider1.groupby('ProviderNumber')['ProviderNumber','ProviderSpecialty'].value_counts().to_frame()


Unnamed: 0_level_0,ProviderNumber,count
ProviderSpecialty,Unnamed: 1_level_1,Unnamed: 2_level_1
Cardiology,Provider 713,854
Cardiology,Provider 372,788
Cardiology,Provider 230,760
Radiology,Provider 244,714
Cardiology,Provider 13,666
Radiology,Provider 245,632
Radiology,Provider 142,628
Family Medicine,Provider 572,626
Radiology,Provider 50,622
Cardiology,Provider 223,598


In [93]:
provider1.sort_index().head(50)

Unnamed: 0_level_0,ProviderNumber,count
ProviderSpecialty,Unnamed: 1_level_1,Unnamed: 2_level_1
Allergy,Provider 564,88
Cardiology,Provider 130,260
Cardiology,Provider 13,666
Cardiology,Provider 549,580
Cardiology,Provider 538,594
Cardiology,Provider 238,74
Cardiology,Provider 792,146
Cardiology,Provider 111,284
Cardiology,Provider 230,760
Cardiology,Provider 11,592


In [52]:
health['PlaceofService'].nunique()

9

In [53]:
health['PlaceofService'].describe()

count     101880
unique         9
top       Office
freq       45674
Name: PlaceofService, dtype: object

In [54]:
health['PlaceofService'].value_counts().nlargest(10)

Office                            45674
On Campus-Outpatient Hospital     23198
Inpatient hospital                14244
Off Campus-Outpatient Hospital     9652
Emergency room - hospital          8440
Skilled nursing facility            382
Nursing facility                    106
Home                                 96
Ambulatory surgical center           88
Name: PlaceofService, dtype: int64

In [55]:
health['PlaceofService'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,PlaceofService
median,8440.0
max,45674
idxmax,Office
mean,11320.0
min,88
idxmin,Ambulatory surgical center
count,9


In [56]:
health['PayerName'].nunique()

3

In [57]:
health['PayerName'].describe()

count       101880
unique           3
top       Medicare
freq         53714
Name: PayerName, dtype: object

In [58]:
health['PayerName'].value_counts().nlargest(10).to_frame()

Unnamed: 0,PayerName
Medicare,53714
Commercial,45474
Medicaid,2692


In [59]:
health['PayerName'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,PayerName
median,45474.0
max,53714
idxmax,Medicare
mean,33960.0
min,2692
idxmin,Medicaid
count,3


In [60]:
health['Transactions'].nunique()

105

In [61]:
health['Transactions'].describe()

count                 101880
unique                   105
top       Invoice - Transfer
freq                   20620
Name: Transactions, dtype: object

In [645]:
health['Transactions'].value_counts().nlargest(10).to_frame()

Unnamed: 0,Transactions
Invoice - Transfer,20620
Insurance Payment,20384
Contractual,13350
withholding,9862
Procedure,6870
Medicare 2% reduction,6770
Bill,5566
Blood pressure,3576
Denial,1324
Office Visit - 25 min,1264


In [646]:
health[health['Transactions'].str.contains('adjustment')]

Unnamed: 0,State,PatientNumber,CptCode,CptDesc,CptStatus,ProviderNumber,ProviderSpecialty,PlaceofService,PayerName,Transactions,Units,UnitsCharge,UniquePatients,Charge,Payment,Adjustment,Net
43,Alaska,AL37210458,96372,Ther/proph/diag inj sc/im,Active Code,Provider 980,OB/GYN,Office,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-9.599826,-9.599826
66,Alaska,AL34062645,81025,Urine pregnancy test,Statutory Exclusion,Provider 609,Family Medicine,Office,Commercial,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-1.480000,-1.480000
73,Alaska,AL34062645,96372,Ther/proph/diag inj sc/im,Active Code,Provider 609,Family Medicine,Office,Commercial,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-8.288510,-8.288510
652,Alaska,AL82795364,71045,X-ray exam chest 1 view,Active Code,Provider 50,Radiology,Emergency room - hospital,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-2.563391,-2.563391
665,Alaska,AL82795364,72125,Ct neck spine w/o dye,Active Code,Provider 1051,Radiology,Emergency room - hospital,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-15.009309,-15.009309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100869,Alabama,AB48922223,74018,X-ray exam abdomen 1 view,Active Code,Provider 259,Radiology,On Campus-Outpatient Hospital,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-1.783775,-1.783775
101035,Alabama,AB75147964,99308,Nursing fac care subseq,Active Code,Provider 908,Physician Assistant,Skilled nursing facility,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-11.167410,-11.167410
101398,Alabama,AB98482053,93280,Pm device progr eval dual,Active Code,Provider 713,Cardiology,Inpatient hospital,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-7.514242,-7.514242
101410,Alabama,AB49639212,72148,Mri lumbar spine w/o dye,Active Code,Provider 658,Radiology,On Campus-Outpatient Hospital,Medicare,Medicare paid - Medicaid adjustment,0,0,0,0.0,0.0,-14.519649,-14.519649


In [621]:
health['Transactions'].value_counts().agg({'count','idxmax','max','min','idxmin','mean','median'}).to_frame()

Unnamed: 0,Transactions
median,74.0
max,20620
idxmax,Invoice - Transfer
mean,970.285714
min,2
idxmin,hearing test
count,105


In [615]:
#change the name of dataframe
rvu = health

In [616]:
# store the DataFrame in memory
%store rvu

Stored 'rvu' (DataFrame)


In [323]:
def calc(s):
    return s.sum() / s.sum()
health.groupby("ProviderSpecialty").agg({'Adjustment':'sum'})

Unnamed: 0_level_0,Adjustment
ProviderSpecialty,Unnamed: 1_level_1
Allergy,-522.995141
Cardiology,-308577.297701
Emergency Medicine,-73367.088309
Endocrinology,-1454.937518
Family Medicine,-73986.660405
Gastroenterology,-8293.050625
Geriatrics,-4437.122619
Hematology/Oncology,-6202.421797
Hospice,-1058.433614
Hospitalist,-21100.695652


## Export Write off file

In [651]:
df1 = pd.read_excel(r'./Section_Write_off/Write_Off.xlsx',sheet_name='Flat File')
df1.head()

Unnamed: 0,Post Date Month,Provider Name,Specialty,Department Name,Adjustment Name,Adjustments
0,201902,Provider 478,Surgery: Plastic and Reconstruction,Dept 839,Contractual,259428.208
1,201903,Provider 627,Cardiology: Interventional,Dept 233,Contractual,220272.832
2,201903,Provider 2349,Cardiology: Noninvasive,Dept 336,Contractual,218104.0746
3,201902,Provider 627,Cardiology: Interventional,Dept 233,Contractual,197405.628
4,201902,Provider 804,Surgery: Neurological,Dept 168,Contractual,189671.101


In [652]:
df1.keys()

Index(['Post Date Month', 'Provider Name', 'Specialty', 'Department Name',
       'Adjustment Name', 'Adjustments'],
      dtype='object')

In [653]:
writeoff = df1[['Provider Name', 'Specialty', 'Department Name',
       'Adjustment Name', 'Adjustments']]
writeoff.head()

Unnamed: 0,Provider Name,Specialty,Department Name,Adjustment Name,Adjustments
0,Provider 478,Surgery: Plastic and Reconstruction,Dept 839,Contractual,259428.208
1,Provider 627,Cardiology: Interventional,Dept 233,Contractual,220272.832
2,Provider 2349,Cardiology: Noninvasive,Dept 336,Contractual,218104.0746
3,Provider 627,Cardiology: Interventional,Dept 233,Contractual,197405.628
4,Provider 804,Surgery: Neurological,Dept 168,Contractual,189671.101


In [655]:
%store writeoff

Stored 'writeoff' (DataFrame)


# Export Denial file

In [957]:
df2 = pd.read_excel(r'./Section_Denials/Denials.xlsx',sheet_name='Flat Data')
df2.head()

Unnamed: 0,State,Patient Number,Payer Name,(Non)Facility,Medicare POS Number,Medicare POS Desc,Location Name,Cpt Code,CPT Modifier,CPT Desc,...,RARC,CARC,Denial Category,Charges,Denial Amount,Payments,Adjustments,Net Ar,Denial Count,Units
0,Utah,80894,Medicare,F,22,On Campus-Outpatient Hospital,Northridge Medical Center,77067,26,Scr mammo bi incl cad,...,,,,0.0,0.0,-38.184324,0.0,-38.184324,0,0
1,Utah,80894,Medicare,F,22,On Campus-Outpatient Hospital,Northridge Medical Center,77067,26,Scr mammo bi incl cad,...,,,,0.0,0.0,0.0,-53.976784,-53.976784,0,0
2,Utah,80894,Medicare,F,22,On Campus-Outpatient Hospital,Northridge Medical Center,77067,26,Scr mammo bi incl cad,...,,,,0.0,0.0,0.0,0.720459,0.720459,0,0
3,Utah,80894,Medicare,F,22,On Campus-Outpatient Hospital,Northridge Medical Center,77067,26,Scr mammo bi incl cad,...,,,,0.0,0.0,0.0,-0.778096,-0.778096,0,0
4,Utah,80894,Medicare,F,22,On Campus-Outpatient Hospital,Northridge Medical Center,77067,26,Scr mammo bi incl cad,...,,,,92.218745,0.0,0.0,0.0,92.218745,0,1


In [958]:
denial = df2

In [959]:
%store denial

Stored 'denial' (DataFrame)
