In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer



In [2]:
df = pd.read_csv(r"C:\Users\Luis Ysturiz\Downloads\Machine Learning model\Phase 2\One Hot Columns(CPT, REV, SEX) (1).csv")


In [3]:
df.drop('CaseID', axis=1, inplace=True) # type: ignore

In [4]:
df

Unnamed: 0,CPTCodes,RevenueCodes,PatientGender,ClaimReturned
0,11102,,M,1
1,"96127, 99396",,F,1
2,"81003, 90471, 90714, 99396",,M,1
3,"90460, 90461, 90647, 90670, 90723, 99391",,M,1
4,"80050, 82248, 83010, 84165",,F,1
...,...,...,...,...
12192,88374,0,F,0
12193,88374,0,F,0
12194,70480,0,F,0
12195,88305,0,M,0


In [5]:
df['PatientGender'] = df['PatientGender'].str.lower().str.strip()

In [6]:
df.head(100)

Unnamed: 0,CPTCodes,RevenueCodes,PatientGender,ClaimReturned
0,11102,,m,1
1,"96127, 99396",,f,1
2,"81003, 90471, 90714, 99396",,m,1
3,"90460, 90461, 90647, 90670, 90723, 99391",,m,1
4,"80050, 82248, 83010, 84165",,f,1
...,...,...,...,...
95,"87481, 87486, 87491, 87541, 87581, 87591, 8763...",,f,1
96,"J0180, J3490, S9357",,m,1
97,"J0180, J3490, S9357",,m,1
98,"93005, J2270, J7030","0636, 0730",f,1


In [7]:
df['CPTCodeList'] = df['CPTCodes'].dropna().apply(lambda x: x.split(", "))
df['RevCodeList'] = df['RevenueCodes'].dropna().apply(lambda x: x.split(", "))

In [8]:
mlb_cpt = MultiLabelBinarizer()
mlb_rev = MultiLabelBinarizer()

In [9]:
one_hot_cpt_codes = mlb_cpt.fit_transform(df['CPTCodeList'].dropna())
one_hot_rev_codes = mlb_rev.fit_transform(df['RevCodeList'].dropna())

In [10]:
one_hot_cpt_df = pd.DataFrame(one_hot_cpt_codes, columns=mlb_cpt.classes_, dtype=float)
one_hot_rev_df = pd.DataFrame(one_hot_rev_codes, columns=mlb_rev.classes_, dtype=float)

In [11]:
df = df.join(one_hot_cpt_df, how='left', rsuffix='_CPT').join(one_hot_rev_df,how='left',rsuffix='_REV').fillna(0)

In [12]:
df.drop(['CPTCodes','CPTCodeList','RevenueCodes', 'RevCodeList'], axis=1, inplace=True)

In [13]:
gender_dummies = pd.get_dummies(df['PatientGender'], prefix='Gender', dtype=float)
df.drop(['PatientGender'], axis=1, inplace=True)
df = pd.concat([df, gender_dummies], axis=1)

In [14]:
try:
    X = torch.tensor(df.drop('ClaimReturned', axis=1).values, dtype=torch.float32)
    y = torch.tensor(df['ClaimReturned'].values, dtype=torch.float32)
except TypeError as e:
    print(e)
    for column in df.columns:
        try:
            torch.tensor(df[column].values, dtype=torch.float32)
        except TypeError as te:
            print(f'Error converting {column}: {te}')

In [15]:
X_train, X_test, y_train,y_test = train_test_split(X,y, test_size=0.2, random_state = 42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [16]:
class Net(nn.Module):
    def __init__(self, num_features):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(num_features, 50)
        self.dropout1 = nn.Dropout(0)
        self.fc2 = nn.Linear(50, 20)
        self.dropout2 = nn.Dropout(0)
        self.fc3 = nn.Linear(20, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.sigmoid(self.fc3(x))
        return x

In [17]:
model = Net(num_features=X.shape[1])

In [18]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

In [19]:
for epoch in range(1000):
    model.train()
    outputs_train = model(X_train)
    loss_train = criterion(outputs_train.squeeze(), y_train)

    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        outputs_val = model(X_val)
        loss_val = criterion(outputs_val.squeeze(), y_val)

    if(epoch+1) % 100 == 0:
        print(f'Epoch[{epoch+1}/1000], Training Loss: {loss_train.item():.4f}, Validation Loss: {loss_val.item():.4f}')

Epoch[100/1000], Training Loss: 0.2726, Validation Loss: 0.2912
Epoch[200/1000], Training Loss: 0.1166, Validation Loss: 0.1553
Epoch[300/1000], Training Loss: 0.1025, Validation Loss: 0.1464
Epoch[400/1000], Training Loss: 0.0962, Validation Loss: 0.1417
Epoch[500/1000], Training Loss: 0.0922, Validation Loss: 0.1381
Epoch[600/1000], Training Loss: 0.0894, Validation Loss: 0.1355
Epoch[700/1000], Training Loss: 0.0873, Validation Loss: 0.1335
Epoch[800/1000], Training Loss: 0.0854, Validation Loss: 0.1316
Epoch[900/1000], Training Loss: 0.0835, Validation Loss: 0.1302
Epoch[1000/1000], Training Loss: 0.0819, Validation Loss: 0.1291


In [20]:

model.eval()
with torch.no_grad():
    predicted_test = model(X_test) > 0.5
    accuracy = (predicted_test.squeeze().float() == y_test).float().mean()
    print(f'Test Accuracy: {accuracy:.4f}')

    joblib.dump(mlb_cpt, 'mlb_cpt.jotlib')
    joblib.dump(mlb_rev, 'mlb_rev.joblib')
    torch.save(model.state_dict(), 'trained model.pth')



Test Accuracy: 0.9430
