In [3]:
import pandas as pd
from fuzzywuzzy import fuzz
from collections import defaultdict
import re

# Read the incoming records and the historical reference records.
df_new = pd.read_csv('new_data1.csv', low_memory=False)
historical_data = pd.read_csv('historical_data1.csv', low_memory=False)

# Historical side: keep the diagnosis and free-text columns, then discard any
# row in which either of them is missing.
historical_data = historical_data.loc[:, ['FIRSTDIAGNOSIS', 'ReasonText']]
historical_data = historical_data.dropna()

# Incoming side: keep only the five columns that the rest of this cell reads.
df_new = df_new.loc[
    :,
    ['AUTHENTICATION_TYPE', 'POLICYSTATUS', 'FIRSTDIAGNOSIS', 'ReasonText', 'INVOICEDAMOUNT'],
]

# Extract prescription names
def extract_prescription_and_name(text):
    """Pull the prescription name out of a comma-separated reason string.

    The text is split from the right on at most two commas and the
    second-to-last piece is taken as the prescription.  If that piece
    contains an opening parenthesis, only the text before it is returned.

    Args:
        text: The raw reason text; anything that is not a ``str`` is accepted.

    Returns:
        The stripped prescription name, or ``''`` when ``text`` is not a
        string or contains no comma.
    """
    if not isinstance(text, str):
        return ''

    pieces = text.rsplit(',', 2)
    if len(pieces) < 2:
        candidate = ''
    else:
        candidate = pieces[-2].strip()

    # Keep only what precedes the first "(" (and any whitespace before it).
    before_paren = re.search(r'^(.*?)\s*\(', candidate)
    if before_paren is None:
        return candidate
    return before_paren.group(1).strip()

# Derive the prescription name for every record on both sides.
df_new['Prescription'] = df_new['ReasonText'].map(extract_prescription_and_name)
historical_data['Prescription'] = historical_data['ReasonText'].map(extract_prescription_and_name)

# Replace any remaining nulls in the historical frame with each column's
# most frequent value.
historical_fill_values = historical_data.mode().iloc[0]
historical_data = historical_data.fillna(historical_fill_values)

# Missing invoiced amounts get the column mean; every other missing value in
# the incoming frame gets that column's most frequent value.
# NOTE(review): the second step also fills missing POLICYSTATUS,
# AUTHENTICATION_TYPE and ReasonText, so such rows are screened as if they
# carried the most common value -- confirm this is intended.
mean_invoiced_amount = df_new['INVOICEDAMOUNT'].mean()
df_new['INVOICEDAMOUNT'] = df_new['INVOICEDAMOUNT'].fillna(mean_invoiced_amount)
df_new = df_new.fillna(df_new.mode().iloc[0])

# Keep only rows whose policy status is exactly "Live".
df_new['Status'] = df_new['POLICYSTATUS'].map(
    lambda policy_status: "proceed" if policy_status == "Live" else "decline"
)
df_new = df_new[df_new['Status'] == "proceed"]

# Then drop rows whose authentication type is one of the rejected values.
df_new['Status'] = df_new['AUTHENTICATION_TYPE'].map(
    lambda auth_type: "decline" if auth_type in ["Blank", "UNAUTHORISED", "Off Smart"] else "proceed"
)
df_new = df_new[df_new['Status'] == "proceed"]

# Index the historical prescriptions by diagnosis for the fuzzy match.
historical_dict = defaultdict(list)
diagnosis_column = historical_data['FIRSTDIAGNOSIS']
prescription_column = historical_data['Prescription']
for known_diagnosis, known_prescription in zip(diagnosis_column, prescription_column):
    historical_dict[known_diagnosis].append(known_prescription)

def check_diagnosis_and_prescription(row, threshold=80):
    """Classify one row by fuzzy-matching its prescription against history.

    Args:
        row: Mapping-like record providing ``'FIRSTDIAGNOSIS'`` and
            ``'Prescription'``.
        threshold: Minimum ``fuzz.ratio`` score that counts as a match.

    Returns:
        ``"new diagnosis"`` when the diagnosis is not a key of
        ``historical_dict``; otherwise ``"proceed"`` if the best score over
        that diagnosis's historical prescriptions reaches ``threshold`` and
        ``"decline"`` if it does not.
    """
    row_diagnosis = row['FIRSTDIAGNOSIS']
    row_prescription = row['Prescription']

    if row_diagnosis not in historical_dict:
        return "new diagnosis"

    best_score = max(
        fuzz.ratio(row_prescription, known_prescription)
        for known_prescription in historical_dict[row_diagnosis]
    )
    if best_score >= threshold:
        return "proceed"
    return "decline"

# Fuzzy-match every remaining row against the historical prescriptions and
# keep only the rows that matched.
df_new['Status'] = df_new.apply(check_diagnosis_and_prescription, axis=1)
df_new = df_new[df_new['Status'] == "proceed"].copy()


def _parse_prescribed_amount(text):
    """Return the number between the first '=' and the next ')' in ``text``.

    Returns ``None`` when ``text`` is not a string, contains no such span, or
    the span is not a valid float (so one malformed record cannot abort the
    whole run with a ``ValueError``).
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'=(.*?)\)', text)
    if found is None:
        return None
    try:
        return float(found.group(1).strip())
    except ValueError:
        return None


# Largest absolute difference at which two amounts still count as equal.
# Exact float equality is avoided because the two values are produced by
# different parsers (Python's float() and the CSV reader).
AMOUNT_TOLERANCE = 1e-6

# Compare Prescribed Amount and INVOICEDAMOUNT
# astype(float) turns missing amounts into NaN so the subtraction is defined.
df_new['Prescribed Amount'] = df_new['ReasonText'].apply(_parse_prescribed_amount).astype(float)
amount_gap = (df_new['Prescribed Amount'] - df_new['INVOICEDAMOUNT']).abs()

# NaN gaps (missing amount on either side) fail the "<=" test and therefore
# stay declined, as they did under the previous "!=" comparison.
df_new['Status'] = "decline"
df_new.loc[amount_gap <= AMOUNT_TOLERANCE, 'Status'] = "proceed"
df_final = df_new[df_new['Status'] == "proceed"]

# Save the filtered dataframe
df_final.to_csv('filtered_data_set.csv', index=False)


In [5]:
import pandas as pd
from fuzzywuzzy import fuzz
from collections import defaultdict
import re

# Read the incoming records and the historical reference records.
df_new = pd.read_csv('new_data1.csv', low_memory=False)
historical_data = pd.read_csv('historical_data1.csv', low_memory=False)

# Historical side: two columns only, and no rows with a missing value in
# either of them.
historical_data = historical_data.loc[:, ['FIRSTDIAGNOSIS', 'ReasonText']]
historical_data = historical_data.dropna()

# Incoming side: restrict to the five columns used by the checks in this cell.
df_new = df_new.loc[
    :,
    ['AUTHENTICATION_TYPE', 'POLICYSTATUS', 'FIRSTDIAGNOSIS', 'ReasonText', 'INVOICEDAMOUNT'],
]

# Extract prescription names
def extract_prescription_and_name(text):
    """Return the prescription name embedded in a reason string.

    The string is split from the right on at most two commas; the
    second-to-last piece is the prescription.  Anything from the first
    opening parenthesis onward is dropped.  Non-string input, or a string
    without a comma, yields ``''``.
    """
    prescription = ''
    if isinstance(text, str):
        segments = text.rsplit(',', 2)
        if len(segments) > 1:
            prescription = segments[-2].strip()
        # Trim a trailing "(...)" detail, if the piece has one.
        leading = re.search(r'^(.*?)\s*\(', prescription)
        if leading:
            prescription = leading.group(1).strip()
    return prescription

# Derive the prescription name for every record on both sides.
df_new['Prescription'] = df_new['ReasonText'].apply(extract_prescription_and_name)
historical_data['Prescription'] = historical_data['ReasonText'].apply(extract_prescription_and_name)

# Fill nulls in historical_data with each column's most frequent value.
historical_data = historical_data.fillna(historical_data.mode().iloc[0])

# Fill nulls in df_new: mean for the invoiced amount, mode for the rest.
df_new['INVOICEDAMOUNT'] = df_new['INVOICEDAMOUNT'].fillna(df_new['INVOICEDAMOUNT'].mean())
df_new = df_new.fillna(df_new.mode().iloc[0])

# Rows rejected at each stage are collected here.  Previously they were
# simply filtered away, so the declined CSV written at the end could only
# ever contain amount mismatches and the other decline reasons were lost.
declined_parts = []


def _keep_proceeding(frame):
    """Stash the declined rows of ``frame`` and return a copy of the others."""
    declined_parts.append(frame[frame['Status'] == "decline"])
    return frame[frame['Status'] == "proceed"].copy()


# Stage 1: the policy must be "Live".
policy_declined = df_new['POLICYSTATUS'] != "Live"
df_new['Status'] = "proceed"
df_new.loc[policy_declined, 'Status'] = "decline"
df_new['DeclineReason'] = None
df_new.loc[policy_declined, 'DeclineReason'] = "Policy Status Declined"
df_new = _keep_proceeding(df_new)

# Stage 2: the authentication type must not be one of the rejected values.
auth_declined = df_new['AUTHENTICATION_TYPE'].isin(["Blank", "UNAUTHORISED", "Off Smart"])
df_new['Status'] = "proceed"
df_new.loc[auth_declined, 'Status'] = "decline"
df_new.loc[auth_declined, 'DeclineReason'] = "Authentication Declined"
df_new = _keep_proceeding(df_new)

# Stage 3: fuzzy matching for prescriptions.
historical_dict = defaultdict(list)
for diagnosis, prescription in zip(historical_data['FIRSTDIAGNOSIS'], historical_data['Prescription']):
    historical_dict[diagnosis].append(prescription)


def check_diagnosis_and_prescription(row, threshold=80):
    """Mark ``row`` declined when its prescription is unlike any seen before.

    Args:
        row: Record providing ``'FIRSTDIAGNOSIS'`` and ``'Prescription'``;
            its ``'Status'`` and ``'DeclineReason'`` entries are written.
        threshold: Minimum ``fuzz.ratio`` score that counts as a match.

    Returns:
        The same ``row``.  If the diagnosis is a key of ``historical_dict``
        and the best score is below ``threshold``, ``Status`` is "decline"
        and ``DeclineReason`` is "Incorrect Prescription"; otherwise
        ``Status`` is "proceed" (this includes diagnoses not in the dict).
    """
    diagnosis = row['FIRSTDIAGNOSIS']
    prescription = row['Prescription']
    if diagnosis in historical_dict:
        max_score = max(
            fuzz.ratio(prescription, hist_prescription)
            for hist_prescription in historical_dict[diagnosis]
        )
        if max_score < threshold:
            row['Status'] = "decline"
            row['DeclineReason'] = "Incorrect Prescription"
            return row
    row['Status'] = "proceed"
    return row


df_new = df_new.apply(check_diagnosis_and_prescription, axis=1)
df_new = _keep_proceeding(df_new)


# Stage 4: compare Prescribed Amount and INVOICEDAMOUNT.
def _parse_prescribed_amount(text):
    """Return the number between the first '=' and the next ')' in ``text``.

    Returns ``None`` when ``text`` is not a string, has no such span, or the
    span is not a valid float (instead of raising ``ValueError``).
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'=(.*?)\)', text)
    if found is None:
        return None
    try:
        return float(found.group(1).strip())
    except ValueError:
        return None


df_new['Prescribed Amount'] = df_new['ReasonText'].apply(_parse_prescribed_amount)


def compare_prescribed_and_invoiced(row, tolerance=1e-6):
    """Decline ``row`` when prescribed and invoiced amounts disagree.

    Args:
        row: Record providing ``'Prescribed Amount'`` and ``'INVOICEDAMOUNT'``.
        tolerance: Largest absolute difference still treated as equal; used
            instead of exact float equality.

    Returns:
        The same ``row``; on a mismatch (or when either amount is missing or
        non-numeric) ``Status`` becomes "decline" and ``DeclineReason``
        becomes "Prescribed Amount Mismatch".
    """
    try:
        difference = abs(float(row['Prescribed Amount']) - float(row['INVOICEDAMOUNT']))
    except (TypeError, ValueError):
        # A missing or non-numeric amount can never be confirmed as a match.
        difference = float('inf')
    # "not <=" (rather than ">") so that a NaN difference is also declined.
    if not difference <= tolerance:
        row['Status'] = "decline"
        row['DeclineReason'] = "Prescribed Amount Mismatch"
    return row


df_new = df_new.apply(compare_prescribed_and_invoiced, axis=1)
df_final = df_new[df_new['Status'] == "proceed"]

# Save the filtered dataframe
df_final.to_csv('filtered_data_set.csv', index=False)

# For declined rows, save them with reasons -- from every stage, restored to
# their original input order.  Rows declined before stage 4 have no
# 'Prescribed Amount' value.
declined_parts.append(df_new[df_new['Status'] == "decline"])
non_empty_parts = [part for part in declined_parts if not part.empty]
if non_empty_parts:
    df_declined = pd.concat(non_empty_parts).sort_index()
else:
    df_declined = df_new.iloc[0:0]
df_declined.to_csv('declined_data_set.csv', index=False)


# model 254

In [7]:
import pandas as pd
from fuzzywuzzy import fuzz
from collections import defaultdict
import re

# Read the incoming records and the historical reference records.
df_new = pd.read_csv('new_data1.csv', low_memory=False)
historical_data = pd.read_csv('historical_data1.csv', low_memory=False)

# Historical side: diagnosis and free text only; rows with a missing value in
# either column are removed.
historical_data = historical_data.loc[:, ['FIRSTDIAGNOSIS', 'ReasonText']]
historical_data = historical_data.dropna()

# Incoming side: the five columns read by the checks in this cell.
df_new = df_new.loc[
    :,
    ['AUTHENTICATION_TYPE', 'POLICYSTATUS', 'FIRSTDIAGNOSIS', 'ReasonText', 'INVOICEDAMOUNT'],
]

# Extract prescription names
def extract_prescription_and_name(text):
    """Pull the prescription name out of a comma-separated reason string.

    Splits ``text`` from the right on at most two commas, takes the
    second-to-last piece, and cuts it at the first opening parenthesis.

    Returns:
        The stripped name, or ``''`` for non-string input or text without a
        comma.
    """
    if not isinstance(text, str):
        return ''

    pieces = text.rsplit(',', 2)
    candidate = pieces[-2].strip() if len(pieces) >= 2 else ''

    # Keep only what precedes the first "(" (and any whitespace before it).
    before_paren = re.search(r'^(.*?)\s*\(', candidate)
    if before_paren is None:
        return candidate
    return before_paren.group(1).strip()

# Derive the prescription name for every record on both sides.
df_new['Prescription'] = df_new['ReasonText'].map(extract_prescription_and_name)
historical_data['Prescription'] = historical_data['ReasonText'].map(extract_prescription_and_name)

# Replace any remaining nulls in the historical frame with each column's
# most frequent value.
historical_fill_values = historical_data.mode().iloc[0]
historical_data = historical_data.fillna(historical_fill_values)

# Missing invoiced amounts get the column mean; every other missing value in
# the incoming frame gets that column's most frequent value.
# NOTE(review): this also fills missing POLICYSTATUS, AUTHENTICATION_TYPE and
# ReasonText with the most common value -- confirm this is intended.
mean_invoiced_amount = df_new['INVOICEDAMOUNT'].mean()
df_new['INVOICEDAMOUNT'] = df_new['INVOICEDAMOUNT'].fillna(mean_invoiced_amount)
df_new = df_new.fillna(df_new.mode().iloc[0])

# Screen on POLICYSTATUS and AUTHENTICATION_TYPE.
# A row is declined when EITHER check fails.  Previously the authentication
# result overwrote the policy result, so a non-"Live" policy with an accepted
# authentication type ended up with Status "proceed" while still carrying the
# reason "Policy Status Declined".
policy_declined = df_new['POLICYSTATUS'] != "Live"
auth_declined = df_new['AUTHENTICATION_TYPE'].isin(["Blank", "UNAUTHORISED", "Off Smart"])

df_new['Status'] = "proceed"
df_new.loc[policy_declined | auth_declined, 'Status'] = "decline"

# When both checks fail, the authentication reason is the one recorded.
df_new['DeclineReason'] = None
df_new.loc[policy_declined, 'DeclineReason'] = "Policy Status Declined"
df_new.loc[auth_declined, 'DeclineReason'] = "Authentication Declined"

# Fuzzy matching for prescriptions
historical_dict = defaultdict(list)
for diagnosis, prescription in zip(historical_data['FIRSTDIAGNOSIS'], historical_data['Prescription']):
    historical_dict[diagnosis].append(prescription)


def check_diagnosis_and_prescription(row, threshold=80):
    """Decline ``row`` when its prescription is unlike any seen before.

    Args:
        row: Record providing ``'FIRSTDIAGNOSIS'`` and ``'Prescription'``;
            its ``'Status'`` and ``'DeclineReason'`` entries may be written.
        threshold: Minimum ``fuzz.ratio`` score that counts as a match.

    Returns:
        The same ``row``.  If the diagnosis is a key of ``historical_dict``
        and the best score is below ``threshold``, ``Status`` is set to
        "decline" and ``DeclineReason`` to "Incorrect Prescription".
        Otherwise a row that is already declined is left declined, and any
        other row is marked "proceed".
    """
    diagnosis = row['FIRSTDIAGNOSIS']
    prescription = row['Prescription']
    if diagnosis in historical_dict:
        max_score = max(
            fuzz.ratio(prescription, hist_prescription)
            for hist_prescription in historical_dict[diagnosis]
        )
        if max_score < threshold:
            row['Status'] = "decline"
            row['DeclineReason'] = "Incorrect Prescription"
            return row
    # Passing this check must not undo a decline recorded by an earlier one.
    if row.get('Status') != "decline":
        row['Status'] = "proceed"
    return row


df_new = df_new.apply(check_diagnosis_and_prescription, axis=1)

# Compare Prescribed Amount and INVOICEDAMOUNT
def _parse_prescribed_amount(text):
    """Return the number between the first '=' and the next ')' in ``text``.

    Returns ``None`` when ``text`` is not a string, has no such span, or the
    span is not a valid float.  The last case used to raise ``ValueError``
    and abort the whole run on a single malformed record.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'=(.*?)\)', text)  # searched once instead of twice
    if found is None:
        return None
    try:
        return float(found.group(1).strip())
    except ValueError:
        return None


df_new['Prescribed Amount'] = df_new['ReasonText'].apply(_parse_prescribed_amount)

def compare_prescribed_and_invoiced(row, tolerance=1e-6):
    """Decline ``row`` when prescribed and invoiced amounts disagree.

    Args:
        row: Mapping-like record providing ``'Prescribed Amount'`` and
            ``'INVOICEDAMOUNT'``; ``'Status'`` and ``'DeclineReason'`` are
            written on a mismatch.
        tolerance: Largest absolute difference still treated as equal.  Used
            instead of exact float equality so that values that differ only
            by floating-point rounding are not reported as mismatches.

    Returns:
        The same ``row``.  When the amounts differ by more than
        ``tolerance``, or either one is missing or non-numeric, ``Status``
        becomes "decline" and ``DeclineReason`` becomes
        "Prescribed Amount Mismatch"; otherwise the row is unchanged.
    """
    try:
        difference = abs(float(row['Prescribed Amount']) - float(row['INVOICEDAMOUNT']))
    except (TypeError, ValueError):
        # A missing or non-numeric amount can never be confirmed as a match.
        difference = float('inf')
    # "not <=" (rather than ">") so that a NaN difference is also declined.
    if not difference <= tolerance:
        row['Status'] = "decline"
        row['DeclineReason'] = "Prescribed Amount Mismatch"
    return row

# Run the amount check on every row; the function hands each row back with
# Status / DeclineReason updated where the amounts disagree.
df_new = df_new.apply(compare_prescribed_and_invoiced, axis=1)

# Write every row, proceeding or declined, together with its status and
# decline reason.
df_new.to_csv('processed_data_set.csv', index=False)



# new model 254

In [4]:
import pandas as pd
import joblib
from collections import defaultdict
import re

# Read the historical reference records.
historical_data = pd.read_csv('historical_data.csv', low_memory=False)

# Keep the diagnosis and free-text columns, then discard any row in which
# either of them is missing.
historical_data = historical_data.loc[:, ['FIRSTDIAGNOSIS', 'ReasonText']]
historical_data = historical_data.dropna()

# Extract prescription names
def extract_prescription_and_name(text):
    """Return the prescription name embedded in a reason string.

    The string is split from the right on at most two commas; the
    second-to-last piece is the prescription.  Anything from the first
    opening parenthesis onward is dropped.  Non-string input, or a string
    without a comma, yields ``''``.
    """
    prescription = ''
    if isinstance(text, str):
        segments = text.rsplit(',', 2)
        if len(segments) > 1:
            prescription = segments[-2].strip()
        # Trim a trailing "(...)" detail, if the piece has one.
        leading = re.search(r'^(.*?)\s*\(', prescription)
        if leading:
            prescription = leading.group(1).strip()
    return prescription

# Derive the prescription name for every historical record.
historical_data['Prescription'] = historical_data['ReasonText'].map(extract_prescription_and_name)

# Replace any remaining nulls with each column's most frequent value.
historical_fill_values = historical_data.mode().iloc[0]
historical_data = historical_data.fillna(historical_fill_values)

# Group the historical prescriptions by diagnosis.
historical_dict = defaultdict(list)
diagnosis_column = historical_data['FIRSTDIAGNOSIS']
prescription_column = historical_data['Prescription']
for known_diagnosis, known_prescription in zip(diagnosis_column, prescription_column):
    historical_dict[known_diagnosis].append(known_prescription)

# Persist the lookup to disk and report completion.
joblib.dump(historical_dict, 'historical_data_dict.pkl')
print("Historical data dictionary saved!")


Historical data dictionary saved!


In [3]:
import pandas as pd
from fuzzywuzzy import fuzz
from collections import defaultdict
import re
import joblib

# Read the incoming records to be screened.
df_new = pd.read_csv('new_data1.csv', low_memory=False)

# Load the diagnosis -> prescriptions lookup from disk.
# NOTE(review): joblib.load unpickles the file, so it should only be pointed
# at a file this project produced itself.
historical_dict = joblib.load('historical_data_dict.pkl')

# Restrict the incoming frame to the five columns used by the checks in this
# cell.
df_new = df_new.loc[
    :,
    ['AUTHENTICATION_TYPE', 'POLICYSTATUS', 'FIRSTDIAGNOSIS', 'ReasonText', 'INVOICEDAMOUNT'],
]

# Extract prescription names
def extract_prescription_and_name(text):
    """Pull the prescription name out of a comma-separated reason string.

    Splits ``text`` from the right on at most two commas, takes the
    second-to-last piece, and cuts it at the first opening parenthesis.

    Returns:
        The stripped name, or ``''`` for non-string input or text without a
        comma.
    """
    if not isinstance(text, str):
        return ''

    pieces = text.rsplit(',', 2)
    candidate = pieces[-2].strip() if len(pieces) >= 2 else ''

    # Keep only what precedes the first "(" (and any whitespace before it).
    before_paren = re.search(r'^(.*?)\s*\(', candidate)
    if before_paren is None:
        return candidate
    return before_paren.group(1).strip()

# Derive the prescription name for every incoming record.
df_new['Prescription'] = df_new['ReasonText'].map(extract_prescription_and_name)

# Missing invoiced amounts get the column mean; every other missing value
# gets that column's most frequent value.
# NOTE(review): this also fills missing POLICYSTATUS, AUTHENTICATION_TYPE and
# ReasonText with the most common value -- confirm this is intended.
mean_invoiced_amount = df_new['INVOICEDAMOUNT'].mean()
df_new['INVOICEDAMOUNT'] = df_new['INVOICEDAMOUNT'].fillna(mean_invoiced_amount)
most_frequent_values = df_new.mode().iloc[0]
df_new = df_new.fillna(most_frequent_values)

# Screen on POLICYSTATUS and AUTHENTICATION_TYPE.
# A row is declined when EITHER check fails.  Previously the authentication
# result overwrote the policy result, so a non-"Live" policy with an accepted
# authentication type ended up with Status "proceed" while still carrying the
# reason "Policy Status Declined".
policy_declined = df_new['POLICYSTATUS'] != "Live"
auth_declined = df_new['AUTHENTICATION_TYPE'].isin(["Blank", "UNAUTHORISED", "Off Smart"])

df_new['Status'] = "proceed"
df_new.loc[policy_declined | auth_declined, 'Status'] = "decline"

# When both checks fail, the authentication reason is the one recorded.
df_new['DeclineReason'] = None
df_new.loc[policy_declined, 'DeclineReason'] = "Policy Status Declined"
df_new.loc[auth_declined, 'DeclineReason'] = "Authentication Declined"


# Fuzzy matching for prescriptions
def check_diagnosis_and_prescription(row, threshold=80):
    """Decline ``row`` when its prescription is unlike any seen before.

    Args:
        row: Record providing ``'FIRSTDIAGNOSIS'`` and ``'Prescription'``;
            its ``'Status'`` and ``'DeclineReason'`` entries may be written.
        threshold: Minimum ``fuzz.ratio`` score that counts as a match.

    Returns:
        The same ``row``.  If the diagnosis is a key of ``historical_dict``
        and the best score is below ``threshold``, ``Status`` is set to
        "decline" and ``DeclineReason`` to "Incorrect Prescription".
        Otherwise a row that is already declined is left declined, and any
        other row is marked "proceed".
    """
    diagnosis = row['FIRSTDIAGNOSIS']
    prescription = row['Prescription']
    if diagnosis in historical_dict:
        max_score = max(
            fuzz.ratio(prescription, hist_prescription)
            for hist_prescription in historical_dict[diagnosis]
        )
        if max_score < threshold:
            row['Status'] = "decline"
            row['DeclineReason'] = "Incorrect Prescription"
            return row
    # Passing this check must not undo a decline recorded by an earlier one.
    if row.get('Status') != "decline":
        row['Status'] = "proceed"
    return row


df_new = df_new.apply(check_diagnosis_and_prescription, axis=1)

# Compare Prescribed Amount and INVOICEDAMOUNT
def _parse_prescribed_amount(text):
    """Return the number between the first '=' and the next ')' in ``text``.

    Returns ``None`` when ``text`` is not a string, has no such span, or the
    span is not a valid float.  The last case used to raise ``ValueError``
    and abort the whole run on a single malformed record.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'=(.*?)\)', text)  # searched once instead of twice
    if found is None:
        return None
    try:
        return float(found.group(1).strip())
    except ValueError:
        return None


df_new['Prescribed Amount'] = df_new['ReasonText'].apply(_parse_prescribed_amount)

def compare_prescribed_and_invoiced(row, tolerance=1e-6):
    """Decline ``row`` when prescribed and invoiced amounts disagree.

    Args:
        row: Mapping-like record providing ``'Prescribed Amount'`` and
            ``'INVOICEDAMOUNT'``; ``'Status'`` and ``'DeclineReason'`` are
            written on a mismatch.
        tolerance: Largest absolute difference still treated as equal.  Used
            instead of exact float equality so that values that differ only
            by floating-point rounding are not reported as mismatches.

    Returns:
        The same ``row``.  When the amounts differ by more than
        ``tolerance``, or either one is missing or non-numeric, ``Status``
        becomes "decline" and ``DeclineReason`` becomes
        "Prescribed Amount Mismatch"; otherwise the row is unchanged.
    """
    try:
        difference = abs(float(row['Prescribed Amount']) - float(row['INVOICEDAMOUNT']))
    except (TypeError, ValueError):
        # A missing or non-numeric amount can never be confirmed as a match.
        difference = float('inf')
    # "not <=" (rather than ">") so that a NaN difference is also declined.
    if not difference <= tolerance:
        row['Status'] = "decline"
        row['DeclineReason'] = "Prescribed Amount Mismatch"
    return row

# Run the amount check on every row; the function hands each row back with
# Status / DeclineReason updated where the amounts disagree.
df_new = df_new.apply(compare_prescribed_and_invoiced, axis=1)

# Write every row, proceeding or declined, together with its status and
# decline reason.
df_new.to_csv('processed_data_set.csv', index=False)
