In [14]:
# Load every line of the CSV, then split the first one into column names.
with open("Insurance_auto_data.csv") as file:
    lines = list(file)

# The first line holds the comma-separated column names.
header = lines[0].strip().split(",")
print("Headers:", header)


Headers: ['CLAIM_ID', 'CLAIM_DATE', 'CUSTOMER_ID', 'CLAIM_AMOUNT', 'PREMIUM_COLLECTED', 'PAID_AMOUNT', 'CITY', 'REJECTION_REMARKS']


In [41]:
# Step 1: Read and Clean Data
def clean_data(filepath):
    """Read a CSV file and return its header plus type-coerced data rows.

    Every data cell is stripped of surrounding whitespace and converted:

    * an empty string or "na" (any letter case) becomes ``None``;
    * a value containing "." is parsed with ``float``;
    * any other value is parsed with ``int``;
    * a value that fails numeric parsing is kept as the stripped string.

    The file is parsed with the ``csv`` module, so a quoted field that
    contains a comma stays in a single column. Completely blank lines are
    skipped rather than turned into a one-element ``[None]`` row.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A ``(header, data)`` tuple. ``header`` is the list of column names
        taken from the first line (each name stripped); ``data`` is a list
        of rows, each a list of cleaned values. An empty file yields
        ``([], [])``.
    """
    import csv  # local import so the cell does not rely on a separate import cell

    def _coerce(raw):
        # Convert one raw cell to None, float, int, or the stripped string.
        value = raw.strip()
        if value == '' or value.lower() == 'na':
            return None
        try:
            return float(value) if '.' in value else int(value)
        except ValueError:
            # Not numeric (dates, city names, free-text remarks): keep the text.
            return value

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f, skipinitialspace=True)
        header = [name.strip() for name in next(reader, [])]
        # csv.reader yields [] for a blank line; such lines carry no record.
        data = [[_coerce(cell) for cell in row] for row in reader if row]

    return header, data

# Parse the insurance CSV; `header` and `data` are reused by the cells below.
filename = "Insurance_auto_data.csv"
header, data = clean_data(filepath=filename)
print("Data Loaded. Rows:", len(data))



Data Loaded. Rows: 100


In [37]:
# Print header to verify column names.
# NOTE: relies on `header` having been defined by an earlier cell.
print("CSV Columns:", header)

# Step 2: City-Wise Analysis

def city_analysis(header, data):
    """Aggregate claim statistics per city.

    Column positions are looked up by name in ``header`` ('CITY',
    'CLAIM_AMOUNT', 'REJECTION_REMARKS').

    Args:
        header: List of column names.
        data: List of rows; each row is indexed by the positions found
            in ``header``.

    Returns:
        A dict keyed by the row's city value (whatever the CITY cell holds,
        including ``None``). Each value is a dict with:

        * 'claims': number of rows for that city;
        * 'amount': sum of CLAIM_AMOUNT over rows where it is an int or
          float (other values are ignored);
        * 'rejected': number of rows with a truthy, non-blank rejection
          remark.

    Raises:
        ValueError: If a required column name is missing from ``header``.
    """
    city_index = header.index('CITY')
    claim_index = header.index('CLAIM_AMOUNT')
    rejection_remarks_index = header.index('REJECTION_REMARKS')

    city_stats = {}
    for row in data:
        claim = row[claim_index]
        rejection_remarks = row[rejection_remarks_index]

        stat = city_stats.setdefault(
            row[city_index], {'claims': 0, 'amount': 0, 'rejected': 0}
        )
        stat['claims'] += 1
        if isinstance(claim, (int, float)):
            stat['amount'] += claim

        # A present remark marks the claim as rejected. str() guards against
        # remarks that were parsed as numbers, which have no .strip().
        if rejection_remarks and str(rejection_remarks).strip():
            stat['rejected'] += 1

    return city_stats

# Compute the per-city statistics and list them one city per line.
stats = city_analysis(header=header, data=data)
print("City Analysis:")
for city in stats:
    stat = stats[city]
    print(f"{city}: {stat}")



CSV Columns: ['CLAIM_ID', 'CLAIM_DATE', 'CUSTOMER_ID', 'CLAIM_AMOUNT', 'PREMIUM_COLLECTED', 'PAID_AMOUNT', 'CITY', 'REJECTION_REMARKS']
City Analysis:
PUNE: {'claims': 37, 'amount': 1543919.0, 'rejected': 3}
GUWAHATI: {'claims': 24, 'amount': 1148030.0, 'rejected': 3}
RANCHI: {'claims': 17, 'amount': 570328.0, 'rejected': 2}
KOLKATA: {'claims': 16, 'amount': 847893.0, 'rejected': 0}
None: {'claims': 6, 'amount': 195448.0, 'rejected': 2}


In [38]:
# Step 3: Fix complex_rejection_classifier from rejection_reason.py
def complex_rejection_classifier(remark):
    """Map a rejection remark to a coarse rejection category.

    Matching is case-insensitive and based on substrings. Categories are
    tried in a fixed order and the first one with a matching keyword wins.

    Args:
        remark: The rejection remark to classify.

    Returns:
        "Invalid Remark" when ``remark`` is not a string; otherwise one of
        'Documentation Issue', 'Fraudulent Claim', 'Policy Limitation',
        or 'Other Reason' when no keyword matches.
    """
    if not isinstance(remark, str):
        return "Invalid Remark"

    text = remark.lower()

    # Ordered (keywords, category) rules; earlier rules take precedence.
    keyword_rules = (
        (('document', 'missing'), 'Documentation Issue'),
        (('fraud', 'fake'), 'Fraudulent Claim'),
        (('not covered', 'policy'), 'Policy Limitation'),
    )
    for keywords, category in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return category

    return 'Other Reason'


In [39]:
# Step 4: Apply rejection classifier to remarks
def apply_rejection_classification(header, data):
    """Classify the rejection remark of every row.

    Args:
        header: List of column names; must contain "REJECTION_REMARKS".
        data: List of rows indexed by the positions found in ``header``.

    Returns:
        A list with one label per row, in row order: "No Remark" when the
        remark cell is falsy, otherwise the result of
        ``complex_rejection_classifier`` for that remark.
    """
    remarks_index = header.index("REJECTION_REMARKS")

    # Falsy cells (None, empty string, 0) never reach the classifier.
    return [
        complex_rejection_classifier(row[remarks_index])
        if row[remarks_index]
        else "No Remark"
        for row in data
    ]

# Label every row's rejection remark and preview the first five labels.
rejection_classes = apply_rejection_classification(header=header, data=data)
print("Sample Rejection Classifications:", rejection_classes[:5])


Sample Rejection Classifications: ['No Remark', 'No Remark', 'No Remark', 'No Remark', 'No Remark']
