In [15]:
#Question 1 - Preprocessing the given data -  without any libraries
#Reading CSV File
def read_csv_basic(filepath):
    with open(filepath, 'r') as file:
        lines = file.read().splitlines()
    header = lines[0].split(',')
    data = [line.split(',') for line in lines[1:] if line.strip()]
    return header, data
    


In [16]:
#Convert Rows to Dictionary 
def convert_to_dicts(header, data_rows):
    result = []
    for row in data_rows:
        if len(row) == len(header):
            row_dict = {header[i]: row[i].strip() for i in range(len(header))}
            result.append(row_dict)
    return result
    


In [17]:
#function for cleaning each row
def clean_record(record):
    cleaned = {}
    for key, value in record.items():
        val = value.strip()
        if val == "":
            cleaned[key] = None
            continue
        if "AMOUNT" in key.upper() or "PREMIUM" in key.upper():
            try:
                cleaned[key] = float(val)
            except ValueError:
                cleaned[key] = None
        else:
            cleaned[key] = val
    return cleaned


In [18]:
#function to cleans all rows
def clean_data(records):
    return [clean_record(record) for record in records]


In [19]:
#calling all function for cleaning data, "cleaned-records" contains is the final data after cleaning
def preprocess_insurance_data(filepath):
    header, data_rows = read_csv_basic(filepath)
    records = convert_to_dicts(header, data_rows)
    cleaned_records = clean_data(records)
    return cleaned_records


In [20]:
file_path = "Insurance_auto_data.csv" 
cleaned_data = preprocess_insurance_data(file_path) # Main functioncall to clean data

cleaned_data



[{'CLAIM_ID': 'CLM100021',
  'CLAIM_DATE': '2025-04-01',
  'CUSTOMER_ID': 'CUST14285',
  'CLAIM_AMOUNT': 10419.0,
  'PREMIUM_COLLECTED': 2198.59,
  'PAID_AMOUNT': 6964.46,
  'CITY': 'PUNE',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100013',
  'CLAIM_DATE': '2025-04-01',
  'CUSTOMER_ID': 'CUST26471',
  'CLAIM_AMOUNT': 42468.0,
  'PREMIUM_COLLECTED': 8982.2,
  'PAID_AMOUNT': 30119.67,
  'CITY': 'GUWAHATI',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100099',
  'CLAIM_DATE': '2025-04-02',
  'CUSTOMER_ID': 'CUST29309',
  'CLAIM_AMOUNT': 55897.0,
  'PREMIUM_COLLECTED': 1861.78,
  'PAID_AMOUNT': 55657.15,
  'CITY': 'GUWAHATI',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100044',
  'CLAIM_DATE': '2025-04-02',
  'CUSTOMER_ID': 'CUST30275',
  'CLAIM_AMOUNT': 71785.0,
  'PREMIUM_COLLECTED': 13154.99,
  'PAID_AMOUNT': 53629.3,
  'CITY': 'PUNE',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100014',
  'CLAIM_DATE': '2025-04-02',
  'CUSTOMER_ID': 'CUST38169',
  'CLAIM_AMOUNT': 18565

In [21]:
#Analysing the data
def analyze_city_performance(cleaned_data):
    city_stats = {}

    for record in cleaned_data:
        city = record.get("CITY", "Unknown")
        if city not in city_stats:
            city_stats[city] = {
                "total_claims": 0,
                "total_claim_amount": 0.0,
                "total_premium_collected": 0.0,
                "total_paid_amount": 0.0,
                "paid_claims": 0,
                "rejected_claims": 0
            }

        
        city_stats[city]["total_claims"] += 1

        # claim amount
        claim_amt = record.get("CLAIM_AMOUNT")
        if claim_amt:
            city_stats[city]["total_claim_amount"] += claim_amt

        # premiun collected
        premium = record.get("PREMIUM_COLLECTED")
        if premium:
            city_stats[city]["total_premium_collected"] += premium

        # paid amount
        paid_amt = record.get("PAID_AMOUNT")
        status = record.get("PAYMENT_STATUS", "").strip().lower()
        if status == "paid":
            city_stats[city]["paid_claims"] += 1
            if paid_amt:
                city_stats[city]["total_paid_amount"] += paid_amt
        elif status == "rejected":
            city_stats[city]["rejected_claims"] += 1

    return city_stats


In [22]:
city_performance = analyze_city_performance(cleaned_data)

# Displaying each city's stats
for city, stats in city_performance.items():
    print(f"\nCity: {city}")
    for metric, value in stats.items():
        print(f"  {metric}: {value}")



City: PUNE
  total_claims: 37
  total_claim_amount: 1543919.0
  total_premium_collected: 369254.7900000001
  total_paid_amount: 0.0
  paid_claims: 0
  rejected_claims: 0

City: GUWAHATI
  total_claims: 24
  total_claim_amount: 1148030.0
  total_premium_collected: 261314.84000000005
  total_paid_amount: 0.0
  paid_claims: 0
  rejected_claims: 0

City: RANCHI
  total_claims: 17
  total_claim_amount: 570328.0
  total_premium_collected: 148858.6
  total_paid_amount: 0.0
  paid_claims: 0
  rejected_claims: 0

City: KOLKATA
  total_claims: 16
  total_claim_amount: 847893.0
  total_premium_collected: 140279.78
  total_paid_amount: 0.0
  paid_claims: 0
  rejected_claims: 0

City: None
  total_claims: 6
  total_claim_amount: 195448.0
  total_premium_collected: 73993.21
  total_paid_amount: 0.0
  paid_claims: 0
  rejected_claims: 0


In [23]:
def add_performance_metrics(city_stats):
    for city, stats in city_stats.items():
        premium = stats["total_premium_collected"]
        claims = stats["total_claims"]
        paid = stats["paid_claims"]
        rejected = stats["rejected_claims"]
        total_claim_amt = stats["total_claim_amount"]

        
        if premium != 0:
            stats["loss_ratio"] = total_claim_amt / premium
        else:
            stats["loss_ratio"] = 0

        if claims != 0:
            stats["settlement_rate"] = paid / claims
            stats["rejection_rate"] = rejected / claims
        else:
            stats["settlement_rate"] = 0
            stats["rejection_rate"] = 0



In [24]:
add_performance_metrics(city_performance)

# Print with metrics
for city, stats in city_performance.items():
    print(f"\n---- {city} — Performance:")
    print(f"Total Claims: {stats['total_claims']}")
    print(f"Premium Collected: {stats['total_premium_collected']}")
    print(f"Claim Amount: {stats['total_claim_amount']}")
    print(f"Paid Claims: {stats['paid_claims']}")
    print(f"Rejected Claims: {stats['rejected_claims']}")
    print(f"Loss Ratio: {stats['loss_ratio']:.2f}")
    print(f"Settlement Rate: {stats['settlement_rate']:.2%}")
    print(f"Rejection Rate: {stats['rejection_rate']:.2%}")



---- PUNE — Performance:
Total Claims: 37
Premium Collected: 369254.7900000001
Claim Amount: 1543919.0
Paid Claims: 0
Rejected Claims: 0
Loss Ratio: 4.18
Settlement Rate: 0.00%
Rejection Rate: 0.00%

---- GUWAHATI — Performance:
Total Claims: 24
Premium Collected: 261314.84000000005
Claim Amount: 1148030.0
Paid Claims: 0
Rejected Claims: 0
Loss Ratio: 4.39
Settlement Rate: 0.00%
Rejection Rate: 0.00%

---- RANCHI — Performance:
Total Claims: 17
Premium Collected: 148858.6
Claim Amount: 570328.0
Paid Claims: 0
Rejected Claims: 0
Loss Ratio: 3.83
Settlement Rate: 0.00%
Rejection Rate: 0.00%

---- KOLKATA — Performance:
Total Claims: 16
Premium Collected: 140279.78
Claim Amount: 847893.0
Paid Claims: 0
Rejected Claims: 0
Loss Ratio: 6.04
Settlement Rate: 0.00%
Rejection Rate: 0.00%

---- None — Performance:
Total Claims: 6
Premium Collected: 73993.21
Claim Amount: 195448.0
Paid Claims: 0
Rejected Claims: 0
Loss Ratio: 2.64
Settlement Rate: 0.00%
Rejection Rate: 0.00%


In [None]:
def recommend_city_for_closure(city_stats):
    scores = {}
    max_loss = max(city["loss_ratio"] for city in city_stats.values())
    max_rejection = max(city["rejection_rate"] for city in city_stats.values())
    max_settlement = max(city["settlement_rate"] for city in city_stats.values())
    max_premium = max(city["total_premium_collected"] for city in city_stats.values())
    

    for city, stats in city_stats.items():
        loss_score = stats["loss_ratio"] / max_loss if max_loss else 0
        rejection_score = stats["rejection_rate"] / max_rejection if max_rejection else 0
        settlement_score = 1 - (stats["settlement_rate"] / max_settlement) if max_settlement else 1
        premium_score = 1 - (stats["total_premium_collected"] / max_premium) if max_premium else 1
        # print(loss_score)
        # print(rejection_score)
        # print(settlement_score)
        # print(premium_score)
        total_score = (loss_score + rejection_score + settlement_score + premium_score) / 4
        scores[city] = round(total_score, 3)

    # Sort cities by score descending
#     score_items = []
#         for city in scores:
#             score_items.append((city, scores[city]))


#     for i in range(len(score_items)):
#         for j in range(i + 1, len(score_items)):
#             if score_items[j][1] > score_items[i][1]:
#             # Swap
#                 temp = score_items[i]
#                 score_items[i] = score_items[j]
#                 score_items[j] = temp

# # Now score_items is sorted from highest to lowest
#     ranked = score_items
    ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)

    print("\nRisk Score by City:")
    for city, score in ranked:
        print(f"  {city}: {score*100}%")

    recommended_city = ranked[0][0]
    print(f"\nRecommended City for Closure: {recommended_city}")
    return recommended_city


In [35]:
recommended_city = recommend_city_for_closure(city_performance)


0.691755148858866
0
1
0.0
0.7268473132571954
0
1
0.29231834744784224
0.6338767020241673
0
1
0.5968675179542018
1.0
0
1
0.6201003106824966
0.437012026157675
0
1
0.7996147592289866

Risk Score by City:
  KOLKATA: 65.5%
  None: 55.900000000000006%
  RANCHI: 55.800000000000004%
  GUWAHATI: 50.5%
  PUNE: 42.3%

Recommended City for Closure: KOLKATA


In [27]:
# Mapping Dictionary
REJECTION_REASONS_MAP = {
    "Fake_document": "Fake_document",
    "Not_Covered": "Not_Covered",
    "Policy_expired": "Policy_expired"
}

# Function 1: Error Handler
def handle_error(error_message):
    print(f"Error: {error_message}")
    return "Error"

# Function 2: Keyword Match Checker
def contains_rejection_reason(rejection_text, reason):
    try:
        if rejection_text and isinstance(rejection_text, str):
            return reason.lower() in rejection_text.lower()
    except Exception as e:
        handle_error(f"Error in contains_rejection_reason: {str(e)}")
    return False

# Function 3: Map Rejection Reason
def map_rejection_reason(rejection_text):
    try:
        if rejection_text and isinstance(rejection_text, str):
            for reason, rejection_class in REJECTION_REASONS_MAP.items():
                if contains_rejection_reason(rejection_text, reason):
                    return rejection_class
            return "Unknown"
        else:
            return "No Remark"
    except Exception as e:
        handle_error(f"Error in map_rejection_reason: {str(e)}")
        return "Error"

# Function 4: Main Classifier
def complex_rejection_classifier(remark_text):
    try:
        if remark_text is None or not isinstance(remark_text, str) or len(remark_text.strip()) == 0:
            return "NoRemark"

        # Match reasons
        fake_doc = contains_rejection_reason(remark_text, "Fake_document")
        not_covered = contains_rejection_reason(remark_text, "Not_Covered")
        policy_expired = contains_rejection_reason(remark_text, "Policy_expired")

        if fake_doc:
            return "Fake_document"
        elif not_covered:
            return "Not_Covered"
        elif policy_expired:
            return "Policy_expired"
        else:
            return map_rejection_reason(remark_text)

    except Exception as e:
        handle_error(f"Error in complex_rejection_classifier: {str(e)}")
        return "Error"


In [28]:
def classify_rejections(data):
    for record in data:
        remark = record.get("REJECTION_REMARKS")
        record["REJECTION_CLASS"] = complex_rejection_classifier(remark)
classify_rejections(cleaned_data)

# Print a few results
for rec in cleaned_data[:10]:
    print(f"Remark: {rec.get('REJECTION_REMARKS')} → Class: {rec['REJECTION_CLASS']}")



Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
Remark: None → Class: NoRemark
