In [2]:
# importing necessary libraries
import pandas as pd
import numpy as np
import scipy.stats as stats

# Read the survey responses into a DataFrame.
# The path is relative, so the CSV must sit in the notebook's working directory.
survey_csv_path = "synthetic_health_insurance_survey.csv"
df = pd.read_csv(survey_csv_path)

In [3]:
# Preview the first rows of the loaded survey data (head() defaults to five rows).
df.head()

Unnamed: 0,Age,Gender,Marital Status,Household Size,Occupation,Monthly Income,Healthcare Facility Used,Visit Frequency,Common Illness,Healthcare Challenges,Awareness of Insurance,Interest in Insurance,Yearly Premium Willing to Pay,Preferred Payment Model,Co-payment Per Visit,Encouragement Factor
0,46 – 60,Female,Widowed,1,Artisan,"Less than 10,000",Pharmacy/Chemist,More than 8 times,Malaria/Fever,Lack of insurance coverage,No,Yes,"₦7,001 – ₦10,000",Completely free after payment,"More than ₦2,000",Family/friends recommendation
1,Above 60,Female,Divorced,3,Other,"Less than 10,000",Self-medication,3 – 5 times,Diabetes,Distance to facility,No,Yes,"₦7,001 – ₦10,000",10% co-payment per visit,₦200 – ₦500,Availability of drugs & services
2,31 – 45,Female,Single,8,Artisan,"30,001 – 50,000",Self-medication,3 – 5 times,Typhoid,Distance to facility,No,Yes,"₦5,001 – ₦7,000",Completely free after payment,"₦501 – ₦1,000",Government assurance
3,Above 60,Female,Divorced,3,Unemployed,"30,001 – 50,000",Pharmacy/Chemist,More than 8 times,Malaria/Fever,Long waiting times,Yes,No,"₦7,001 – ₦10,000",Completely free after payment,"₦1,001 – ₦2,000",Quality healthcare services
4,Above 60,Female,Widowed,8,Other,"10,000 – 30,000",Self-medication,3 – 5 times,Respiratory infections,Poor quality of services,Yes,Yes,"₦5,001 – ₦7,000",Completely free after payment,"More than ₦2,000",Low cost


In [4]:
# Numeric NGN value used to represent each "Yearly Premium Willing to Pay" bracket.
# Closed brackets use (approximately) their midpoint; the two open-ended brackets
# ("Less than ..." / "More than ...") have no midpoint, so their values are assumptions.
_premium_bracket_values = [
    ("Less than ₦3,000", 2500),
    ("₦3,000 – ₦5,000", 4000),
    ("₦5,001 – ₦7,000", 6000),
    ("₦7,001 – ₦10,000", 8500),
    ("More than ₦10,000", 11000),
]
premium_mapping = dict(_premium_bracket_values)

In [6]:
# Convert each respondent's premium bracket label to its numeric NGN value.
# Series.map() returns NaN for any label that is not a key of premium_mapping,
# so a typo or a dash/whitespace variant in the survey data would silently
# become a missing value and distort statistics computed from this column.
# Fail loudly instead of letting that happen unnoticed.
premium_labels = df["Yearly Premium Willing to Pay"]
premium_values = premium_labels.map(premium_mapping)

# Labels that were answered in the survey but have no entry in the mapping.
# Genuinely missing answers stay NaN and are not treated as an error.
unmapped_labels = premium_labels[premium_values.isna() & premium_labels.notna()].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unmapped 'Yearly Premium Willing to Pay' categories: {list(unmapped_labels)}"
    )

df["Yearly Premium (NGN)"] = premium_values

In [7]:
# Preview again to check the newly added "Yearly Premium (NGN)" column.
df.head()

Unnamed: 0,Age,Gender,Marital Status,Household Size,Occupation,Monthly Income,Healthcare Facility Used,Visit Frequency,Common Illness,Healthcare Challenges,Awareness of Insurance,Interest in Insurance,Yearly Premium Willing to Pay,Preferred Payment Model,Co-payment Per Visit,Encouragement Factor,Yearly Premium (NGN)
0,46 – 60,Female,Widowed,1,Artisan,"Less than 10,000",Pharmacy/Chemist,More than 8 times,Malaria/Fever,Lack of insurance coverage,No,Yes,"₦7,001 – ₦10,000",Completely free after payment,"More than ₦2,000",Family/friends recommendation,8500
1,Above 60,Female,Divorced,3,Other,"Less than 10,000",Self-medication,3 – 5 times,Diabetes,Distance to facility,No,Yes,"₦7,001 – ₦10,000",10% co-payment per visit,₦200 – ₦500,Availability of drugs & services,8500
2,31 – 45,Female,Single,8,Artisan,"30,001 – 50,000",Self-medication,3 – 5 times,Typhoid,Distance to facility,No,Yes,"₦5,001 – ₦7,000",Completely free after payment,"₦501 – ₦1,000",Government assurance,6000
3,Above 60,Female,Divorced,3,Unemployed,"30,001 – 50,000",Pharmacy/Chemist,More than 8 times,Malaria/Fever,Long waiting times,Yes,No,"₦7,001 – ₦10,000",Completely free after payment,"₦1,001 – ₦2,000",Quality healthcare services,8500
4,Above 60,Female,Widowed,8,Other,"10,000 – 30,000",Self-medication,3 – 5 times,Respiratory infections,Poor quality of services,Yes,Yes,"₦5,001 – ₦7,000",Completely free after payment,"More than ₦2,000",Low cost,6000


In [8]:
# Compute sample statistics from the non-missing premium values only.
# np.mean / np.std on a pandas Series skip NaN, whereas len() of the column
# counts every row; taking all three from the same NaN-free Series keeps the
# sample size consistent with the mean and standard deviation it is paired with.
premium_observed = df["Yearly Premium (NGN)"].dropna()

sample_mean = np.mean(premium_observed)  # Mean willingness to pay
sample_std = np.std(premium_observed, ddof=1)  # Sample standard deviation (n - 1 denominator)
n = len(premium_observed)  # Number of respondents with a usable premium value

In [9]:
# 95% confidence interval for the mean premium, using the normal approximation:
# the interval is centred on the sample mean and scaled by the standard error.
confidence_level = 0.95
standard_error = sample_std / np.sqrt(n)
lower_bound, upper_bound = stats.norm.interval(
    confidence_level,
    loc=sample_mean,
    scale=standard_error,
)

In [10]:
# Report the point estimate followed by its confidence interval, one per line.
result_lines = (
    f"Estimated Yearly Contribution Willing to Pay: {sample_mean:.2f} NGN",
    f"95% Confidence Interval: [{lower_bound:.2f}, {upper_bound:.2f}] NGN",
)
for result_line in result_lines:
    print(result_line)

Estimated Yearly Contribution Willing to Pay: 6523.00 NGN
95% Confidence Interval: [6332.78, 6713.22] NGN


In [11]:
# Decision rule: the scheme is judged viable only when the lower end of the
# confidence interval reaches the sustainability threshold.
required_threshold = 5000  # Minimum required amount per person for scheme sustainability

verdict = (
    "✅ The insurance scheme is likely financially viable."
    if lower_bound >= required_threshold
    else "⚠️ The estimated contributions are too low. Reconsider pricing or seek alternative funding."
)
print(verdict)


✅ The insurance scheme is likely financially viable.
