In [2]:
import pandas as pd
import numpy as np
from scipy import stats

In [24]:
# Load the processed pricing-features table and preview the first five rows.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs on other machines.
df = pd.read_csv(filepath_or_buffer="C:/Users/Abhi/Desktop/Python_Data_Analytics_Projects/car_insurance_pricing_analytics/data/processed/car_insurance_pricing_features.csv")

df.head(5)

Unnamed: 0,policy_id,policy_start_date,vehicle_type,policy_type,region,vehicle_age,driver_age,base_premium,discount_pct,final_premium,...,policy_year,underwriting_margin,premium_per_vehicle_age,discount_applied,discount_bucket,avg_vehicle_premium,premium_deviation_pct,risk_adjusted_discount,pricing_status,effective_price_index
0,POL10001,2023-04-13,SUV,Third Party,West,12,41,16600,0,16600.0,...,2023,8924.196385,1276.923077,0,No Discount,16035.203145,3.522231,0.0,Profitable,1.035222
1,POL10002,2023-11-27,Sedan,Third Party,West,7,23,14100,0,14100.0,...,2023,2989.708434,1762.5,0,No Discount,13138.717949,7.316407,0.0,Profitable,1.073164
2,POL10003,2024-05-06,Luxury,Third Party,West,4,53,19200,0,19200.0,...,2024,7489.870605,3840.0,0,No Discount,20650.765125,-7.025237,0.0,Profitable,0.929748
3,POL10004,2024-07-14,Sedan,Third Party,West,11,36,13300,0,13300.0,...,2024,7497.973768,1108.333333,0,No Discount,13138.717949,1.227533,0.0,Profitable,1.012275
4,POL10005,2024-07-16,Sedan,Comprehensive,North,2,57,13100,5,12445.0,...,2024,6618.130912,4148.333333,1,Low,13138.717949,-5.279952,1.666667,Profitable,0.9472


In [8]:
# Mean premium deviation (%) per discount bucket, ordered from the most
# negative deviation to the most positive.
bucket_mean_deviation = df.groupby("discount_bucket")["premium_deviation_pct"].mean()
discount_impact = bucket_mean_deviation.reset_index().sort_values(
    by="premium_deviation_pct"
)

discount_impact


Unnamed: 0,discount_bucket,premium_deviation_pct
0,High,-11.011101
2,Medium,-5.940255
1,Low,-0.0141
3,No Discount,5.364792


In [10]:
# Average underwriting margin for each discount bucket; the bucket is kept as
# a regular column rather than as the index.
margin_by_discount = df.groupby("discount_bucket", as_index=False)[
    "underwriting_margin"
].mean()

margin_by_discount


Unnamed: 0,discount_bucket,underwriting_margin
0,High,4752.812961
1,Low,5473.957701
2,Medium,5285.388385
3,No Discount,5816.846218


In [12]:
# Split the premium deviation by whether a discount was applied (flag 1 vs 0).
discounted = df.loc[df["discount_applied"] == 1, "premium_deviation_pct"]
non_discounted = df.loc[df["discount_applied"] == 0, "premium_deviation_pct"]

# Two-sample t-test without assuming equal variances (equal_var=False).
t_stat, p_value = stats.ttest_ind(a=discounted, b=non_discounted, equal_var=False)

t_stat, p_value


(-18.53617438778054, 1.934722081642445e-72)

In [14]:
# Report the test outcome against a 0.05 significance threshold.
verdict = (
    "Discounts significantly affect pricing (Reject H₀)"
    if p_value < 0.05
    else "No significant pricing effect (Fail to reject H₀)"
)
print(verdict)


Discounts significantly affect pricing (Reject H₀)


In [16]:
# Bucket the effective price index into four labelled zones. Edges are
# 0, 0.9, 1.0, 1.1 and +inf; each label maps to the interval between two
# consecutive edges, in order.
zone_edges = [0, 0.9, 1.0, 1.1, np.inf]
zone_labels = ["Highly Discounted", "Competitive", "Market Priced", "Overpriced"]

df["price_sensitivity_zone"] = pd.cut(
    df["effective_price_index"], bins=zone_edges, labels=zone_labels
)
df.head(5)

Unnamed: 0,policy_id,policy_start_date,vehicle_type,policy_type,region,vehicle_age,driver_age,base_premium,discount_pct,final_premium,...,underwriting_margin,premium_per_vehicle_age,discount_applied,discount_bucket,avg_vehicle_premium,premium_deviation_pct,risk_adjusted_discount,pricing_status,effective_price_index,price_sensitivity_zone
0,POL10001,2023-04-13,SUV,Third Party,West,12,41,16600,0,16600.0,...,8924.196385,1276.923077,0,No Discount,16035.203145,3.522231,0.0,Profitable,1.035222,Market Priced
1,POL10002,2023-11-27,Sedan,Third Party,West,7,23,14100,0,14100.0,...,2989.708434,1762.5,0,No Discount,13138.717949,7.316407,0.0,Profitable,1.073164,Market Priced
2,POL10003,2024-05-06,Luxury,Third Party,West,4,53,19200,0,19200.0,...,7489.870605,3840.0,0,No Discount,20650.765125,-7.025237,0.0,Profitable,0.929748,Competitive
3,POL10004,2024-07-14,Sedan,Third Party,West,11,36,13300,0,13300.0,...,7497.973768,1108.333333,0,No Discount,13138.717949,1.227533,0.0,Profitable,1.012275,Market Priced
4,POL10005,2024-07-16,Sedan,Comprehensive,North,2,57,13100,5,12445.0,...,6618.130912,4148.333333,1,Low,13138.717949,-5.279952,1.666667,Profitable,0.9472,Competitive


In [18]:
# Summarise each price-sensitivity zone: number of policies, mean underwriting
# margin and mean discount percentage.
#
# `price_sensitivity_zone` is a categorical column (produced by pd.cut), and
# grouping on a categorical without `observed=` emits a pandas FutureWarning
# because the default is changing. `observed=False` is passed explicitly to
# keep the current result (every zone listed, even one with no policies) and
# to silence the warning.
zone_summary = (
    df.groupby("price_sensitivity_zone", observed=False)
      .agg(
          policies=("policy_id", "count"),
          avg_margin=("underwriting_margin", "mean"),
          avg_discount=("discount_pct", "mean")
      )
      .reset_index()
)

zone_summary


  df.groupby("price_sensitivity_zone")


Unnamed: 0,price_sensitivity_zone,policies,avg_margin,avg_discount
0,Highly Discounted,739,4362.246334,7.686062
1,Competitive,728,5394.746274,5.377747
2,Market Priced,823,5996.950256,3.882139
3,Overpriced,710,6265.111983,2.521127


In [20]:
# Policies that received a discount yet still have an effective price index
# above 1.
discount_ineffective = df.loc[
    df["discount_applied"].eq(1) & df["effective_price_index"].gt(1)
]

discount_ineffective.shape


(665, 22)

In [22]:
# Write the zone summary to CSV without the row index.
# NOTE(review): absolute, machine-specific output path; consider making the
# data directory configurable.
zone_summary.to_csv(
    path_or_buf="C:/Users/Abhi/Desktop/Python_Data_Analytics_Projects/car_insurance_pricing_analytics/data/processed/price_sensitivity_summary.csv",
    index=False,
)