In [2]:
import random
from pathlib import Path

import pandas as pd

# --- Configuration: every tunable value of the synthetic dataset lives here ---
# Number of rows
n = 200

SEED = 42                  # fixed seed so Restart & Run All regenerates the same dataset
KG_PER_PERSON = 5          # monthly entitlement: 5 kg per family member
MAX_FAMILY_SIZE = 8        # families have 1-8 members
NUM_SHOPS = 10             # shop IDs run from S1 to S10
FRAUD_WEIGHTS = [85, 15]   # relative odds of legit (0) vs. fraud (1), i.e. ~15% fraud
MIN_LEGIT_FRACTION = 0.7   # legit claims fall between 70% and 100% of the limit
MAX_OVERCLAIM = 10         # fraudulent claims exceed the limit by 1-10 kg
OUTPUT_PATH = Path("../dataset/ration_dummy_dataset.csv")

# A private, seeded generator drives every random draw below, so the dataset is
# reproducible and the global `random` state is left untouched.
rng = random.Random(SEED)

# All candidate claim dates (each day of September 2025), built once instead of
# being rebuilt inside the loop for every row.
claim_dates = [ts.date() for ts in pd.date_range("2025-09-01", "2025-09-30")]

# --- Generate one synthetic claim record per beneficiary ---
data = []
for i in range(n):
    beneficiary_id = f"B{1000+i}"                       # Unique ID
    family_size = rng.randint(1, MAX_FAMILY_SIZE)
    monthly_limit = family_size * KG_PER_PERSON
    claim_date = rng.choice(claim_dates)                # uniform over the month
    shop_id = f"S{rng.randint(1, NUM_SHOPS)}"

    # Fraud or legit? The label decides which side of the limit the claim lands on.
    fraud = rng.choices([0, 1], weights=FRAUD_WEIGHTS)[0]
    if fraud == 0:
        # Legit: somewhere between 70% of the limit (rounded down) and the limit itself.
        claimed = rng.randint(int(monthly_limit * MIN_LEGIT_FRACTION), monthly_limit)
    else:
        # Fraud: strictly over the limit.
        claimed = rng.randint(monthly_limit + 1, monthly_limit + MAX_OVERCLAIM)

    data.append([beneficiary_id, family_size, monthly_limit, claimed, claim_date, shop_id, fraud])

# Create DataFrame
df = pd.DataFrame(
    data,
    columns=["BeneficiaryID", "FamilySize", "MonthlyLimit", "Claimed", "Date", "ShopID", "FraudFlag"],
)

# Cheap invariants that document (and guard) how the data was constructed.
assert len(df) == n
assert df["BeneficiaryID"].is_unique
assert ((df["Claimed"] > df["MonthlyLimit"]) == (df["FraudFlag"] == 1)).all()

# Save to dataset folder (created on demand so a fresh checkout does not fail here)
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)

df.head()

Unnamed: 0,BeneficiaryID,FamilySize,MonthlyLimit,Claimed,Date,ShopID,FraudFlag
0,B1000,4,20,18,2025-09-28,S9,0
1,B1001,5,25,22,2025-09-18,S7,0
2,B1002,3,15,14,2025-09-12,S9,0
3,B1003,2,10,7,2025-09-03,S1,0
4,B1004,4,20,14,2025-09-17,S10,0


In [3]:
# Rule-based detector: flag a claim (1) when it exceeds the monthly limit, else 0.
# A vectorized column comparison replaces the row-wise apply(); it produces the
# same 0/1 values without calling a Python lambda once per row.
df['DetectedFraud'] = (df['Claimed'] > df['MonthlyLimit']).astype('int64')

# Share of rows where the rule agrees with the ground-truth label.
# NOTE: FraudFlag was generated with this very over-limit rule when the synthetic
# data was built, so a score of 1.0 is expected by construction and says nothing
# about how the rule would perform on real claims.
print("Detection Accuracy:", (df['FraudFlag'] == df['DetectedFraud']).mean())

# Preview the first few flagged claims.
df[df['DetectedFraud'] == 1].head()

Detection Accuracy: 1.0


Unnamed: 0,BeneficiaryID,FamilySize,MonthlyLimit,Claimed,Date,ShopID,FraudFlag,DetectedFraud
5,B1005,2,10,17,2025-09-13,S5,1,1
6,B1006,1,5,7,2025-09-08,S5,1,1
7,B1007,5,25,35,2025-09-24,S7,1,1
14,B1014,6,30,31,2025-09-18,S7,1,1
15,B1015,8,40,49,2025-09-06,S3,1,1
