In [1]:
import numpy as np
import pandas as pd

# Define payment methods and features
payment_methods = ["Amazon Pay UPI", "Google Pay", "PhonePe", "Credit Card", "Debit Card", "EMI", "Net Banking", "Cash on Delivery"]
num_payment_methods = len(payment_methods)

# User types in cyclic order
user_types = ["cashback_oriented", "cost_oriented", "history_oriented"]

# Function to generate user data
def generate_user_data(num_users):
    data = []
    user_payment_history = {user_id: {method: 0 for method in payment_methods} for user_id in range(1, num_users + 1)}
    payment_success_rates = {method: {"success": 0, "total": 0} for method in payment_methods}

    for user_id in range(1, num_users + 1):
        user_type_index = 0  # Start with the first user type
        num_products = np.random.randint(5, 51)
        for product_id in range(1, num_products ):  # Assuming each user orders 10 products
            price = np.random.randint(100, 10000)  # Random price between 100 and 10000
            payment_options = []

            for method in payment_methods:
                cost = np.random.uniform(0.01, 0.05) * price  # Cost as percentage of the product price
                cashback = np.random.uniform(0.01, 0.10) * price  # Cashback as percentage of the product price

                # Calculate current success rate based on past transactions
                success = payment_success_rates[method]["success"]
                total = payment_success_rates[method]["total"]
                success_rate = (success / total) if total > 0 else np.random.uniform(0.80, 0.99)

                user_history = min(user_payment_history[user_id][method], 10)  # User history (how many times used in recent transactions)

                payment_options.append({
                    "method": method,
                    "cost": cost,
                    "cashback": cashback,
                    "success_rate": success_rate,
                    "user_history": user_history
                })

            user_type = user_types[user_type_index % len(user_types)]  # Determine the current user type in cyclic order

            # Determine the best payment method based on user type
            if user_type == "cashback_oriented":
                best_method = max(payment_options, key=lambda x: x["cashback"])["method"]
            elif user_type == "history_oriented":
                best_method = max(payment_options, key=lambda x: x["user_history"])["method"]
            elif user_type == "cost_oriented":
                best_method = min(payment_options, key=lambda x: x["cost"])["method"]

            for option in payment_options:
                label = 1 if option["method"] == best_method else 0
                data.append([user_id, product_id, user_type, option["method"], price, option["cost"], option["cashback"], option["success_rate"], option["user_history"], label])

            # Update user payment history and success rates after generating data for each product
            for option in payment_options:
                if option["method"] == best_method:
                    user_payment_history[user_id][option["method"]] += 1
                    if np.random.rand() > 0.05:  # Assuming a 95% success rate for each transaction
                        payment_success_rates[option["method"]]["success"] += 1
                    payment_success_rates[option["method"]]["total"] += 1

            user_type_index += 1  # Move to the next user type for the next product

    return data

# Number of users
num_users = 100

# Generate data for users
combined_data = generate_user_data(num_users)

# Define columns
columns = ["user_id", "product_id", "user_type", "payment_method", "price", "cost", "cashback", "success_rate", "user_history", "label"]

# Create DataFrame
df = pd.DataFrame(combined_data, columns=columns)

# Save to CSV
df.to_csv("user_payment_data.csv", index=False)


print(df.head(30))


    user_id  product_id          user_type    payment_method  price  \
0         1           1  cashback_oriented    Amazon Pay UPI   9722   
1         1           1  cashback_oriented        Google Pay   9722   
2         1           1  cashback_oriented           PhonePe   9722   
3         1           1  cashback_oriented       Credit Card   9722   
4         1           1  cashback_oriented        Debit Card   9722   
5         1           1  cashback_oriented               EMI   9722   
6         1           1  cashback_oriented       Net Banking   9722   
7         1           1  cashback_oriented  Cash on Delivery   9722   
8         1           2      cost_oriented    Amazon Pay UPI   3353   
9         1           2      cost_oriented        Google Pay   3353   
10        1           2      cost_oriented           PhonePe   3353   
11        1           2      cost_oriented       Credit Card   3353   
12        1           2      cost_oriented        Debit Card   3353   
13    

In [2]:
df[0 : 20]

Unnamed: 0,user_id,product_id,user_type,payment_method,price,cost,cashback,success_rate,user_history,label
0,1,1,cashback_oriented,Amazon Pay UPI,9722,453.139588,379.088099,0.988085,0,0
1,1,1,cashback_oriented,Google Pay,9722,278.893667,363.658093,0.814054,0,0
2,1,1,cashback_oriented,PhonePe,9722,413.884934,189.373006,0.86783,0,0
3,1,1,cashback_oriented,Credit Card,9722,170.663767,476.919014,0.809076,0,0
4,1,1,cashback_oriented,Debit Card,9722,417.602308,210.423615,0.886429,0,0
5,1,1,cashback_oriented,EMI,9722,396.546189,194.060436,0.969589,0,0
6,1,1,cashback_oriented,Net Banking,9722,251.747103,784.255132,0.891198,0,1
7,1,1,cashback_oriented,Cash on Delivery,9722,173.105501,683.811172,0.877745,0,0
8,1,2,cost_oriented,Amazon Pay UPI,3353,46.81638,121.701221,0.984371,0,0
9,1,2,cost_oriented,Google Pay,3353,128.426112,300.032614,0.916473,0,0


In [3]:
df.shape

(20664, 10)