# Fintech Project
- https://www.sganalytics.com/case-study/we-built-a-win-back-model-for-our-fintech-client-to-target-high-propensity-customers-for-a-higher-conversion-rate/

# Generate Data

In [4]:
import numpy as np
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

# Seed every random source used in this script for reproducibility.
# NumPy drives the numeric/categorical columns, the stdlib `random` module
# drives the date columns (via random_date and the missing-value coin flip),
# and Faker generates the customer IDs. Seeding NumPy alone would leave the
# dates and IDs different on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
Faker.seed(SEED)  # class-level seed shared by all Faker instances

# Number of rows to generate
num_rows = 10000

# Function to generate dates within a range
def random_date(start, end):
    """Return `start` shifted forward by a random whole number of days.

    The offset is drawn uniformly from 0 to the number of whole days between
    `start` and `end` (both ends included) using the stdlib `random` module,
    so the time-of-day component of `start` is carried over unchanged.
    """
    span_days = (end - start).days
    offset_days = random.randint(0, span_days)
    return start + timedelta(days=offset_days)

# Generate columns
# Build the synthetic win-back dataset column by column, derive the target,
# inject missing values, and export the result to 'win_back.csv'.

# Unsubscribe dates are drawn first so that each re-engagement date can be
# anchored to its own row's unsubscribe date.
unsub_dates = [
    random_date(datetime(2023, 1, 1), datetime(2024, 1, 1))
    for _ in range(num_rows)
]

# A customer can only re-engage after unsubscribing, so each re-engagement
# date is sampled between that row's unsub_date and 2024-01-31 (drawing both
# dates independently would put many re-engagements before the unsubscribe).
# Roughly 10% of rows are left as None to simulate missing values.
re_engagement_end = datetime(2024, 1, 31)
re_engagement_dates = [
    random_date(unsub, re_engagement_end) if random.random() > 0.1 else None
    for unsub in unsub_dates
]

# Generate columns (the NumPy draws are kept in this order so that a fixed
# NumPy seed keeps producing the same numeric columns).
data = {
    'customer_id': [fake.uuid4() for _ in range(num_rows)],
    'unsub_date': unsub_dates,
    're_engagement_date': re_engagement_dates,
    'user_tenure': np.random.exponential(scale=365, size=num_rows).astype(int),  # Exponential, right-skewed
    'num_logins_last_30_days': np.clip(np.random.normal(loc=15, scale=5, size=num_rows), 1, 30).astype(int),  # Normal, clipped to [1, 30]
    'num_transactions_last_30_days': np.random.poisson(3, size=num_rows),  # Poisson for right skew
    'customer_segment': np.random.choice(['High-Value', 'Medium-Value', 'Low-Value'], p=[0.2, 0.5, 0.3], size=num_rows),
    'campaign_type': np.random.choice(['Email', 'Push Notification', 'SMS'], p=[0.5, 0.3, 0.2], size=num_rows),
    'discount_offered': np.round(np.random.uniform(5, 30, size=num_rows), 2),  # Uniform on [5, 30)
    'prev_winback_success': np.random.choice([0, 1], p=[0.9, 0.1], size=num_rows),  # 10% success
    'app_usage_time': np.clip(np.random.normal(loc=300, scale=100, size=num_rows), 50, 600).astype(int),  # Normal, clipped to [50, 600]
}

# Create a win-back propensity score (simulated as a uniform random value,
# independent of the feature columns; adjust for specific modeling).
data['winback_propensity_score'] = np.round(np.random.uniform(0, 1, size=num_rows), 3)

# Create the target column (win-back success): scores above 0.77 count as a
# success, which gives roughly a 23% success rate for a uniform score.
data['winback_success'] = (data['winback_propensity_score'] > 0.77).astype(int)

# Insert some missing values in 'discount_offered' (about 5% of rows).
# The column is a float NumPy array, so it can hold NaN in place.
mask = np.random.rand(num_rows) < 0.05
data['discount_offered'][mask] = np.nan

# Convert to DataFrame
df = pd.DataFrame(data)

df.to_csv('win_back.csv', index=False)