In [None]:
import pandas as pd
import numpy as np
from scipy import stats

# Generate a synthetic "Amazon product return" dataset and write it to CSV.
#
# Every column is drawn independently from a simple random distribution, so
# the data has no real predictive structure. The only derived columns are
# return_date / return_month (from order_date + return_days) and
# within_3_months (from return_days).

# Seed the legacy global NumPy RNG so repeated runs produce the same file.
# NOTE: the order of the np.random.* calls below determines the values drawn;
# reordering them changes the generated data.
np.random.seed(0)

# Number of synthetic orders (rows) to generate
n_rows = 1000

# --- Independent order attributes -------------------------------------------
order_id = np.arange(1, n_rows + 1)

# Build a pool of 1000 candidate ASIN strings ("B" + 8 digits), then sample
# from it with replacement, so duplicate ASINs across orders are expected.
asin_pool = [f'B{x:08d}' for x in np.random.randint(10000000, 99999999, 1000)]
asin = np.random.choice(asin_pool, size=n_rows)

category = np.random.choice(
    ['Electronics', 'Fashion', 'Home & Kitchen', 'Sports'], size=n_rows
)
# One order per consecutive calendar day, starting 2022-01-01
order_date = pd.date_range(start='2022-01-01', periods=n_rows)
price = np.round(np.random.uniform(10, 100, size=n_rows), 2)
size_category = np.random.choice(['S', 'M', 'L', 'XL'], size=n_rows)
# Binary flag drawn with P(1) = 0.3; independent of every other column
will_return = np.random.choice([0, 1], size=n_rows, p=[0.7, 0.3])
shipped_month = np.random.choice(np.arange(1, 13), size=n_rows)

# --- Return timing ------------------------------------------------------------
# Whole days until return, exponentially distributed with a mean of 30 days.
return_days = np.round(np.random.exponential(scale=30, size=n_rows))

# Exponential draws are already non-negative; the clip is a defensive guard.
return_days = np.clip(return_days, 0, None)

# Anchor each return date to that row's own order date. The previous version
# added return_days to the constant 2022-01-01, which placed almost every
# return date before its order date.
return_date = pd.Series(order_date + pd.to_timedelta(return_days, unit='D'))
return_month = return_date.dt.month

# Return reason, uniformly sampled
return_reason = np.random.choice(
    ['Damaged', 'Not as Expected', 'Changed Mind'], size=n_rows
)

# 1 when the return happened within 90 days of the order, else 0
within_3_months = np.where(return_days <= 90, 1, 0)

# --- Assemble and persist -----------------------------------------------------
data = {
    'order_id': order_id,
    'asin': asin,
    'category': category,
    'order_date': order_date,
    'price': price,
    'size_category': size_category,
    'will_return': will_return,
    'shipped_month': shipped_month,
    'return_date': return_date,
    'return_days': return_days,
    'return_reason': return_reason,
    'return_month': return_month,
    'within_3_months': within_3_months
}

# Build the DataFrame. The original script skipped this step, so the
# `df.to_csv` call below raised NameError and no file was written.
df = pd.DataFrame(data)

# Save the DataFrame to a csv file
df.to_csv('amazon_product_return.csv', index=False)