In [1]:
import pandas as pd

customers = pd.read_csv('data/olist_customers_dataset.csv')
geolocations = pd.read_csv('data/olist_geolocation_dataset.csv')
order_items = pd.read_csv('data/olist_order_items_dataset.csv')
payments = pd.read_csv('data/olist_order_payments_dataset.csv')
reviews = pd.read_csv('data/olist_order_reviews_dataset.csv')
orders = pd.read_csv('data/olist_orders_dataset.csv')
products = pd.read_csv('data/olist_products_dataset.csv')
sellers = pd.read_csv('data/olist_sellers_dataset.csv')
product_category_translation = pd.read_csv('data/product_category_name_translation.csv')

## Dynamic Pricing Based on Churn Analysis

### Overview

In our application, we aim to implement a dynamic pricing strategy that leverages customer churn analysis. By identifying customers who have churned, we can offer personalized pricing incentives to re-engage them and reduce overall churn rates.

### Churn Calculation Logic

The process of calculating customer churn involves several steps:

1. **Data Preparation**:
   - Convert the order purchase timestamps to a datetime format for accurate date comparisons.

2. **Define the Churn Period**:
   - The churn period is set as a specific number of days (e.g., 90 days). Customers who have not made a purchase within this period are considered churned.

3. **Determine the Reference Date**:
   - The reference date is the most recent order date in the dataset, representing the current point of analysis.

4. **Calculate the Last Purchase Date**:
   - For each customer, find the most recent purchase date using the `orders` dataset.

5. **Identify Churned Customers**:
   - Compare the last purchase date of each customer with the reference date minus the churn period. Customers with a last purchase date older than this are identified as churned.

6. **Calculate the Churn Rate**:
   - The churn rate is calculated as the percentage of churned customers relative to the total customer base.

7. **Identify Retained Customers**:
   - Customers who have made purchases within the churn period are identified as retained.

### Dynamic Pricing Strategy

Using the identified churned customers, we can implement dynamic pricing strategies such as:

- **Discounts and Promotions**:
  - Offer special discounts or promotions to churned customers to encourage re-engagement and purchases.
  
- **Personalized Offers**:
  - Create personalized offers based on past purchase behavior to entice churned customers to return.

- **Loyalty Rewards**:
  - Introduce loyalty programs that provide rewards for churned customers who make a return purchase, thereby enhancing customer retention.

By integrating churn analysis into our pricing model, we can proactively address customer attrition and enhance the overall customer experience.

### Conclusion

By leveraging customer churn analysis, we can implement targeted pricing strategies that reduce churn rates, enhance customer loyalty, and increase the lifetime value of our customer base. This approach not only improves profitability but also strengthens customer relationships by offering them value-based incentives tailored to their engagement levels.


In [3]:
# Display the first few rows of each dataset
customers.head()


Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP


In [4]:
orders.head()



Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00


In [5]:
order_items.head()




Unnamed: 0,order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29
1,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03 11:05:13,239.9,19.93
2,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18 14:48:30,199.0,17.87
3,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15 10:10:18,12.99,12.79
4,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13 13:57:51,199.9,18.14


In [8]:
orders['order_purchase_timestamp'] = pd.to_datetime(orders['order_purchase_timestamp'])

# Define the churn period (e.g., 90 days)
churn_period_days = 200

# Determine the reference date (e.g., the latest order date in the dataset)
reference_date = orders['order_purchase_timestamp'].max()

# Calculate the last purchase date for each customer
last_purchase_date = orders.groupby('customer_id')['order_purchase_timestamp'].max().reset_index()

# Identify churned customers: those who haven't purchased within the churn period
churned_customers = last_purchase_date[
    last_purchase_date['order_purchase_timestamp'] < reference_date - pd.Timedelta(days=churn_period_days)
]

# Count the total number of unique customers
total_customers = customers['customer_id'].nunique()

# Count the number of churned customers
num_churned_customers = churned_customers['customer_id'].nunique()

# Calculate churn rate
churn_rate = (num_churned_customers / total_customers) * 100

# Output the results
print(f"Total Customers: {total_customers}")
print(f"Churned Customers: {num_churned_customers}")
print(f"Churn Rate: {churn_rate:.2f}%")

Total Customers: 99441
Churned Customers: 66569
Churn Rate: 66.94%


In [11]:
# Output the lists of churned and retained customers
print("\nChurned Customers IDs:")
churned_customers['customer_id'].tolist()

# print("\nRetained Customers IDs:")
# print(retained_customers['customer_id'].tolist())


Churned Customers IDs:


['00012a2ce6f8dcda20d059ce98491703',
 '000161a058600d5901f007fab4c27140',
 '0001fd6190edaaf884bcaf3d49edf079',
 '0002414f95344307404f0ace7a26f1d5',
 '0004164d20a9e969af783496f3408652',
 '000419c5494106c306a97b5635748086',
 '00046a560d407e99b969756e0b10f282',
 '00050bf6e01e69d5c0fd612f1bcfb69c',
 '00062b33cb9f6fe976afdcff967ea74d',
 '00066ccbe787a588c52bd5ff404590e3',
 '00072d033fe2e59061ae5c3aff1a2be5',
 '0009a69b72033b2d0ec8c69fc70ef768',
 '000bf8121c3412d3057d32371c5d3395',
 '000e943451fc2788ca6ac98a682f2f49',
 '000f17e290c26b28549908a04cfe36c1',
 '0010068a73b7c56da5758c3f9e5c7375',
 '00104a47c29da701ce41ee52077587d9',
 '00114026c1b7b52ab1773f317ef4880b',
 '001226b2341ef620415ce7bbafcfac28',
 '0012a5c13793cf51e253f096a7e740dd',
 '0013280441d86a4f7a8006efdaf1b0fe',
 '001450ebb4a77efb3d68be5f7887cb1e',
 '00146ad30454993879adaa91c518f68c',
 '00155f0530cc7b2bf73cc3f81cb01c52',
 '001574cd5824c0b1ea90dd4f4ba6d5b8',
 '0015f7887e2fde13ddaa7b8e385af919',
 '0017a0b4c1f1bdb9c395fa0ac517109c',
 