# Exploring Customer Behavior and Market Patterns

Analyze customer behavior, sales patterns, and product performance using the Olist e-commerce dataset to generate insights that improve marketing strategy, boost customer retention, and optimize business performance.

In [3]:
import pandas as pd
import os

In [2]:
# Load datasets
customers = pd.read_csv("data/olist_customers_dataset.csv")
geolocation = pd.read_csv("data/olist_geolocation_dataset.csv")
order_items = pd.read_csv("data/olist_order_items_dataset.csv")
payments = pd.read_csv("data/olist_order_payments_dataset.csv")
reviews = pd.read_csv("data/olist_order_reviews_dataset.csv")
orders = pd.read_csv("data/olist_orders_dataset.csv")
products = pd.read_csv("data/olist_products_dataset.csv")
sellers = pd.read_csv("data/olist_sellers_dataset.csv")
category_translation = pd.read_csv("data/product_category_name_translation.csv")


In [None]:
dfs = {}
for name, path in files.items():
    if name in globals() and isinstance(globals()[name], pd.DataFrame):
        dfs[name] = globals()[name]
    else:
        if os.path.exists(path):
            dfs[name] = pd.read_csv(path)
        else:
            print(f"Warning: file not found -> {path}")

def show_overview(dfs_dict, show_head=True, head_n=5):
    print("Dataset overview\n" + "-"*60)
    for name, df in dfs_dict.items():
        print(f"\n>> {name}")
        try:
            print("  shape:", df.shape)
            print("  columns:", len(df.columns))
            print("  dtypes:")
            print(df.dtypes.apply(lambda x: x.name).to_string())
            print("  missing values (per column):")
            print(df.isnull().sum()[df.isnull().sum() > 0].to_string() or "   None")
            if show_head:
                print(f"\n  head({head_n}):")
                print(df.head(head_n).to_string(index=False))
        except Exception as e:
            print("  Error inspecting dataframe:", e)

# Run overview
# Set show_head=True if you want to print first rows for each dataset (useful for quick manual checks)
show_overview(dfs, show_head=True, head_n=3)

NameError: name 'files' is not defined

In [4]:

# Exploration
print(customers.shape)
print(customers.head())
print(customers.isnull().sum())

print(orders.head())
print(orders.order_status.value_counts())


(99441, 5)
                        customer_id                customer_unique_id  \
0  06b8999e2fba1a1fbc88172c00ba8bc7  861eff4711a542e4b93843c6dd7febb0   
1  18955e83d337fd6b2def6b18a428ac77  290c77bc529b7ac935b93aa66c333dc3   
2  4e7b3e00288586ebd08712fdd0374a03  060e732b5b29e8181a18229c7b0b2b5e   
3  b2b6027bc5c5109e529d4dc6358b12c3  259dac757896d24d7702b9acbbff3f3c   
4  4f2d8ab171c80ec8364f7c12e35b23ad  345ecd01c38d18a9036ed96c73b8d066   

   customer_zip_code_prefix          customer_city customer_state  
0                     14409                 franca             SP  
1                      9790  sao bernardo do campo             SP  
2                      1151              sao paulo             SP  
3                      8775        mogi das cruzes             SP  
4                     13056               campinas             SP  
customer_id                 0
customer_unique_id          0
customer_zip_code_prefix    0
customer_city               0
customer_state        