# Oscar Bolaños

### Physics Engineer.


[LinkedIn](https://www.linkedin.com/in/oscar-physics-engineer/)

### Libraries

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, files in os.walk('/kaggle/input'):
    paths = (os.path.join(root, name) for name in files)
    for path in paths:
        print(path)

In [None]:
import csv
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

## Transform data

```python
# One-off conversion of the Excel export into a CSV file that is then re-read.
data = pd.read_excel('TESTING_BD.xlsx')
data.to_csv('testing.csv', encoding='utf-8', index=False, quoting=csv.QUOTE_NONE, escapechar='\\')
# Raw string for the regex separator (a literal backslash followed by a comma).
# The previous '\\\,' produced the same text but relied on the invalid escape
# sequence `\,`, which newer Python versions warn about.
testing = pd.read_csv('testing.csv', sep=r'\\,', encoding='utf-8', engine='python')
```

## Exploratory Analysis

In [None]:
# Load the marketing customer-value CSV from the Kaggle input folder.
dataset_path = "/kaggle/input/ibm-watson-marketing-customer-value-data/WA_Fn-UseC_-Marketing-Customer-Value-Analysis.csv"
testing = pd.read_csv(dataset_path)

In [None]:
# Preview the first rows of the loaded dataset.
testing.head()

Convert the date column from text to datetime.

In [None]:
# Parse the date strings (month/day/two-digit year) into datetime values.
date_col = 'Effective To Date'
# Assign through `testing[date_col]` so the column is replaced. On recent pandas
# versions `testing.loc[:, date_col] = ...` writes into the existing object column
# in place, which leaves the dtype as object instead of datetime64.
testing[date_col] = pd.to_datetime(testing[date_col], format='%m/%d/%y')

In [None]:
# Column names, non-null counts and dtypes.
testing.info()

In [None]:
# Summary statistics with the default column selection.
testing.describe()

In [None]:
# Summary statistics restricted to the object (text) columns.
testing.describe(include='object')

In [None]:
# Income brackets; the thresholds appear to come from this article (verify):
# https://www.pewresearch.org/fact-tank/2018/09/06/the-american-middle-class-is-stable-in-size-but-losing-ground-financially-to-upper-income-families/
income = testing['Income']
brackets = {
    'Low income': income < 41_000,
    'Upper income': income > 120_400,
}
conds = list(brackets.values())
choices = list(brackets)
# Anything matching neither condition (41_000 to 120_400 inclusive) is 'Middle income'.
testing['Income level'] = np.select(conds, choices, default='Middle income')

## Analyze it to understand how different customers behave and react to different marketing strategies
Split marketing strategies

In [None]:
# One sub-frame per distinct renew offer type.
offers = list(testing['Renew Offer Type'].unique())
offer_data = [
    testing.loc[testing['Renew Offer Type'] == offer]
    for offer in offers
]

In [None]:
def pie_chart(data: pd.DataFrame, title: str, fontsz: int = 14, figsize: tuple = (8, 8), boxtitle=None):
    """Draw a pie chart from the first column of ``data``.

    Each wedge is annotated with its percentage and its absolute value; the
    legend lists the index labels of ``data``.

    Args:
        data: frame whose first column holds the wedge sizes and whose index
            holds the category labels.
        title: title of the chart.
        fontsz: font size of the wedge annotations.
        figsize: (width, height) of the figure in inches.
        boxtitle: legend title; defaults to the name of ``data``'s index.

    Returns:
        None. The chart is shown with ``plt.show()`` (notebook).
    """
    if not boxtitle:
        boxtitle = data.index.name

    sizes = data.iloc[:, 0]
    # Sum only the plotted column. Summing the whole frame yields a Series
    # (one total per column), which breaks the integer conversion below.
    total = sizes.sum()

    def wedge_label(pct):
        # matplotlib passes the wedge percentage; recover the absolute value.
        absolute = int(round(pct / 100. * total))
        return "{:.1f}%\n{:d}".format(pct, absolute)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(aspect="equal"))
    wedges, texts, autotexts = ax.pie(sizes, autopct=wedge_label,
                                      textprops=dict(color="w"))
    ax.legend(wedges, data.index,
              title=boxtitle,
              loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 1))
    plt.setp(autotexts, size=fontsz)
    ax.set_title(title)
    plt.show()


def bar_by_values(data: pd.DataFrame, show_by: str, cols: list):
    """Draw one grouped bar chart per distinct value of ``show_by``.

    Args:
        data: frame containing ``show_by``, every column in ``cols`` and a
            'Customer' column, which is the one that gets counted.
        show_by: column whose distinct (non-null) values each get a subplot.
        cols: columns to group by inside each subplot; the last one is
            unstacked into separate bars.
    """
    values = list(data.loc[:, show_by].dropna().unique())
    if not values:
        return  # nothing to plot; also avoids dividing by a zero-sized grid

    # Near-square grid with enough cells for every value (rows >= columns).
    n_cols = int(np.sqrt(len(values)))
    n_rows = int(np.ceil(len(values) / n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 12), squeeze=False)
    flat_axes = axes.ravel()

    for ax, value in zip(flat_axes, values):
        subset = data.loc[data.loc[:, show_by] == value]
        # Count the customers of this subset only, not of the whole frame,
        # so that each panel really shows its own value.
        counts = subset.groupby(cols)['Customer'].count().unstack()
        counts.plot(kind='bar', ax=ax, ylabel='Customers')
        ax.set_title(f'{value}')

    # Hide the grid cells that have no value to show.
    for ax in flat_axes[len(values):]:
        ax.set_axis_off()
    fig.tight_layout()

### Response by gender and marital status.

In [None]:
# Split by 'Renew Offer Type'; group by response, gender and marital status.
bar_by_values(testing, 'Renew Offer Type', ['Response', 'Gender', 'Marital Status'])

### Response by Education level.

In [None]:
# Split by 'Renew Offer Type'; group by response and education.
bar_by_values(testing, 'Renew Offer Type', ['Response', 'Education'])

### Response by gender and income level.

In [None]:
# Split by 'Renew Offer Type'; group by response, gender and income level.
bar_by_values(testing, 'Renew Offer Type', ['Response', 'Gender', 'Income level'])

## Customers who have responded

In [None]:
# Number of distinct customer identifiers in the dataset.
n_customers = testing['Customer'].nunique()
print(f'Number of customers: {n_customers}')

In [None]:
# Boolean mask of the rows whose response is 'Yes'.
cond_affirmative_response = testing['Response'] == 'Yes'
# Distinct customers among those rows, and their share of all distinct customers.
customer_with_response = testing.loc[cond_affirmative_response, 'Customer'].nunique()
response_share = customer_with_response / testing['Customer'].nunique() * 100
print(f'Customer with response: {customer_with_response} ({response_share:.2f} %)')

### Customers engaged

In [None]:
# Customer count per response value, drawn as a bar chart.
response_group_data = testing.groupby(['Response'])[['Customer']].count()
fig, ax = plt.subplots(figsize=(12, 6))
response_group_data.plot(kind='bar', ax=ax, title='User response', ylabel='Users');

In [None]:
# Share of each response value over the total count, in percent.
response_counts = response_group_data['Customer']
response_group_data['Percentage [%]'] = response_counts.div(response_counts.sum()).mul(100)
response_group_data

In [None]:
# Pie chart of the customer count per renew offer type.
pie_chart(data=testing.groupby('Renew Offer Type').agg({'Customer': 'count'}), title='Offers')

### Percentage engaged customers grouped by renewal offer type

In [None]:
# Customer count per renew offer type, as a Series indexed by offer.
totals = testing.groupby(['Renew Offer Type'])['Customer'].count()

In [None]:
# Turn the per-offer totals into a plain list repeated twice; the next cell
# matches it by position against the (offer, response) rows.
# NOTE(review): those rows are ordered offer-first, so this repeated order may
# not line up with them - confirm. Re-running this cell doubles the list again.
totals = 2 * list(totals)

In [None]:
# Customer count per (offer, response) pair.
response_offer_group = testing.groupby(['Renew Offer Type', 'Response']).agg({'Customer': 'count'})
# Percentage of each response within its own offer. The per-offer totals are
# computed on the 'Renew Offer Type' index level so every row is divided by the
# total of its own offer; a positional list of totals does not line up with the
# offer-first row order and divides most rows by another offer's total.
offer_totals = response_offer_group.groupby(level='Renew Offer Type')['Customer'].transform('sum')
response_offer_group['Percentage [%]'] = response_offer_group['Customer'] / offer_totals * 100
response_offer_group

In [None]:
# Rows with an affirmative response, split by renew offer type.
offers = list(testing.loc[:, 'Renew Offer Type'].unique())
# Collect into engaged_users_by_offer. Appending to offer_data left this list
# empty and mixed engaged-only frames into the per-offer frames.
engaged_users_by_offer = [
    testing.loc[(testing.loc[:, 'Renew Offer Type'] == offer) & cond_affirmative_response]
    for offer in offers
]

In [None]:
# Keep only the rows selected by the affirmative-response mask.
engaged_users = testing.loc[cond_affirmative_response]

In [None]:
# Bar chart of the engaged-customer count per renew offer type.
engaged_users.groupby('Renew Offer Type').count()['Customer'].plot.bar();

In [None]:
# Pie chart of the engaged-customer count per renew offer type.
pie_chart(data=engaged_users.groupby('Renew Offer Type').agg({'Customer': 'count'}), title='Offers')

### Offers by Vehicle class

In [None]:
# Engaged customers per (offer, vehicle class) and their share of all engaged
# customers, in percent. The variable name says "size" but this table is
# grouped by 'Vehicle Class'.
vehicule_size_offer_group = engaged_users.groupby(['Renew Offer Type', 'Vehicle Class'])[['Customer']].count()
class_counts = vehicule_size_offer_group['Customer']
vehicule_size_offer_group['Percentage [%]'] = class_counts.div(class_counts.sum()).mul(100)
vehicule_size_offer_group

### Offers by Vehicle size

In [None]:
# Engaged customers per (offer, vehicle size) and their share of all engaged
# customers, in percent.
vehicule_size_offer_group = engaged_users.groupby(['Renew Offer Type', 'Vehicle Size'])[['Customer']].count()
size_counts = vehicule_size_offer_group['Customer']
vehicule_size_offer_group['Percentage [%]'] = size_counts.div(size_counts.sum()).mul(100)
vehicule_size_offer_group

In [None]:
# Engaged customers per offer, with one bar per vehicle size.
size_by_offer = engaged_users.groupby(['Renew Offer Type', 'Vehicle Size'])['Customer'].count().unstack()
size_by_offer.plot(kind='bar', title='Vehicle Size per Offer', ylabel='Customers');

### Categorical data

In [None]:
# Columns treated as categories for the overview grid.
categorical_cols = [
    'State', 'Coverage', 'Education', 'EmploymentStatus', 
    'Gender', 'Marital Status', 'Number of Policies', 'Policy Type',
    'Policy', 'Renew Offer Type', 'Sales Channel', 'Vehicle Class',
    'Vehicle Size',
]
fig, axes = plt.subplots(5, 3, figsize=(15, 18))

# One bar chart of engaged-customer counts per categorical column.
for ax, col in zip(fig.axes, categorical_cols):
    engaged_users.groupby(col)['Customer'].count().plot(
        kind='bar', ax=ax)
    ax.set_ylabel('Customers')

# The grid has more cells than columns; hide the unused ones instead of
# leaving empty frames in the figure.
for ax in fig.axes[len(categorical_cols):]:
    ax.set_axis_off()

fig.tight_layout()

### Gender segmentation

In [None]:
# Engaged customers split by 'Renew Offer Type'; group by vehicle size and gender.
bar_by_values(engaged_users, 'Renew Offer Type', ['Vehicle Size', 'Gender'])

### Income level of customers by Vehicle size and class

In [None]:
# Engaged customers split by 'Vehicle Size'; group by vehicle class and income level.
bar_by_values(engaged_users, 'Vehicle Size', ['Vehicle Class', 'Income level'])

### Pivot the data and extract and transform the inner-level groups to columns

In [None]:
# Categorical columns that are crossed pairwise.
inner_groups_cols = [
    'State', 'Coverage', 'Education', 'EmploymentStatus', 
    'Gender', 'Marital Status', 'Policy Type',
    'Policy', 'Renew Offer Type', 'Sales Channel',
    'Vehicle Class', 'Vehicle Size',
]
# For every unordered pair, count customers with the first column of the pair
# as table columns and the second as table index; missing pairs become 0.
tables, names = [], []
for first_col, second_col in itertools.combinations(inner_groups_cols, 2):
    table = testing.pivot_table(
        values='Customer', index=second_col, columns=first_col, aggfunc='count',
    ).fillna(0)
    tables.append(table)
    names.append(f'{first_col} vs {second_col}')
    display(table)

#### Visualize this data in bar plot

In [None]:
# One bar chart per pivot table, three per row. The row count follows the
# number of tables, so none is silently dropped by `zip` if the list of
# crossed columns changes. Height is scaled from the former 120 in for 22 rows.
n_plot_cols = 3
n_plot_rows = max(1, int(np.ceil(len(tables) / n_plot_cols)))
fig, axes = plt.subplots(n_plot_rows, n_plot_cols, figsize=(18, 120 / 22 * n_plot_rows))

for ax, table, name in zip(fig.axes, tables, names):
    table.plot(kind='bar', title=name, ax=ax);

# Hide any leftover cells of the last row.
for ax in fig.axes[len(tables):]:
    ax.set_axis_off()

fig.tight_layout();

### Engagement rates differ by different sales channels.

In [None]:
# Bar chart of the number of non-null 'Vehicle Size' entries per sales channel.
testing.groupby(['Sales Channel']).count()['Vehicle Size'].plot.bar();

In [None]:
# Take the pivot table by its name rather than by position, and work on a copy
# so the table stored in `tables` is not given extra columns (which would also
# pile up on every re-run of this cell).
sales_chanel_vs_vehicle_size = tables[names.index('Sales Channel vs Vehicle Size')].copy()
# For each sales channel column, add the share of each row within that
# channel, in percent.
for col in list(sales_chanel_vs_vehicle_size.columns):
    channel_counts = sales_chanel_vs_vehicle_size[col].values
    sales_chanel_vs_vehicle_size[col + ' [%]'] = channel_counts / channel_counts.sum() * 100
sales_chanel_vs_vehicle_size

In [None]:
# Customer counts per (vehicle size, response) row and sales channel column;
# combinations that do not occur are filled with 0.
response_pivot_data = testing.pivot_table(
    values='Customer',
    index=['Vehicle Size', 'Response'],
    columns='Sales Channel',
    aggfunc='count',
).fillna(0)
fig, ax = plt.subplots(figsize=(12, 6))
response_pivot_data.plot(kind='bar', ax=ax, title='User response by Sales Channel and Vehicle Size', ylabel='Customers');

In [None]:
# Display the pivot table plotted in the previous cell.
response_pivot_data

In [None]:
# Split by 'Sales Channel'; group by vehicle size and response.
bar_by_values(testing, 'Sales Channel', ['Vehicle Size', 'Response'])

In [None]:
# Pair plot of policy age against customer lifetime value, with KDE diagonals
# and KDE contours on the lower triangle.
pair_cols = ['Months Since Policy Inception', 'Customer Lifetime Value']
graph = sns.pairplot(testing[pair_cols], diag_kind="kde")
graph.map_lower(sns.kdeplot, levels=4, color=".2")

# Resize the figure to 9 x 9 inches.
graph.fig.set_size_inches(9, 9)

In [None]:
# Scatter plot of customer lifetime value against policy age, engaged customers only.
_, scatter_ax = plt.subplots(figsize=(8, 6))
scatter_ax.scatter(engaged_users['Months Since Policy Inception'], engaged_users['Customer Lifetime Value'])
scatter_ax.set_xlabel('Months Since Policy Inception')
scatter_ax.set_ylabel('Customer Lifetime Value')
plt.show()

### Pair plot of the numeric columns

In [None]:
# Pair plot over the whole frame with KDE diagonals, then KDE contours on the
# lower-triangle panels.
graph = sns.pairplot(testing, diag_kind="kde")
graph.map_lower(sns.kdeplot, levels=4, color=".2")