# Werbemaßnahmen Analyse
### Bibliotheken

In [207]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

### Daten Laden

In [208]:
df = pd.read_csv("../../data/dunnhumby_BatF_transactions.csv")

In [209]:
df.head()

Unnamed: 0,WEEK_END_DATE,STORE_NUM,UPC,UNITS,VISITS,HHS,SPEND,PRICE,BASE_PRICE,FEATURE,DISPLAY,TPR_ONLY
0,14-Jan-09,367,1111009477,13,13,13,18.07,1.39,1.57,0,0,1
1,14-Jan-09,367,1111009497,20,18,18,27.8,1.39,1.39,0,0,0
2,14-Jan-09,367,1111009507,14,14,14,19.32,1.38,1.38,0,0,0
3,14-Jan-09,367,1111035398,4,3,3,14.0,3.5,4.49,0,0,1
4,14-Jan-09,367,1111038078,3,3,3,7.5,2.5,2.5,0,0,0


### Daten formatiere
Datum formatieren

In [210]:
df['WEEK_END_DATE'] = pd.to_datetime(df['WEEK_END_DATE'], format="%d-%b-%y")

Gesamte Spalte für Werbemaßnahmen

In [211]:
# Concat advertising columns in numeric representation
ad_cols = ["FEATURE", "DISPLAY", "TPR_ONLY"]
df['advertising_numeric'] = df[ad_cols].apply(lambda x: ''.join(x.values.astype(str)), axis=1)

In [212]:
# Map numeepresentation to a categorical value
df['advertising_category'] = df['advertising_numeric'].map({'000': 'None', '001': 'TPR_Only', '010': 'Display', '100': 'Flyer', '110': 'Flyer + Display'})

In [213]:
# Währungsumrechnung in Euro deutsches Format
def custom_format(number):
    if '.' in str(number):
        parts = str(number).split('.')
        main_part = "{:,}".format(int(parts[0])).replace(",", ".")
        decimal_part = parts[1]
        return main_part + "," + decimal_part
    else:
        return "{:,}".format(number).replace(",", ".")

### Dashboard

In [214]:
import dash_bootstrap_components as dbc

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


# Create a year column for filtering in the UI
df['Year'] = df['WEEK_END_DATE'].dt.year

# Filter out the 'None' advertising_category entries
df_advert_category = df[df['advertising_category'] != 'None']

# Define a color map for the advertising categories
color_map = {
    'None' : '#b2b2b2',
    'TPR_Only': '#e28743',
    'Flyer': '#1e81b0',
    'Display': '#8338ec',
    'Flyer + Display' : '#FF006E',
 }


# Calculate the total revenue and units sold per store
store_metrics = df.groupby('STORE_NUM').agg({'SPEND': 'sum', 'UNITS': 'sum'}).reset_index()

app.layout = dbc.Container([
    dbc.Row(dbc.Col(html.H1('Auswirkungen verschiedener Marketingmaßnahmen auf den Umsatz'), width=12)),
    dbc.Row(dbc.Col(html.H3('Analyse der Märkte'), width=12)),
    dbc.Row([
        dbc.Col(dcc.Graph(id='store-scatter-plot'), width=12)
    ]),
    dbc.Row(dbc.Col(html.Div(id='store-info'), width=12)),
    dbc.Row([
        dbc.Col(dcc.Dropdown(id='product-dropdown'), width=6),
        dbc.Col(dcc.DatePickerRange(id='date-picker-range',
                                    start_date=df['WEEK_END_DATE'].min(),
                                    end_date=df['WEEK_END_DATE'].max(),
                                    display_format='YYYY-MM-DD'), width=6)
    ]),
    dbc.Row(dbc.Col(dcc.Graph(id='revenue-scatter-plot'), width=12)),
    dbc.Row(dbc.Col(html.H3('Analyse der Werbemaßnahmen'), width=12)),
    dbc.Row([
        dbc.Col(html.Label('Jahr auswählen:'), width=3),
        dbc.Col(dcc.RadioItems(id='year-selector',
                               options=[{'label': str(year), 'value': year} for year in df_advert_category['Year'].unique()],
                               value=df_advert_category['Year'].min()), width=9)
    ]),
    dbc.Row(dbc.Col(dcc.Graph(id='advertising-measure-distribution'), width=12))
], fluid=True)


# Callback for the store scatter plot
@app.callback(
    Output('store-scatter-plot', 'figure'),
    Input('product-dropdown', 'value')
)
def update_store_figure(selected_product):
    # Create the scatter plot for stores
    fig = px.scatter(
        store_metrics,
        x='UNITS',
        y='SPEND',
        hover_name='STORE_NUM',
        title='Absatz und Umsatz der Märkte'
    )
    fig.update_traces(mode='markers', marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    return fig

# Callback for the dropdown options and store information
@app.callback(
    [Output('product-dropdown', 'options'),
     Output('product-dropdown', 'value'),
     Output('store-info', 'children')],
    Input('store-scatter-plot', 'clickData')
)
def update_dropdown_and_store_info(clickData):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    df_selected_store = df[df['STORE_NUM'] == selected_store].copy()
    total_revenue_per_product = df_selected_store.groupby('UPC')['SPEND'].sum().reset_index()
    top_products = total_revenue_per_product.nlargest(10, 'SPEND')['UPC']
    dropdown_options = [{'label': str(product), 'value': product} for product in top_products]
    dropdown_value = top_products.iloc[0]
    store_total_revenue = df_selected_store['SPEND'].sum()
    store_info = [
        html.H3(f'Markt {selected_store} mit einem Gesamtumsatz von {round(store_total_revenue, 2)} €'),
        html.H4(f'Die 10 umsatzstärksten Produkte von Store {selected_store}')
    ]
    return dropdown_options, dropdown_value, store_info

# Callback for the product bar plot
@app.callback(
    Output('revenue-scatter-plot', 'figure'),
    [Input('product-dropdown', 'value'),
     Input('store-scatter-plot', 'clickData'),
     Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date')]
)
def update_product_figure(selected_product, clickData, start_date, end_date):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    df_selected_time = df[(df['WEEK_END_DATE'] >= pd.to_datetime(start_date)) & 
                          (df['WEEK_END_DATE'] <= pd.to_datetime(end_date))].copy()
    df_selected_store = df_selected_time[df_selected_time['STORE_NUM'] == selected_store]
    df_selected_product = df_selected_store[df_selected_store['UPC'] == selected_product]
    fig = px.bar(
        df_selected_product,
        x='WEEK_END_DATE',
        y='SPEND',
        color='advertising_category',
        title=f'Produkt : {selected_product} aus dem Markt {selected_store}',
        color_discrete_map=color_map
    )
    fig.update_traces(mode='markers', marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    return fig
# Callback for the advertising measure distribution bar chart
@app.callback(
    Output('advertising-measure-distribution', 'figure'),
    [Input('store-scatter-plot', 'clickData'),
     Input('year-selector', 'value')]
)
def update_advertising_measure_distribution(clickData, selected_year):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    
    # Work with a copy to avoid SettingWithCopyWarning
    df_selected_store = df_advert_category[(df_advert_category['STORE_NUM'] == selected_store) & 
                                           (df_advert_category['Year'] == selected_year)].copy()
    
    # Create quantiles
    df_selected_store['Umsatz'] = pd.qcut(df_selected_store['SPEND'], 4, 
                                          labels=['Quantil 1', 'Quantil 2', 'Quantil 3', 'Quantil 4'])
    
    # Calculate count and percentage
    ad_measure_counts = df_selected_store.groupby(['Umsatz', 'advertising_category'], group_keys=True)['UPC'].count().reset_index()
    ad_measure_counts['Anteil_am_Umsatz'] = ad_measure_counts.groupby('Umsatz')['UPC'].apply(
        lambda x: x / float(x.sum()) * 100
    ).reset_index(drop=True)

    # Create bar chart
    fig = px.bar(
        ad_measure_counts,
        x='Umsatz',
        y='Anteil_am_Umsatz',
        color='advertising_category',
        barmode='group',
        color_discrete_map=color_map,
        title=f'Verteilung der Werbemaßnahmen pro Umsatz Quantil der Produkte aus dem Markt {selected_store}.'
    )
    return fig


Verbesserungen: Produkt Vergleich in Jahre aufteilen und übereinander lagern

Weitere Idee Darstellung der Verteilung der Werbemaßnahmen von Produktquantilen je Store
- Auswahl nach Jahren mit Checkbox oder Dropdown
- Ich würde beim zweiten Plot eher Bars verwenden anstatt des Scatters, so lässt sich das optisch besser zeitlich zuordnen
- Evlt. Checkboxen für Werbemaßnahmen -> [x]None, [x]Flyer + Display, []Display, []TPR_only, [x]Flyer
- Als weiteren Plot vllt. dann für ein Produkt schauen in welchem Zeitraum/Monat viel verkauft wurde oder gekauft wird
    - Man könnte den aktuellen Scatterplot in die Jahre 2009, 2010 und 2011(2012) einteilen und dann mit deinen vorgeschlagenen Checkboxen oder Dropdown das jeweilige Jahr auswählen, wodurch sich
        die Jahre überlagern und gut vergleichen lassen

In [215]:
app.run(jupyter_mode="tab", debug=True, host='127.0.0.1', port='8081')

Dash app running on http://127.0.0.1:8081/


<IPython.core.display.Javascript object>

In [216]:
# print(pd.__version__)


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)

