# Werbemaßnahmen Analyse
### Bibliotheken

In [1]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import plotly.express as px



### Daten Laden

In [67]:
# Load the transaction data from CSV; the path is relative to the
# notebook's working directory.
df = pd.read_csv("../../data/dunnhumby_BatF_transactions.csv")

In [75]:
# Preview the first five rows to sanity-check the columns.
df.head()

Unnamed: 0,WEEK_END_DATE,STORE_NUM,UPC,UNITS,VISITS,HHS,SPEND,PRICE,BASE_PRICE,FEATURE,DISPLAY,TPR_ONLY,advertising_numeric,advertising_category
0,2009-01-14,367,1111009477,13,13,13,18.07,1.39,1.57,0,0,1,1,TPR_Only
1,2009-01-14,367,1111009497,20,18,18,27.8,1.39,1.39,0,0,0,0,
2,2009-01-14,367,1111009507,14,14,14,19.32,1.38,1.38,0,0,0,0,
3,2009-01-14,367,1111035398,4,3,3,14.0,3.5,4.49,0,0,1,1,TPR_Only
4,2009-01-14,367,1111038078,3,3,3,7.5,2.5,2.5,0,0,0,0,


Das Datum formatieren

In [68]:
# Parse WEEK_END_DATE into datetime64 with a strict format:
# day, abbreviated month name, two-digit year (e.g. "14-Jan-09").
# NOTE(review): %b month abbreviations are locale-dependent; this presumably
# expects English month names in the raw file - confirm.
df['WEEK_END_DATE'] = pd.to_datetime(df['WEEK_END_DATE'], format="%d-%b-%y")

### Gesamte Spalte für Werbemaßnahmen

In [69]:
# Concatenate the three promotion flags into one code string per row,
# in the order FEATURE, DISPLAY, TPR_ONLY (e.g. 0, 0, 1 -> '001').
ad_cols = ["FEATURE", "DISPLAY", "TPR_ONLY"]
# Column-wise string concatenation instead of a row-wise apply(axis=1):
# the apply variant calls a Python lambda once per row, which is slow on a
# frame of this size, while str.cat joins whole columns at once.
# NOTE(review): assumes all three flag columns share one dtype (the preview
# shows integer 0/1 flags); with mixed int/float columns the row-wise variant
# would have rendered every flag as a float string.
df['advertising_numeric'] = (
    df[ad_cols[0]].astype(str).str.cat([df[col].astype(str) for col in ad_cols[1:]])
)

In [70]:
# Map the numeric representation (FEATURE, DISPLAY, TPR_ONLY flag string) to a
# categorical label. Codes that are not keys of the mapping become NaN.
df['advertising_category'] = df['advertising_numeric'].map({'000': 'None', '001': 'TPR_Only', '010': 'Display', '100': 'Flyer', '110': 'Flyer + Display'})

In [83]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output, State

app = Dash(__name__)

# Total revenue (SPEND) and units sold (UNITS) per store, one row per
# STORE_NUM. Computed once here because the callbacks read it on every update.
store_metrics = df.groupby('STORE_NUM').agg({'SPEND': 'sum', 'UNITS': 'sum'}).reset_index()

# Page layout, top to bottom: title, store overview scatter plot, store info
# placeholder (replaced by a callback), product dropdown (options filled by a
# callback), date range picker and the per-product revenue scatter plot.
app.layout = html.Div([
    html.H1('Werbemaßnahmen Analyse'),
    dcc.Graph(id='store-scatter-plot'),
    html.Div(id='store-info', children=[
        html.H2('Store Info wird geladen...'),
        html.H4('Die 10 umsatzstärksten Produkte werden geladen...')
    ]),
    dcc.Dropdown(
        id='product-dropdown'
    ),
    dcc.DatePickerRange(
        id='date-picker-range',
        # Default to the full date range covered by the data set.
        start_date=df['WEEK_END_DATE'].min().date(),
        end_date=df['WEEK_END_DATE'].max().date(),
        display_format='YYYY-MM-DD',
        # Dash style dicts use camelCase CSS property names.
        style={'marginTop': 20},
    ),
    dcc.Graph(id='revenue-scatter-plot'),
])

# Define the callback to update the store scatter plot
@app.callback(
    Output('store-scatter-plot', 'figure'),
    Input('product-dropdown', 'value')
)
def update_store_figure(selected_product):
    """Build the store overview scatter plot (revenue vs. units per store).

    ``selected_product`` is delivered by the callback wiring but is not read
    here; the figure is drawn from ``store_metrics`` only.
    """
    marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))

    store_figure = px.scatter(
        store_metrics,
        x='SPEND',
        y='UNITS',
        title='Absatz und Umsatz der Märkte',
        # The store number is exposed as hover text; the click callbacks read
        # it back from clickData['points'][0]['hovertext'].
        hover_name='STORE_NUM',
    )
    store_figure.update_traces(
        selector=dict(mode='markers'),
        mode='markers',
        marker=marker_style,
    )
    return store_figure

# Define the callback to update the dropdown options and store information based on the selected store
@app.callback(
    [Output('product-dropdown', 'options'),
     Output('product-dropdown', 'value'),
     Output('store-info', 'children')],
    Input('store-scatter-plot', 'clickData')
)
def update_dropdown_and_store_info(clickData):
    """Refresh the product dropdown and the store headline for the chosen store.

    Args:
        clickData: Click payload of the store scatter plot, or ``None`` when
            no point has been clicked yet.

    Returns:
        A tuple ``(dropdown_options, dropdown_value, store_info)``: the
        dropdown entries for the store's (up to) ten highest-revenue products,
        the preselected product (the top seller, or ``None`` if the store has
        no rows), and the heading elements for the store info box.
    """
    if clickData is not None:
        # The store scatter plot exposes STORE_NUM as the point's hover text.
        selected_store = clickData['points'][0]['hovertext']
    else:
        # If no store is clicked, default to the store with the highest revenue
        selected_store = store_metrics.loc[store_metrics['SPEND'].idxmax(), 'STORE_NUM']

    # Filter the dataset for the selected store
    df_selected_store = df[df['STORE_NUM'] == selected_store]

    # Calculate the total revenue per product within the selected store
    total_revenue_per_product = df_selected_store.groupby('UPC')['SPEND'].sum().reset_index()

    # Get the top 10 products with the highest revenue
    top_products = total_revenue_per_product.nlargest(10, 'SPEND')['UPC']

    # Update dropdown options
    dropdown_options = [{'label': str(product), 'value': product} for product in top_products]
    # Guard against a store without any matching rows: iloc[0] on an empty
    # Series would raise IndexError and abort the whole callback.
    dropdown_value = top_products.iloc[0] if not top_products.empty else None

    # Update store information
    store_total_revenue = df_selected_store['SPEND'].sum()
    store_info = [
        html.H2(f'Store {selected_store} mit einem Gesamtumsatz von {round(store_total_revenue, 2)} Euro'),
        html.H4(f'Die 10 umsatzstärksten Produkte von Store {selected_store}')
    ]

    return dropdown_options, dropdown_value, store_info

# Define the callback to update the product scatter plot based on the selected store and product
@app.callback(
    Output('revenue-scatter-plot', 'figure'),
    [Input('product-dropdown', 'value'),
     Input('store-scatter-plot', 'clickData'),
     Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date')
    ]
)
def update_product_figure(selected_product, clickData, start_date, end_date):
    """Plot weekly revenue of one product in one store, colored by promotion.

    Args:
        selected_product: UPC chosen in the product dropdown.
        clickData: Click payload of the store scatter plot, or ``None`` when
            no point has been clicked yet.
        start_date: Lower bound (inclusive) of the date range, ``YYYY-MM-DD``.
        end_date: Upper bound (inclusive) of the date range, ``YYYY-MM-DD``.

    Returns:
        A scatter figure of SPEND over WEEK_END_DATE for the matching rows.
    """
    if clickData is None:
        # Nothing clicked yet: fall back to the store with the highest revenue.
        selected_store = store_metrics.loc[store_metrics['SPEND'].idxmax(), 'STORE_NUM']
    else:
        selected_store = clickData['points'][0]['hovertext']

    range_start = pd.to_datetime(start_date, format="%Y-%m-%d")
    range_end = pd.to_datetime(end_date, format="%Y-%m-%d")

    # One combined row mask: inside the date range, selected store, selected product.
    week_end = df['WEEK_END_DATE']
    row_mask = (
        (week_end >= range_start)
        & (week_end <= range_end)
        & (df['STORE_NUM'] == selected_store)
        & (df['UPC'] == selected_product)
    )
    product_rows = df[row_mask]

    marker_style = dict(size=10, line=dict(width=2, color='DarkSlateGrey'))
    revenue_figure = px.scatter(
        product_rows,
        x='WEEK_END_DATE',
        y='SPEND',
        title=f'Produkt :{selected_product} aus dem Markt {selected_store}',
        # One color per advertising category, taken from a predefined palette.
        color='advertising_category',
        color_discrete_sequence=px.colors.qualitative.Bold,
    )
    revenue_figure.update_traces(
        selector=dict(mode='markers'),
        mode='markers',
        marker=marker_style,
    )
    return revenue_figure


Weitere Idee: Darstellung der Verteilung der Werbemaßnahmen von Produktquantilen je Store
- Auswahl nach Jahren mit Checkbox oder Dropdown
- Ich würde beim zweiten Plot eher Bars verwenden anstatt des Scatters, so lässt sich das optisch besser zeitlich zuordnen
- Evtl. Checkboxen für Werbemaßnahmen -> [x]None, [x]Flyer + Display, []Display, []TPR_Only, [x]Flyer
- Als weiteren Plot vllt. dann für ein Produkt schauen in welchem Zeitraum/Monat viel verkauft wurde oder gekauft wird
    - Man könnte den aktuellen Scatterplot in die Jahre 2009, 2010 und 2011(2012) einteilen und dann mit deinen vorgeschlagenen Checkboxen oder Dropdown das jeweilige Jahr auswählen, wodurch sich
        die Jahre überlagern und gut vergleichen lassen

In [None]:
# Start the Dash development server on 127.0.0.1:8081 with debug mode enabled;
# jupyter_mode="tab" serves the app in a separate browser tab instead of
# inline in the notebook.
app.run(jupyter_mode="tab", debug=True, host='127.0.0.1', port='8081')

Dash app running on http://127.0.0.1:8081/


<IPython.core.display.Javascript object>

(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(9422, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(164274, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



(524950, 14)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

