# Werbemaßnahmen Analyse
### Bibliotheken

In [1]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import plotly.express as px



### Daten Laden

In [2]:
df = pd.read_csv("../../data/dunnhumby_BatF_transactions.csv")

In [13]:
df.head()

Unnamed: 0,WEEK_END_DATE,STORE_NUM,UPC,UNITS,VISITS,HHS,SPEND,PRICE,BASE_PRICE,FEATURE,DISPLAY,TPR_ONLY,advertising_numeric,advertising_category,Year
0,2009-01-14,367,1111009477,13,13,13,18.07,1.39,1.57,0,0,1,1,TPR_Only,2009
3,2009-01-14,367,1111035398,4,3,3,14.0,3.5,4.49,0,0,1,1,TPR_Only,2009
9,2009-01-14,367,1111087395,50,40,40,168.0,3.36,3.94,0,1,0,10,Display,2009
10,2009-01-14,367,1111087396,61,43,42,203.13,3.33,4.0,0,1,0,10,Display,2009
11,2009-01-14,367,1111087398,72,52,49,238.32,3.31,4.0,0,1,0,10,Display,2009


### Das Datum formatieren

In [4]:
df['WEEK_END_DATE'] = pd.to_datetime(df['WEEK_END_DATE'], format="%d-%b-%y")

In [20]:
df['WEEK_END_DATE'].iloc[0].year

2009

### Gesamte Spalte für Werbemaßnahmen

In [5]:
# Concat advertising columns in numeric representation
ad_cols = ["FEATURE", "DISPLAY", "TPR_ONLY"]
df['advertising_numeric'] = df[ad_cols].apply(lambda x: ''.join(x.values.astype(str)), axis=1)

In [6]:
# Map numeepresentation to a categorical value
df['advertising_category'] = df['advertising_numeric'].map({'000': 'None', '001': 'TPR_Only', '010': 'Display', '100': 'Flyer', '110': 'Flyer + Display'})

In [77]:
df_selected_store = df[(df['STORE_NUM'] == 367) & (df['Year'] == 2010)]
df_selected_store['Quantile'] = pd.qcut(df_selected_store['SPEND'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [73]:
ad_measure_counts = df_selected_store.groupby(['Quantile', 'advertising_category'])['UPC'].count().reset_index(name='Anzahl Produkte')
ad_measure_counts['Prozent'] = ad_measure_counts.groupby('Quantile')['UPC'].apply(lambda x: x / float(x.sum())) * 100







KeyError: 'Column not found: UPC'

In [76]:
df_selected_store.groupby(['Quantile', 'advertising_category'])['UPC'].count().reset_index(name='Anzahl Produkte')





Unnamed: 0,Quantile,advertising_category,Anzahl Produkte
0,Q1,Display,0
1,Q1,Flyer,13
2,Q1,Flyer + Display,0
3,Q1,TPR_Only,129
4,Q2,Display,9
5,Q2,Flyer,18
6,Q2,Flyer + Display,9
7,Q2,TPR_Only,106
8,Q3,Display,21
9,Q3,Flyer,20


In [78]:
import pandas as pd
import plotly.express as px
import numpy as np
from dash import Dash, dcc, html, Input, Output, State

app = Dash(__name__)

# Load your data
# df = pd.read_csv('your_data.csv') # Replace with your actual data file
# Assume WEEK_END_DATE is already in datetime format

# Filter out the 'None' advertising_category entries
df = df[df['advertising_category'] != 'None']

# Create a year column for filtering in the UI
df['Year'] = df['WEEK_END_DATE'].dt.year

# Calculate the total revenue and units sold per store
store_metrics = df.groupby('STORE_NUM').agg({'SPEND': 'sum', 'UNITS': 'sum'}).reset_index()

# Define the app layout
app.layout = html.Div([
    html.H1('Auswirkungen verschiedener Marketingmaßnahmen auf den Umsatz'),
    html.H3('Analyse der Märkte'),
    dcc.Graph(id='store-scatter-plot'),
    html.Div(id='store-info', children=[
        html.H2('Store Info wird geladen...'),
        html.H4('Die 10 umsatzstärksten Produkte werden geladen...')
    ]),
    dcc.Dropdown(id='product-dropdown'),
    # dcc.DatePickerRange(
    #     id='date-picker-range',
    #     start_date=df['WEEK_END_DATE'].min(),
    #     end_date=df['WEEK_END_DATE'].max(),
    #     display_format='YYYY-MM-DD',
    #     style={'margin-top': 20},
    # ),
    dcc.Graph(id='revenue-scatter-plot'),
    html.Label('Jahr auswählen:'),
    dcc.RadioItems(
        id='year-selector',
        options=[{'label': str(year), 'value': year} for year in df['Year'].unique()],
        value=df['Year'].min(),  # Default to the earliest year
        style={'margin-top': 20},
    ),
    dcc.Graph(id='advertising-measure-distribution')
])

# Callback for the store scatter plot
@app.callback(
    Output('store-scatter-plot', 'figure'),
    Input('product-dropdown', 'value')
)
def update_store_figure(selected_product):
    # Create the scatter plot for stores
    fig = px.scatter(
        store_metrics,
        x='UNITS',
        y='SPEND',
        hover_name='STORE_NUM',
        title='Absatz und Umsatz der Märkte'
    )
    fig.update_traces(mode='markers', marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    return fig

# Callback for the dropdown options and store information
@app.callback(
    [Output('product-dropdown', 'options'),
     Output('product-dropdown', 'value'),
     Output('store-info', 'children')],
    Input('store-scatter-plot', 'clickData')
)
def update_dropdown_and_store_info(clickData):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    df_selected_store = df[df['STORE_NUM'] == selected_store]
    total_revenue_per_product = df_selected_store.groupby('UPC')['SPEND'].sum().reset_index()
    top_products = total_revenue_per_product.nlargest(10, 'SPEND')['UPC']
    dropdown_options = [{'label': str(product), 'value': product} for product in top_products]
    dropdown_value = top_products.iloc[0]
    store_total_revenue = df_selected_store['SPEND'].sum()
    store_info = [
        html.H2(f'Store {selected_store} mit einem Gesamtumsatz von {round(store_total_revenue, 2)} Euro'),
        html.H4(f'Die 10 umsatzstärksten Produkte von Store {selected_store}')
    ]
    return dropdown_options, dropdown_value, store_info

# Callback for the product scatter plot
@app.callback(
    Output('revenue-scatter-plot', 'figure'),
    [Input('product-dropdown', 'value'),
     Input('store-scatter-plot', 'clickData'),
    #  Input('date-picker-range', 'start_date'),
    #  Input('date-picker-range', 'end_date')
])
def update_product_figure(selected_product, clickData): # , start_date, end_date):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    # df_selected_time = df[(df['WEEK_END_DATE'] >= pd.to_datetime(start_date)) & 
    #                       (df['WEEK_END_DATE'] <= pd.to_datetime(end_date))]
    df_selected_store = df[df['STORE_NUM'] == selected_store]
    df_selected_product = df_selected_store[df_selected_store['UPC'] == selected_product]
    fig = px.bar(
        df_selected_product,
        x='WEEK_END_DATE',
        y='SPEND',
        color='advertising_category',
        color_discrete_sequence=px.colors.qualitative.Vivid,
        title=f'Produkt : {selected_product} aus dem Markt {selected_store}'
    )
    fig.update_traces(mode='markers', marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    return fig

# Callback for the advertising measure distribution bar chart
@app.callback(
    Output('advertising-measure-distribution', 'figure'),
    [Input('store-scatter-plot', 'clickData'),
     Input('year-selector', 'value')
])
def update_advertising_measure_distribution(clickData, selected_year):
    selected_store = clickData['points'][0]['hovertext'] if clickData else store_metrics['STORE_NUM'].iloc[0]
    df_selected_store = df[(df['STORE_NUM'] == selected_store) & (df['Year'] == selected_year)]
    df_selected_store['Quantile'] = pd.qcut(df_selected_store['SPEND'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])
    ad_measure_counts = df_selected_store.groupby(['Quantile', 'advertising_category'])['UPC'].count().reset_index(name='Anzahl Produkte')
    # ad_measure_counts['Prozent'] = ad_measure_counts.groupby('Quantile')['UPC'].apply(lambda x: x / float(x.sum())) * 100
    fig = px.bar(
        ad_measure_counts,
        x='Quantile',
        y='Anzahl Produkte',
        color='advertising_category',
        color_discrete_sequence=px.colors.qualitative.Vivid,
        barmode='group',
        title=f'Verteilung der Werbemaßnahmen nach dem Umsatz in Quantilen für das Jahr {selected_year}. Markt : {selected_store}'
    )
    return fig

Verbesserungen: Produkt Vergleich in Jahre aufteilen und übereinander lagern

Weitere Idee Darstellung der Verteilung der Werbemaßnahmen von Produktquantilen je Store
- Auswahl nach Jahren mit Checkbox oder Dropdown
- Ich würde beim zweiten Plot eher Bars verwenden anstatt des Scatters, so lässt sich das optisch besser zeitlich zuordnen
- Evlt. Checkboxen für Werbemaßnahmen -> [x]None, [x]Flyer + Display, []Display, []TPR_only, [x]Flyer
- Als weiteren Plot vllt. dann für ein Produkt schauen in welchem Zeitraum/Monat viel verkauft wurde oder gekauft wird
    - Man könnte den aktuellen Scatterplot in die Jahre 2009, 2010 und 2011(2012) einteilen und dann mit deinen vorgeschlagenen Checkboxen oder Dropdown das jeweilige Jahr auswählen, wodurch sich
        die Jahre überlagern und gut vergleichen lassen

In [79]:
app.run(jupyter_mode="tab", debug=True, host='127.0.0.1', port='8081')

Dash app running on http://127.0.0.1:8081/


<IPython.core.display.Javascript object>



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#re