In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Step 1: Read all the Excel files into pandas dataframes
file_dates = ['19.04.2021', '20.04.2021', '21.04.2021', '22.04.2021', '23.04.2021', '24.04.2021', '25.04.2021']
dfs = []

for date in file_dates:
    file_path = f'VP_{date}.xlsx'
    df = pd.read_excel(file_path)
    dfs.append(df)

In [3]:
# Step 2: Concatenate the dataframes into a single dataframe
combined_df = pd.concat(dfs)

In [4]:
# Step 3: Group the data by "Shop ID" and "product number"
grouped_df = combined_df.groupby(["Shop ID", "Artikel Shop-Nummer"])

In [5]:
# Step 4: Calculate the statistics "mean, max, media, min, standard deviation"
summary_stats = grouped_df["Verkaufspreis"].agg(['mean', 'median', 'min', 'max', 'std'])

In [6]:
# Display the summary statistics
print(summary_stats)

                               mean  median     min     max  std
Shop ID Artikel Shop-Nummer                                     
1       33                    22.70   22.70   22.70   22.70  0.0
        119                   17.90   17.90   17.90   17.90  0.0
        120                   17.90   17.90   17.90   17.90  0.0
        121                   17.90   17.90   17.90   17.90  0.0
        122                   19.90   19.90   19.90   19.90  0.0
...                             ...     ...     ...     ...  ...
3       2746277               39.95   39.95   39.95   39.95  NaN
        2746278               34.95   34.95   34.95   34.95  NaN
        2746279              119.90  119.90  119.90  119.90  NaN
        2746580               34.95   34.95   34.95   34.95  NaN
        2747223              201.95  201.95  201.95  201.95  NaN

[429729 rows x 5 columns]


In [7]:
summary_stats.to_csv("summary_stats_Q1.csv")

In [8]:
# Step 5: Calculate the percentage of products with price changes in each webshop

# 5.1 Count the total number of products in each webshop
df = pd.read_csv("summary_stats_Q1.csv")

product_count_per_webshop = df.groupby("Shop ID")["Artikel Shop-Nummer"].count()

# 5.2 Count the number of products with price change in each webshop
price_change_product_count_per_webshop = df[df["std"] != 0].groupby("Shop ID")["Artikel Shop-Nummer"].count()

# 5.3 Calculate the percentage of products with price changes in each webshop
total_products_per_webshop = df.groupby("Shop ID")["Artikel Shop-Nummer"].count()
percentage_product_price_change_per_webshop = (price_change_product_count_per_webshop / total_products_per_webshop) * 100

# 5.4 Combine the results into a new DataFrame
result_df = pd.DataFrame({
    "Number of Products": product_count_per_webshop,
    "Number of Products with Price change": price_change_product_count_per_webshop,
    "Percentage of Products with Price change": percentage_product_price_change_per_webshop
})

# 5.5 Display the combined DataFrame
print(result_df)

         Number of Products  Number of Products with Price change  \
Shop ID                                                             
1                    305933                                 70365   
3                    123796                                 20194   

         Percentage of Products with Price change  
Shop ID                                            
1                                       23.000134  
3                                       16.312320  
