# Separation by Warengruppe

This notebook splits the combined data (all of our given datasets: kiwo, umsatzdaten_gekuerzt, wetter) into the 6 Warengruppen (product groups) as follows:

1 - Bread
2 - Rolls
3 - Croissant
4 - Pastry
5 - Cakes
6 - Seasonal Products

In [None]:
import pandas as pd

# --- 1. Load the Data ---
# Combined dataset (kiwo, umsatzdaten_gekuerzt, wetter) as described in the
# notebook introduction.
file_path = '/workspaces/team3_goodweather/1_DatasetCharacteristics/raw_data/combined_data_outer.csv'

# Parsed with pandas' default CSV settings. If parsing fails, pass an explicit
# delimiter (e.g. delimiter=',').
df = pd.read_csv(file_path)

# --- 2. Handle Missing 'Warengruppe' Values ---
# Report how many rows have no category before they are dropped.
missing_count = df['Warengruppe'].isna().sum()
print(f"Number of lines without a 'Warengruppe' entry: {missing_count}")

# Keep only rows that have a Warengruppe. The explicit .copy() makes df_clean
# an independent frame, so the column assignment below does not warn.
df_clean = df.dropna(subset=['Warengruppe']).copy()

# Store the category as an integer (e.g. 1.0 -> 1).
# NOTE(review): presumably the column is read as float because of the missing
# values; confirm it only ever contains whole numbers.
df_clean['Warengruppe'] = df_clean['Warengruppe'].astype(int)

# --- 3. Create Separate DataFrames ---
# Mapping:
# 1 - Bread
# 2 - Roles (Rolls)
# 3 - Croissant
# 4 - Pastry
# 5 - Cakes
# 6 - Seasonal Products

print("\nSeparating data into groups...")

# Each subset is copied so that adding or changing columns in later cells
# works on an independent frame and does not emit SettingWithCopyWarning
# (same reason df_clean is copied above).
df_bread = df_clean[df_clean['Warengruppe'] == 1].copy()
df_roles = df_clean[df_clean['Warengruppe'] == 2].copy()
df_croissant = df_clean[df_clean['Warengruppe'] == 3].copy()
df_pastry = df_clean[df_clean['Warengruppe'] == 4].copy()
df_cakes = df_clean[df_clean['Warengruppe'] == 5].copy()
df_seasonal = df_clean[df_clean['Warengruppe'] == 6].copy()

# --- 4. Validation (Optional) ---
# Sanity check: print the number of rows in each per-group dataframe.
print(f"Bread entries: {len(df_bread)}")
print(f"Roles entries: {len(df_roles)}")
print(f"Croissant entries: {len(df_croissant)}")
print(f"Pastry entries: {len(df_pastry)}")
print(f"Cakes entries: {len(df_cakes)}")
print(f"Seasonal entries: {len(df_seasonal)}")

# df_bread, df_roles, etc. are now available to the later cells of this notebook.

In [None]:
# ======== Bread ========
# Preview: show the first five rows of the bread subset.
print(df_bread.head(n=5))

# Preparation of separated data
In the following cells, the data for each Warengruppe can be analyzed and visualized separately.

In [None]:
# ======== Roles ========
# Preview: show the first five rows of the roles subset.
print(df_roles.head(n=5))


In [None]:
# ======== Croissant ========
# Preview: show the first five rows of the croissant subset.
print(df_croissant.head(n=5))

In [None]:
# ======== Pastry ========
# Preview: show the first five rows of the pastry subset.
print(df_pastry.head(n=5))

In [None]:
# ======== Cakes ========
# Preview: show the first five rows of the cakes subset.
print(df_cakes.head(n=5))


In [None]:
# ======== Seasonal Products ========
# Preview: show the first five rows of the seasonal products subset.
print(df_seasonal.head(n=5))