
# FMCG Forecasting: Data Load & EDA
This notebook loads the raw sales data (2022–2024) and performs exploratory data analysis (EDA) to:
- Check schema, missing values, data types
- Analyze numerical relationships and correlations


In [0]:
import importlib
import os
import sys

# Root of the project checkout that contains the `utils` package.
# Defaults to the original workspace location; set FMCG_REPO_ROOT to run the
# notebook from a different user folder or environment without editing code.
REPO_ROOT = os.environ.get(
    "FMCG_REPO_ROOT",
    "/Workspace/Users/faron.beata@gmail.com/fmcg_forecasting/fmcg_forecasting_repo",
)

# The path must be registered BEFORE anything is imported from `utils`;
# otherwise a fresh kernel (Restart & Run All) cannot resolve the package.
# The membership check keeps re-runs of this cell from piling up duplicates.
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

from utils.load_data import load_data
from utils.eda_utils import get_missing_summary, describe_columns, get_unique_counts, value_counts
from utils.eda_utils import groupby_summary, plot_grouped, correlation_matrix, run_eda

# Development aid: to pick up edits to the helpers without restarting the kernel,
# run `import utils.eda_utils; importlib.reload(utils.eda_utils)` and re-execute
# the `from utils.eda_utils import ...` lines above.


## Load dataset

In [0]:

# Read the raw FMCG sales extract (Parquet file on DBFS) and preview it.
raw_sales_path = "dbfs:/FileStore/fmcg/parquet/FMCG_2022_2024.parquet"
df = load_data(raw_sales_path, file_format="parquet")
df.display()


## Data Overview

In [0]:
# Build the three overview summaries up front, then render them in order.
describe = describe_columns(df)
missing = get_missing_summary(df)
unique = get_unique_counts(df)

# Rich display for the column description and missing-value summary;
# the unique counts are emitted as plain text.
for overview in (describe, missing):
    display(overview)
print(unique)


In [0]:

# Column inspected in both the frequency table and the grouped plot below.
segment_col = "segment"

# Frequency table for the segment column (normalize=True is forwarded to the
# helper; presumably yields relative shares rather than raw counts — TODO confirm).
vc = value_counts(df, segment_col, normalize=True)
display(vc)

# Plot "units_sold" grouped by segment, aggregated with "sum".
plot_grouped(df, segment_col, "units_sold", agg_func="sum")

# correlation_matrix may return None, so only render when a result came back.
corr = correlation_matrix(df)
if corr is not None:
    display(corr)


In [0]:
# Run the packaged EDA helper from utils.eda_utils on the loaded frame.
# NOTE(review): presumably bundles the individual summaries shown above into
# one pass — confirm against utils/eda_utils.py before relying on its output.
run_eda(df)
