<a href="https://colab.research.google.com/github/FatmaelzahraaKhamiss/DEPI_FinalProject_SupplyChainAnalysis/blob/main/Final_Project_DataPartitioning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Final Project - Generating Dimension Tables**

### Introduction
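Our data consists of a single flat CSV file with 16 columns. To expose the relationships within the data and build a data model on top of it, we split the file into dimension tables (store, product, date, weather, and promotion/event) and a fact table that references them through keys. This notebook generates those tables and exports each one as a CSV file.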

## Data Gathering

In [None]:
# Necessary imports
import pandas as pd
import numpy as np

In [None]:
# Load the flat retail dataset from the working directory, then preview its first rows.
rs = pd.read_csv(filepath_or_buffer='Retail Store Inventory and Demand Forecasting.csv')

first_rows = rs.head()
print(first_rows)

         Date Store ID Product ID     Category Region  Inventory Level  \
0  2022-01-01     S001      P0001  Electronics  North              195   
1  2022-01-01     S001      P0002     Clothing  North              117   
2  2022-01-01     S001      P0003     Clothing  North              247   
3  2022-01-01     S001      P0004  Electronics  North              139   
4  2022-01-01     S001      P0005    Groceries  North              152   

   Units Sold  Units Ordered  Price  Discount Weather Condition  Promotion  \
0         102            252  72.72         5             Snowy          0   
1         117            249  80.16        15             Snowy          1   
2         114            612  62.94        10             Snowy          1   
3          45            102  87.63        10             Snowy          0   
4          65            271  54.41         0             Snowy          0   

   Competitor Pricing Seasonality  Epidemic  Demand  
0               85.73      Winte

In [None]:
# Normalise the headers: swap every space for an underscore so the columns can be
# referenced with identifier-like names (e.g. 'Store ID' -> 'Store_ID').
rs.columns = rs.columns.map(lambda header: header.replace(' ', '_'))

## Generate Dimension Tables

### Dim_Store Dimension

In [None]:
# Store dimension: one row per distinct (Store_ID, Region) pair found in the raw data.
dim_store = (
    rs[['Store_ID', 'Region']]
    .drop_duplicates()
    .reset_index(drop=True)
    # Surrogate key: the fresh 0-based row index shifted so keys start at 1.
    .assign(Store_Key=lambda stores: stores.index + 1)
    # Put the key first, followed by the descriptive attributes.
    .loc[:, ['Store_Key', 'Store_ID', 'Region']]
)

### Dim_Product Dimension


In [None]:
# Product dimension: one row per distinct (Product_ID, Category) pair found in the raw data.
dim_product = (
    rs[['Product_ID', 'Category']]
    .drop_duplicates()
    .reset_index(drop=True)
    # Surrogate key: the fresh 0-based row index shifted so keys start at 1.
    .assign(Product_Key=lambda products: products.index + 1)
    # Put the key first, followed by the descriptive attributes.
    .loc[:, ['Product_Key', 'Product_ID', 'Category']]
)

### Dim_Date Dimension

In [None]:
# Date dimension: one row per distinct (Date, Seasonality) pair found in the raw data.
dim_date = rs[['Date', 'Seasonality']].drop_duplicates().reset_index(drop=True)
# Generate an integer key (YYYYMMDD)
dim_date['Date_Key'] = pd.to_datetime(dim_date['Date']).dt.strftime('%Y%m%d').astype(int)
# Date_Key is derived from Date alone, while the rows above are distinct on
# Date + Seasonality. If any date carried more than one Seasonality value the key
# would repeat and every fact row for that date would be duplicated when the key
# is merged in, so fail loudly instead of exporting a broken dimension.
if not dim_date['Date_Key'].is_unique:
    conflicting_dates = dim_date.loc[dim_date['Date_Key'].duplicated(keep=False), 'Date'].unique()
    raise ValueError(
        "Dim_Date: Date_Key is not unique; dates with more than one Seasonality "
        f"(first 5): {list(conflicting_dates[:5])}"
    )
dim_date = dim_date[['Date_Key', 'Date', 'Seasonality']]

### Dim_Weather Dimension


In [None]:
# Weather dimension: one row per distinct Weather_Condition value found in the raw data.
dim_weather = (
    rs[['Weather_Condition']]
    .drop_duplicates()
    .reset_index(drop=True)
    # Surrogate key: the fresh 0-based row index shifted so keys start at 1.
    .assign(Weather_Key=lambda conditions: conditions.index + 1)
    # Put the key first, followed by the descriptive attribute.
    .loc[:, ['Weather_Key', 'Weather_Condition']]
)

### Dim_Promotion_Event Dimension

In [None]:
# Promotion/event dimension: one row per distinct combination of
# Discount, Promotion and Epidemic found in the raw data.
dim_promo_event = (
    rs[['Discount', 'Promotion', 'Epidemic']]
    .drop_duplicates()
    .reset_index(drop=True)
    # Surrogate key: the fresh 0-based row index shifted so keys start at 1.
    .assign(Promo_Event_Key=lambda combos: combos.index + 1)
    # Put the key first, followed by the descriptive attributes.
    .loc[:, ['Promo_Event_Key', 'Discount', 'Promotion', 'Epidemic']]
)

## Generate the Fact Table (Fact_Demand)


In [None]:
# Seed the fact table with an independent (deep) copy of rs, so the key columns
# merged in by the following cells are not added to rs itself.
fact_demand = rs.copy(deep=True)

In [None]:
# Merge Store Key
# Attach the store surrogate key to every fact row through the natural key Store_ID.
# dim_store is de-duplicated on Store_ID + Region, not on Store_ID alone, so a
# store listed under two regions would appear twice in the lookup and silently
# duplicate fact rows. validate='many_to_one' makes pandas raise MergeError in
# that case instead.
fact_demand = fact_demand.merge(
    dim_store[['Store_ID', 'Store_Key']],
    on='Store_ID',
    how='left',
    validate='many_to_one',
)

In [None]:
# Merge Product Key
# Attach the product surrogate key to every fact row through the natural key Product_ID.
# dim_product is de-duplicated on Product_ID + Category, not on Product_ID alone,
# so a product listed under two categories would appear twice in the lookup and
# silently duplicate fact rows. validate='many_to_one' makes pandas raise
# MergeError in that case instead.
fact_demand = fact_demand.merge(
    dim_product[['Product_ID', 'Product_Key']],
    on='Product_ID',
    how='left',
    validate='many_to_one',
)

In [None]:
# Merge Date Key
# Ensure date columns are strings for consistent merging
fact_demand['Date'] = fact_demand['Date'].astype(str)
dim_date['Date'] = dim_date['Date'].astype(str)
# dim_date is de-duplicated on Date + Seasonality, not on Date alone, so a date
# with two Seasonality values would appear twice in the lookup and silently
# duplicate fact rows. validate='many_to_one' makes pandas raise MergeError in
# that case instead.
fact_demand = fact_demand.merge(
    dim_date[['Date', 'Date_Key']],
    on='Date',
    how='left',
    validate='many_to_one',
)

In [None]:
# Look up Weather_Key for each fact row by its Weather_Condition (left join keeps
# every fact row; dim_weather contributes only the Weather_Key column).
fact_demand = pd.merge(fact_demand, dim_weather, how='left', on='Weather_Condition')

In [None]:
# Look up Promo_Event_Key for each fact row by its (Discount, Promotion, Epidemic)
# combination; the left join keeps every fact row.
promo_join_columns = ['Discount', 'Promotion', 'Epidemic']
promo_lookup = dim_promo_event[promo_join_columns + ['Promo_Event_Key']]
fact_demand = pd.merge(fact_demand, promo_lookup, how='left', on=promo_join_columns)

In [None]:
# Reduce the fact table to its final shape: the five dimension keys first,
# followed by the measure columns. Every other column is dropped.
foreign_key_columns = ['Store_Key', 'Product_Key', 'Date_Key', 'Weather_Key', 'Promo_Event_Key']
measure_columns = [
    'Inventory_Level',
    'Units_Sold',
    'Units_Ordered',
    'Price',
    'Competitor_Pricing',
    'Demand',
]
fact_demand = fact_demand.loc[:, foreign_key_columns + measure_columns]

## Exporting Tables to CSV files

In [None]:
# Write each table to its own UTF-8 CSV file in the working directory,
# leaving out the DataFrame index column.
tables_by_filename = {
    "Dim_Store.csv": dim_store,
    "Dim_Product.csv": dim_product,
    "Dim_Date.csv": dim_date,
    "Dim_Weather.csv": dim_weather,
    "Dim_Promotion_Event.csv": dim_promo_event,
    "Fact_Demand.csv": fact_demand,
}
for csv_name, table in tables_by_filename.items():
    table.to_csv(csv_name, index=False, encoding='utf-8')

## Downloading files

In [None]:
# Import the files module from google.colab for download functionality
from google.colab import files

In [None]:
print("\nInitiating download of 6 CSV files...")

# Hand each exported CSV to files.download, in the same order the files were written.
for csv_name in (
    'Dim_Store.csv',
    'Dim_Product.csv',
    'Dim_Date.csv',
    'Dim_Weather.csv',
    'Dim_Promotion_Event.csv',
    'Fact_Demand.csv',
):
    files.download(csv_name)

print("✅ Processing complete. Check your browser for the download prompts.")


Initiating download of 6 CSV files...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Processing complete. Check your browser for the download prompts.
