### Import Pandas


`pandas` is a powerful Python library for data analysis and manipulation, offering easy-to-use data structures and analysis tools.

In [4]:
import pandas as pd

### Define the file path

In [5]:
# Location of the source workbook, assembled from adjacent raw-string pieces.
# NOTE(review): this is an absolute path on one user's machine; confirm whether
# it should be made relative to the notebook before sharing.
file_path = (
    r"C:\Users\claud\Desktop\data_preparation_using_python"
    r"\exercise_02_from_star_schema_to_single_big_table"
    r"\exercise_02.xlsx"
)

### Load the Excel File

In [6]:
# Load every sheet of the workbook into `dataframes`, a dict keyed by sheet name.
# Each handler prints a readable hint and then re-raises: without the re-raise,
# `dataframes` would stay undefined and a later cell would fail with an
# unrelated NameError instead of stopping here.
try:
    with pd.ExcelFile(file_path) as excel_file:

        # Get the sheet names
        sheet_names = excel_file.sheet_names
        print("Sheet Names in the Excel File:" , sheet_names)

        # Parse each sheet into a DataFrame while the workbook is still open
        dataframes = {sheet: excel_file.parse(sheet) for sheet in sheet_names}

except FileNotFoundError:
    print (f"Error: The file at path {file_path} was not found")
    raise
except ImportError as e:
    print(f"Error {e}")
    print("Please install the missing dependency by running: pip install openpyxl or Pandas")
    raise
except Exception as e:
    print(f"Error reading the Excel File {e}")
    raise

Sheet Names in the Excel File: ['dim_customer', 'dim_employee', 'dim_employee_sales_territory', 'dim_geography', 'dim_reseller', 'dim_product', 'dim_sales_territory', 'fact_reseller_sales']


### Merge

In [7]:
# Preview the first row of the 'fact_reseller_sales' sheet
dataframes['fact_reseller_sales'].head(1)

Unnamed: 0,id_order,Order date,Due date,Ship date,id_product,id_reseller,id_employee,id_sales_territory,Order Quantity
0,SO43897,2017-08-25,2017-09-04,2017-09-01,235,312,282,4,2


In [8]:
# Flatten the star schema: left-join each dimension sheet onto the fact sheet.
# validate='many_to_one' makes pandas raise MergeError when a dimension's join
# key is duplicated, instead of silently multiplying fact rows.
# dim_geography is joined last: the fact sheet itself has no 'id_geography';
# it arrives through an earlier join (presumably dim_reseller).
fact_single_table = (
    dataframes['fact_reseller_sales']
    .merge(dataframes['dim_product'], how='left', on='id_product', validate='many_to_one')
    .merge(dataframes['dim_reseller'], how='left', on='id_reseller', validate='many_to_one')
    .merge(dataframes['dim_employee'], how='left', on='id_employee', validate='many_to_one')
    .merge(dataframes['dim_sales_territory'], how='left', on='id_sales_territory', validate='many_to_one')
    .merge(dataframes['dim_geography'], how='left', on='id_geography', validate='many_to_one')
)

In [9]:
# List the column labels of the merged table
fact_single_table.columns

Index(['id_order', 'Order date', 'Due date', 'Ship date', 'id_product',
       'id_reseller', 'id_employee', 'id_sales_territory', 'Order Quantity',
       'product', 'standard cost', 'list price', 'id_geography',
       'Business Type', 'reseller', 'Employee', 'Region_x', 'Country', 'Group',
       'city', 'state', 'Region_y', 'id_salesterritory'],
      dtype='object')

In [10]:
# DataFrames to export, paired by position with the worksheet names below
clear_dataframes = [fact_single_table]
sheet_names = ['ST_fact_reseller_sales']

# Write each (DataFrame, worksheet name) pair into a single workbook,
# leaving the row index out of the sheet
with pd.ExcelWriter('output_Exercise_02.xlsx') as writer:
    for frame, target_sheet in zip(clear_dataframes, sheet_names):
        frame.to_excel(excel_writer=writer, sheet_name=target_sheet, index=False)

print("Dataframes successfully exported to 'output_Exercise_02'")

Dataframes successfully exported to 'output_Exercise_02'
