In [10]:
import pandas as pd
import os
import shutil

In [11]:
# Run the three dataset generator scripts through IPython shell escapes.
# Each script prints a confirmation naming the CSV it produced; the following
# cell then looks for those CSVs under ../notebooks/ and moves them to ../data/.
# NOTE(review): `python` here is whatever is first on the shell PATH, which is
# presumably (but not necessarily) the kernel's interpreter — confirm.
!python ../generators/attendance_dataset_3m.py
!python ../generators/sales_dataset_3m.py
!python ../generators/financial_dataset_3m.py

✅ Attendance dataset generated: attendance_dataset_3m.csv
✅ Sales dataset generated: sales_dataset_3m.csv
✅ File generated: financial_dataset_3m.csv


In [13]:
# Make sure the destination directory is present before relocating anything.
os.makedirs("../data", exist_ok=True)

# Generated CSVs that should end up in the data folder.
files_to_move = [
    "attendance_dataset_3m.csv",
    "sales_dataset_3m.csv",
    "financial_dataset_3m.csv"
]

for filename in files_to_move:
    source_path = f"../notebooks/{filename}"   # where the generated file is expected
    target_path = f"../data/{filename}"

    # Guard clause: report a missing file and carry on with the next one.
    if not os.path.exists(source_path):
        print(f"{filename} not found, skipping")
        continue

    shutil.move(source_path, target_path)
    print(f"Moved {filename} to data folder")


Moved attendance_dataset_3m.csv to data folder
Moved sales_dataset_3m.csv to data folder
Moved financial_dataset_3m.csv to data folder


In [14]:
# Read the three CSVs from the data folder into DataFrames.
# The map is consumed in order by the unpacking, so the files are read as
# sales, then financial, then attendance.
sales, financial, attendance = map(
    pd.read_csv,
    (
        "../data/sales_dataset_3m.csv",
        "../data/financial_dataset_3m.csv",
        "../data/attendance_dataset_3m.csv",
    ),
)

## Data Profiling

In [15]:
# Show the column dtypes of each dataset, one labelled section per frame.
print("Data types:\n")

for label, frame in (
    ("Sales:", sales),
    ("Financial:", financial),
    ("Attendance:", attendance),
):
    print(label)
    display(frame.dtypes)


Data types:

Sales:


SaleID            str
Region            str
Country           str
Product           str
Date              str
Currency          str
Quantity        int64
UnitPrice     float64
TotalSales    float64
dtype: object

Financial:


TransactionID        str
Region               str
Country              str
Product              str
Date                 str
Currency             str
Revenue          float64
Expense          float64
Profit           float64
dtype: object

Attendance:


StaffID         str
Name            str
Region          str
Country         str
Department      str
Date            str
Status          str
CheckInTime     str
CheckOutTime    str
dtype: object

In [16]:
# Show the per-column count of missing values for each dataset.
print("Null values:\n")

for label, frame in (
    ("Sales:", sales),
    ("Financial:", financial),
    ("Attendance:", attendance),
):
    print(label)
    display(frame.isnull().sum())


Null values:

Sales:


SaleID        0
Region        0
Country       0
Product       0
Date          0
Currency      0
Quantity      0
UnitPrice     0
TotalSales    0
dtype: int64

Financial:


TransactionID    0
Region           0
Country          0
Product          0
Date             0
Currency         0
Revenue          0
Expense          0
Profit           0
dtype: int64

Attendance:


StaffID         0
Name            0
Region          0
Country         0
Department      0
Date            0
Status          0
CheckInTime     0
CheckOutTime    0
dtype: int64

In [7]:
# List the distinct currency codes present in the two monetary datasets.
print("Unique currency values:\n")

for label, frame in (
    ("Sales currencies:", sales),
    ("Financial currencies:", financial),
):
    print(label, frame["Currency"].unique())


Unique currency values:

Sales currencies: <StringArray>
['CAD', 'SGD', 'CLP', 'GBP', 'NGN', 'EUR', 'ZAR', 'ARS', 'BRL', 'USD', 'AUD',
 'CNY', 'NZD', 'JPY', 'EGP', 'MXN', 'INR']
Length: 17, dtype: str
Financial currencies: <StringArray>
['INR', 'BRL', 'AUD', 'EUR', 'CLP', 'CAD', 'GBP', 'MXN', 'CNY', 'JPY', 'USD',
 'ARS', 'EGP', 'NGN', 'NZD', 'SGD', 'ZAR']
Length: 17, dtype: str
