In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_rows', 50)

print("Libraries imported successfully!")

Libraries imported successfully!


# Data Loading and initial inspection

In [None]:
data_path = 'PURCHASE ORDER DATA EXTRACT 2012-2015_0.csv'

In [8]:
# Load a sample first to understand memory requirements
print("Loading sample of the data to understand structure...")
sample_df = pd.read_csv(data_path, nrows=1000)
print(f"Sample loaded: {sample_df.shape[0]} rows, {sample_df.shape[1]} columns")
print(f"Memory usage: {sample_df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB")

Loading sample of the data to understand structure...
Sample loaded: 1000 rows, 31 columns
Memory usage: 1.50 MB


In [9]:
# Display basic information about the dataset
print("\n=== DATASET OVERVIEW ===")
print(f"Total records in file: 919,734")
print(f"File size: 156 MB")
print(f"Columns: {len(sample_df.columns)}")
print("\nColumn names:")
for i, col in enumerate(sample_df.columns, 1):
    print(f"{i:2d}. {col}")

print("\n=== DATA TYPES ===")
print(sample_df.dtypes)


=== DATASET OVERVIEW ===
Total records in file: 919,734
File size: 156 MB
Columns: 31

Column names:
 1. Creation Date
 2. Purchase Date
 3. Fiscal Year
 4. LPA Number
 5. Purchase Order Number
 6. Requisition Number
 7. Acquisition Type
 8. Sub-Acquisition Type
 9. Acquisition Method
10. Sub-Acquisition Method
11. Department Name
12. Supplier Code
13. Supplier Name
14. Supplier Qualifications
15. Supplier Zip Code
16. CalCard
17. Item Name
18. Item Description
19. Quantity
20. Unit Price
21. Total Price
22. Classification Codes
23. Normalized UNSPSC
24. Commodity Title
25. Class
26. Class Title
27. Family
28. Family Title
29. Segment
30. Segment Title
31. Location

=== DATA TYPES ===
Creation Date               object
Purchase Date               object
Fiscal Year                 object
LPA Number                  object
Purchase Order Number       object
Requisition Number          object
Acquisition Type            object
Sub-Acquisition Type        object
Acquisition Method       

In [11]:
# =============================================================================
# 2. LOAD REPRESENTATIVE SAMPLE FOR ANALYSIS
# =============================================================================

print("\nLoading representative sample for analysis...")
print("Note: Due to dataset size (156MB), we'll use a stratified sample for analysis")

# First, let's get total row count and sample proportionally
total_rows = 919734  # From our earlier check
sample_size = 50000  # Reasonable sample size for analysis
skip_rows = sorted(np.random.choice(range(1, total_rows), total_rows - sample_size - 1, replace=False))

# Define data types for memory optimization
# IMPORTANT: Remove 'Total Price' and 'Unit Price' from dtype_dict since they contain strings with $ and commas
dtype_dict = {
    'Fiscal Year': 'category',
    'Acquisition Type': 'category',
    'Sub-Acquisition Type': 'category',
    'Acquisition Method': 'category',
    'Sub-Acquisition Method': 'category',
    'Department Name': 'category',
    'Supplier Code': 'string',
    'Supplier Name': 'string',
    'Supplier Qualifications': 'category',
    'Supplier Zip Code': 'string',
    'CalCard': 'category',
    'Item Name': 'string',
    'Item Description': 'string',
    'Quantity': 'float64',
    # 'Unit Price': 'float64',        # Remove this - will handle after loading
    # 'Total Price': 'float64',       # Remove this - will handle after loading
    'Classification Codes': 'string',
    'Normalized UNSPSC': 'string',
    'Commodity Title': 'string',
    'Class': 'string',
    'Class Title': 'string',
    'Family': 'string',
    'Family Title': 'string',
    'Segment': 'string',
    'Segment Title': 'string',
    'Location': 'string'
}

# Parse dates
date_columns = ['Creation Date', 'Purchase Date']

# Load sample dataset
df = pd.read_csv(data_path, dtype=dtype_dict, parse_dates=date_columns, 
                 date_format='%m/%d/%Y', skiprows=skip_rows, low_memory=False)

# Clean monetary columns (remove $ and commas) - DO THIS AFTER LOADING
df['Total Price'] = df['Total Price'].astype(str).str.replace('$', '').str.replace(',', '').astype(float)
df['Unit Price'] = df['Unit Price'].astype(str).str.replace('$', '').str.replace(',', '').astype(float)

print(f"Sample dataset loaded: {df.shape[0]:,} rows, {df.shape[1]} columns")
print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB")
print(f"Sample represents {df.shape[0]/total_rows*100:.1f}% of total dataset")

# Adjust totals for sample analysis
sample_fraction = df.shape[0] / total_rows
print(f"\nNote: All monetary totals will be scaled by factor of {1/sample_fraction:.2f} for full dataset estimates")


Loading representative sample for analysis...
Note: Due to dataset size (156MB), we'll use a stratified sample for analysis
Sample dataset loaded: 18,746 rows, 31 columns
Memory usage: 24.03 MB
Sample represents 2.0% of total dataset

Note: All monetary totals will be scaled by factor of 49.06 for full dataset estimates


In [12]:
# =============================================================================
# 3. DATA QUALITY ASSESSMENT
# =============================================================================

print("\n" + "="*50)
print("DATA QUALITY ASSESSMENT")
print("="*50)

# Check for missing values
print("=== MISSING VALUES ANALYSIS ===")
missing_data = df.isnull().sum()
missing_percent = (missing_data / len(df)) * 100

missing_summary = pd.DataFrame({
    'Missing Count': missing_data,
    'Missing Percentage': missing_percent
}).sort_values('Missing Count', ascending=False)

print(missing_summary[missing_summary['Missing Count'] > 0])

# Check for duplicates
print("\n=== DUPLICATE ANALYSIS ===")
duplicates = df.duplicated().sum()
print(f"Number of duplicate rows: {duplicates:,}")
print(f"Percentage of duplicates: {duplicates/len(df)*100:.2f}%")

# Check for duplicate purchase orders (might be valid if different line items)
po_duplicates = df.duplicated(subset=['Purchase Order Number']).sum()
print(f"\nDuplicate Purchase Order Numbers: {po_duplicates:,}")
print(f"Unique Purchase Order Numbers: {df['Purchase Order Number'].nunique():,}")




DATA QUALITY ASSESSMENT
=== MISSING VALUES ANALYSIS ===
                         Missing Count  Missing Percentage
Requisition Number               18001               96.03
Sub-Acquisition Method           17072               91.07
Sub-Acquisition Type             15027               80.16
LPA Number                       13837               73.81
Supplier Qualifications          10950               58.41
Location                          3765               20.08
Supplier Zip Code                 3765               20.08
Purchase Date                      889                4.74
Commodity Title                    174                0.93
Class                              174                0.93
Class Title                        174                0.93
Family                             174                0.93
Family Title                       174                0.93
Segment                            174                0.93
Segment Title                      174                0.93

In [13]:
# =============================================================================
# 4. TEMPORAL ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("TEMPORAL ANALYSIS")
print("="*50)

# Extract date components
df['Creation Year'] = df['Creation Date'].dt.year
df['Creation Month'] = df['Creation Date'].dt.month
df['Creation Quarter'] = df['Creation Date'].dt.quarter

# Records by fiscal year
fy_counts = df['Fiscal Year'].value_counts().sort_index()
print("\nRecords by Fiscal Year:")
for fy, count in fy_counts.items():
    print(f"{fy}: {count:,} records")

# Total spending by fiscal year
fy_spending = df.groupby('Fiscal Year')['Total Price'].sum().sort_index()
print("\nTotal spending by Fiscal Year:")
for fy, amount in fy_spending.items():
    print(f"{fy}: ${amount:,.2f}")

# Average order value by fiscal year
fy_avg_order = df.groupby('Fiscal Year')['Total Price'].mean().sort_index()
print("\nAverage order value by Fiscal Year:")
for fy, amount in fy_avg_order.items():
    print(f"{fy}: ${amount:,.2f}")



TEMPORAL ANALYSIS

Records by Fiscal Year:
2012-2013: 5,889 records
2013-2014: 6,529 records
2014-2015: 6,328 records

Total spending by Fiscal Year:
2012-2013: $1,410,293,738.22
2013-2014: $3,233,133,484.29
2014-2015: $2,398,318,385.31

Average order value by Fiscal Year:
2012-2013: $239,479.32
2013-2014: $495,195.82
2014-2015: $379,001.01


In [14]:
# =============================================================================
# 5. ACQUISITION TYPE ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("ACQUISITION TYPE ANALYSIS")
print("="*50)

# Acquisition type distribution
acq_type_counts = df['Acquisition Type'].value_counts()
acq_type_percent = (acq_type_counts / len(df)) * 100

print("\nAcquisition Type Distribution:")
for acq_type, count in acq_type_counts.items():
    print(f"{acq_type}: {count:,} ({acq_type_percent[acq_type]:.1f}%)")

# Acquisition method distribution
acq_method_counts = df['Acquisition Method'].value_counts().head(10)
print("\nTop 10 Acquisition Methods:")
for method, count in acq_method_counts.items():
    print(f"{method}: {count:,}")

# Spending by acquisition type
print("\n=== SPENDING BY ACQUISITION TYPE ===")
acq_spending = df.groupby('Acquisition Type')['Total Price'].agg(['sum', 'mean', 'count'])
acq_spending = acq_spending.sort_values('sum', ascending=False)

print("\nSpending by Acquisition Type:")
for idx, row in acq_spending.iterrows():
    print(f"{idx}:")
    print(f"  Total: ${row['sum']:,.2f}")
    print(f"  Average: ${row['mean']:,.2f}")
    print(f"  Orders: {row['count']:,}")
    print()




ACQUISITION TYPE ANALYSIS

Acquisition Type Distribution:
NON-IT Goods: 11,663 (62.2%)
NON-IT Services: 3,722 (19.9%)
IT Goods: 2,722 (14.5%)
IT Services: 632 (3.4%)
IT Telecommunications: 7 (0.0%)

Top 10 Acquisition Methods:
Informal Competitive: 4,416
Statewide Contract: 3,411
SB/DVBE Option: 2,178
Services are specifically exempt by statute: 1,882
State Programs: 1,503
Fair and Reasonable: 1,396
WSCA/Coop: 1,006
Formal Competitive: 978
Services are specifically exempt by policy: 578
Emergency Purchase: 538

=== SPENDING BY ACQUISITION TYPE ===

Spending by Acquisition Type:
NON-IT Services:
  Total: $6,545,953,344.51
  Average: $1,758,719.33
  Orders: 3,722.0

NON-IT Goods:
  Total: $195,467,215.28
  Average: $16,759.60
  Orders: 11,663.0

IT Services:
  Total: $187,711,339.00
  Average: $297,011.61
  Orders: 632.0

IT Goods:
  Total: $112,432,656.27
  Average: $41,305.16
  Orders: 2,722.0

IT Telecommunications:
  Total: $181,052.76
  Average: $25,864.68
  Orders: 7.0



In [15]:
# =============================================================================
# 6. DEPARTMENT ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("DEPARTMENT ANALYSIS")
print("="*50)

# Top spending departments
dept_spending = df.groupby('Department Name')['Total Price'].agg(['sum', 'count']).sort_values('sum', ascending=False)

print("\nTop 10 Departments by Total Spending:")
for i, (dept, row) in enumerate(dept_spending.head(10).iterrows(), 1):
    print(f"{i}. {dept}")
    print(f"   Total Spent: ${row['sum']:,.2f}")
    print(f"   Number of Orders: {row['count']:,}")
    print(f"   Average Order: ${row['sum']/row['count']:,.2f}")
    print()

print(f"Total unique departments: {df['Department Name'].nunique():,}")



DEPARTMENT ANALYSIS

Top 10 Departments by Total Spending:
1. Health Care Services, Department of
   Total Spent: $5,291,693,609.74
   Number of Orders: 143.0
   Average Order: $37,004,850.42

2. Transportation, Department of
   Total Spent: $196,113,578.56
   Number of Orders: 974.0
   Average Order: $201,348.64

3. Public Health, Department of
   Total Spent: $191,089,063.76
   Number of Orders: 237.0
   Average Order: $806,282.97

4. Employment Development Department
   Total Spent: $166,836,138.33
   Number of Orders: 180.0
   Average Order: $926,867.44

5. Corrections and Rehabilitation, Department of
   Total Spent: $128,824,871.55
   Number of Orders: 3,163.0
   Average Order: $40,728.70

6. Water Resources, Department of
   Total Spent: $114,643,939.34
   Number of Orders: 1,500.0
   Average Order: $76,429.29

7. Social Services, Department of
   Total Spent: $108,431,693.57
   Number of Orders: 147.0
   Average Order: $737,630.57

8. State Hospitals, Department of
   Total Sp

In [16]:
# =============================================================================
# 7. SUPPLIER ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("SUPPLIER ANALYSIS")
print("="*50)

# Supplier statistics
supplier_stats = df.groupby('Supplier Name')['Total Price'].agg(['sum', 'count']).sort_values('sum', ascending=False)

print(f"Total unique suppliers: {df['Supplier Name'].nunique():,}")
print(f"Suppliers with multiple orders: {(supplier_stats['count'] > 1).sum():,}")

print("\nTop 10 Suppliers by Total Value:")
for i, (supplier, row) in enumerate(supplier_stats.head(10).iterrows(), 1):
    print(f"{i}. {supplier}")
    print(f"   Total Value: ${row['sum']:,.2f}")
    print(f"   Number of Orders: {row['count']:,}")
    print()

# Supplier qualifications analysis
qualifications = df['Supplier Qualifications'].value_counts().head(10)
print("\nTop Supplier Qualifications:")
for qual, count in qualifications.items():
    if pd.notna(qual):
        print(f"{qual}: {count:,}")



SUPPLIER ANALYSIS
Total unique suppliers: 4,840
Suppliers with multiple orders: 1,603

Top 10 Suppliers by Total Value:
1. ACS State Healthcare, LLC
   Total Value: $1,684,172,034.00
   Number of Orders: 1.0

2. San Mateo Health Commission dba: Health Plan of San Mateo
   Total Value: $1,085,954,000.00
   Number of Orders: 1.0

3. Health Net Community Solutions, Inc.
   Total Value: $950,003,000.00
   Number of Orders: 6.0

4. Blue Cross of California Partnership Plan, Inc.
   Total Value: $677,942,000.01
   Number of Orders: 2.0

5. County of Contra Costa
   Total Value: $226,487,487.05
   Number of Orders: 3.0

6. County of Los Angeles
   Total Value: $105,319,818.00
   Number of Orders: 2.0

7. County of Stanislaus
   Total Value: $99,015,018.00
   Number of Orders: 2.0

8. San Diego Workforce Partnership
   Total Value: $86,582,991.67
   Number of Orders: 4.0

9. Department of Technology Services
   Total Value: $80,117,802.00
   Number of Orders: 6.0

10. Alameda Alliance for Hea

In [17]:
# =============================================================================
# 8. ITEM AND UNSPSC ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("ITEM CLASSIFICATION ANALYSIS")
print("="*50)

# UNSPSC Segment analysis
segment_analysis = df.groupby(['Segment', 'Segment Title'])['Total Price'].agg(['sum', 'count']).sort_values('sum', ascending=False)

print("\nTop UNSPSC Segments by Spending:")
for i, ((segment, title), row) in enumerate(segment_analysis.head(10).iterrows(), 1):
    print(f"{i}. {segment} - {title}")
    print(f"   Total Spent: ${row['sum']:,.2f}")
    print(f"   Orders: {row['count']:,}")
    print()

# Most common items
item_analysis = df.groupby('Item Name')['Total Price'].agg(['sum', 'count', 'mean']).sort_values('count', ascending=False)

print("\nMost Frequently Ordered Items:")
for i, (item, row) in enumerate(item_analysis.head(10).iterrows(), 1):
    print(f"{i}. {item}")
    print(f"   Orders: {row['count']:,}")
    print(f"   Total Value: ${row['sum']:,.2f}")
    print(f"   Average Value: ${row['mean']:,.2f}")
    print()



ITEM CLASSIFICATION ANALYSIS

Top UNSPSC Segments by Spending:
1. 85000000 - Healthcare Services
   Total Spent: $5,598,875,936.43
   Orders: 437.0

2. 81000000 - Engineering and Research and Technology Based Services
   Total Spent: $180,467,702.98
   Orders: 941.0

3. 43000000 - Information Technology Broadcasting and Telecommunications
   Total Spent: $152,452,533.55
   Orders: 1,778.0

4. 86000000 - Education and Training Services
   Total Spent: $146,952,736.64
   Orders: 549.0

5. 93000000 - Politics and Civic Affairs Services
   Total Spent: $102,579,058.59
   Orders: 309.0

6. 80000000 - Management and Business Professionals and Administrative Services
   Total Spent: $95,923,927.30
   Orders: 505.0

7. 25000000 - Commercial and Military and Private Vehicles and their Accessories and Components
   Total Spent: $95,208,663.49
   Orders: 610.0

8. 92000000 - National Defense and Public Order and Security and Safety Services
   Total Spent: $77,102,720.72
   Orders: 122.0

9. 700

In [18]:
# =============================================================================
# 9. FINANCIAL ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("FINANCIAL ANALYSIS")
print("="*50)

# Overall statistics (scaled for full dataset)
sample_spending = df['Total Price'].sum()
total_spending = sample_spending / sample_fraction  # Estimate for full dataset
total_orders = int(len(df) / sample_fraction)  # Estimate for full dataset
avg_order_value = df['Total Price'].mean()
median_order_value = df['Total Price'].median()

print(f"\nOverall Statistics (Estimated for Full Dataset):")
print(f"Total Spending: ${total_spending:,.2f} (sample: ${sample_spending:,.2f})")
print(f"Total Orders: {total_orders:,} (sample: {len(df):,})")
print(f"Average Order Value: ${avg_order_value:,.2f}")
print(f"Median Order Value: ${median_order_value:,.2f}")

# Price distribution analysis
print("\nOrder Value Distribution:")
quantiles = df['Total Price'].quantile([0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99])
for q, value in quantiles.items():
    print(f"{q*100:.0f}th percentile: ${value:,.2f}")

# High-value orders analysis
high_value_orders = df[df['Total Price'] > df['Total Price'].quantile(0.95)]
high_value_sample_spending = high_value_orders['Total Price'].sum()
high_value_total_spending = high_value_sample_spending / sample_fraction

print(f"\nHigh-value orders (top 5%): {int(len(high_value_orders)/sample_fraction):,} (sample: {len(high_value_orders):,})")
print(f"High-value spending: ${high_value_total_spending:,.2f} (sample: ${high_value_sample_spending:,.2f})")
print(f"Percentage of total spending: {high_value_sample_spending/sample_spending*100:.1f}%")



FINANCIAL ANALYSIS

Overall Statistics (Estimated for Full Dataset):
Total Spending: $345,488,789,867.85 (sample: $7,041,745,607.82)
Total Orders: 919,734 (sample: 18,746)
Average Order Value: $375,639.90
Median Order Value: $3,680.50

Order Value Distribution:
10th percentile: $45.08
25th percentile: $303.60
50th percentile: $3,680.50
75th percentile: $14,877.30
90th percentile: $65,000.00
95th percentile: $197,157.00
99th percentile: $2,000,000.00

High-value orders (top 5%): 46,021 (sample: 938)
High-value spending: $334,299,146,349.47 (sample: $6,813,678,517.34)
Percentage of total spending: 96.8%


In [19]:
# =============================================================================
# 10. CALCARD USAGE ANALYSIS
# =============================================================================

print("\n" + "="*50)
print("CALCARD USAGE ANALYSIS")
print("="*50)

calcard_usage = df['CalCard'].value_counts()
calcard_percent = (calcard_usage / len(df)) * 100

print("\nCalCard Usage:")
for usage, count in calcard_usage.items():
    print(f"{usage}: {count:,} ({calcard_percent[usage]:.1f}%)")

# CalCard vs regular purchases spending comparison
calcard_spending = df.groupby('CalCard')['Total Price'].agg(['sum', 'mean', 'count'])
print("\nSpending Comparison:")
for calcard, row in calcard_spending.iterrows():
    print(f"\n{calcard} purchases:")
    print(f"  Total: ${row['sum']:,.2f}")
    print(f"  Average: ${row['mean']:,.2f}")
    print(f"  Count: {row['count']:,}")



CALCARD USAGE ANALYSIS

CalCard Usage:
NO: 18,484 (98.6%)
YES: 262 (1.4%)

Spending Comparison:

NO purchases:
  Total: $7,040,331,435.23
  Average: $380,887.87
  Count: 18,484.0

YES purchases:
  Total: $1,414,172.59
  Average: $5,397.61
  Count: 262.0


In [21]:
# =============================================================================
# 11. KEY INSIGHTS AND RECOMMENDATIONS
# =============================================================================

print("\n" + "="*50)
print("KEY INSIGHTS FOR AI ASSISTANT DEVELOPMENT")
print("="*50)

# Get fiscal year range properly (categorical needs special handling)
fiscal_years = sorted(df['Fiscal Year'].dropna().unique())
min_fy = fiscal_years[0] if fiscal_years else "Unknown"
max_fy = fiscal_years[-1] if fiscal_years else "Unknown"

insights = [
    f"Dataset contains {len(df):,} procurement records spanning fiscal years {min_fy} to {max_fy}",
    f"Total procurement spending: ${total_spending:,.2f} across {df['Department Name'].nunique()} departments",
    f"Data covers {df['Supplier Name'].nunique():,} unique suppliers with diverse qualifications (SB, SBE, DVBE, etc.)",
    f"Four main acquisition types: Non-IT Goods/Services, IT Goods/Services with different procurement methods",
    f"UNSPSC classification system provides hierarchical categorization (Segment → Family → Class → Commodity)",
    f"CalCard usage represents {calcard_percent.get('YES', 0):.1f}% of transactions",
    f"High-value orders (top 5%) account for {high_value_sample_spending/sample_spending*100:.1f}% of total spending",
    f"Temporal patterns show fiscal year cycles and potential seasonal procurement patterns",
    f"Missing data is minimal, with Location field having the highest missing rate"
]

for i, insight in enumerate(insights, 1):
    print(f"{i}. {insight}")

print("\n=== RECOMMENDATIONS FOR AI ASSISTANT ===")
recommendations = [
    "Implement fuzzy matching for department and supplier name queries due to potential variations",
    "Support both fiscal year and calendar year temporal queries",
    "Handle UNSPSC hierarchy queries (e.g., 'show all IT-related purchases' using segment/family filters)",
    "Support comparative analysis (e.g., 'compare spending between departments')",
    "Implement supplier qualification filtering for diversity/supplier type analysis",
    "Handle date range queries with proper fiscal year calculations",
    "Support aggregation queries (totals, averages, counts) across multiple dimensions",
    "Implement threshold-based queries (e.g., 'orders over $X', 'top N suppliers')"
]

for i, rec in enumerate(recommendations, 1):
    print(f"{i}. {rec}")

print("\n" + "="*50)
print("EDA Complete! Dataset ready for AI assistant development.")
print("="*50)


KEY INSIGHTS FOR AI ASSISTANT DEVELOPMENT
1. Dataset contains 18,746 procurement records spanning fiscal years 2012-2013 to 2014-2015
2. Total procurement spending: $345,488,789,867.85 across 101 departments
3. Data covers 4,840 unique suppliers with diverse qualifications (SB, SBE, DVBE, etc.)
4. Four main acquisition types: Non-IT Goods/Services, IT Goods/Services with different procurement methods
5. UNSPSC classification system provides hierarchical categorization (Segment → Family → Class → Commodity)
6. CalCard usage represents 1.4% of transactions
7. High-value orders (top 5%) account for 96.8% of total spending
8. Temporal patterns show fiscal year cycles and potential seasonal procurement patterns
9. Missing data is minimal, with Location field having the highest missing rate

=== RECOMMENDATIONS FOR AI ASSISTANT ===
1. Implement fuzzy matching for department and supplier name queries due to potential variations
2. Support both fiscal year and calendar year temporal queries
3