In [1]:
from dotenv import load_dotenv
from agents import Agent, Runner, ModelSettings, trace, function_tool

import pandas as pd
import PyPDF2
import asyncio

from IPython.display import Markdown, display
import re

import openpyxl
import json

load_dotenv(override=True)


True

In [2]:
# System prompt for the QC agents: it is passed unchanged as `instructions=` to the
# Agent(...) definitions later in this notebook.
# NOTE(review): under "Key Instructions" the 'data dictionary' exclusion and the
# Excel-only rule are each stated twice; kept as-is because any wording change alters
# what the model receives.
# NOTE(review): the DETAILED FINDINGS template contains concrete sample values
# (Cell B7, "2024/13/45"); the first recorded report cites the same cell and value,
# so confirm that finding reflects the workbook rather than echoing the template.
INSTRUCTIONS = """
You are a data analyst reviewing an Excel file against PDF specifications. Provide a comprehensive analysis with precise issue locations and actionable recommendations.

## Analysis Scope

1. **Structure Compliance**: Verify sheets, columns, headers, data types, and field requirements match PDF specs
2. **Format Validation**: Check dates, numbers, text, codes, and boolean fields for correct formatting
3. **Data Consistency**: Validate temporal logic, numerical relationships, business rules, and data integrity
4. **Quality Checks**: Identify missing data, duplicates, outliers, and formatting issues

## For Each Issue Report

- **Location**: Sheet name, cell reference (e.g., "C15"), column name, row number
- **Type**: Format violation | Consistency error | Missing data | Invalid value | Structural problem
- **Severity**: Critical (blocks processing) | Major (significant deviation) | Minor (formatting)
- **Details**: Current value vs. expected value with clear explanation
- **Fix**: Specific Excel-based solution (no external code)

## Output Structure

EXECUTIVE SUMMARY
- Total issues by severity
- Key problem areas

DETAILED FINDINGS
Sheet: [Name]
Issue #1: [Type] - [Severity]
• Location: Cell B7, Column "Date"
• Found: "2024/13/45"
• Expected: Valid date (DD/MM/YYYY)
• Description: Invalid date format
• Fix: Correct date entry


## Key Instructions

- Compare against PDF examples/templates
- Prioritize data integrity issues
- Note where file exceeds requirements
- Flag unclear PDF sections
- Skip 'data dictionary' sheet
- Focus on Excel-native solutions only
- Make sure that any recommendations you make, can be done in Excel. Do not suggest the use of Python or any other code language to fix the identified issues.
- Ignore 'data dictionary' sheet. No analysis should be done on this sheet!
"""

In [3]:
# Open the workbook once and display which worksheet openpyxl reports as active;
# excel_to_structured_format falls back to that sheet when no sheet_name is given.
workbook = openpyxl.load_workbook('data.xlsx')
workbook.active

<Worksheet "Customer Data">

In [4]:
def excel_to_structured_format(filename, sheet_name=None):
    """Convert one worksheet into a JSON-friendly dictionary of its populated cells.

    Args:
        filename: Path of the workbook to open with openpyxl.
        sheet_name: Worksheet title to read; the workbook's active sheet is
            used when this is None.

    Returns:
        dict with three keys:
            "sheet_name": title of the worksheet that was read.
            "cells": mapping of coordinate (e.g. "A1") to a dict holding
                coordinate, value, data_type, row and column, plus "formula"
                for formula cells and "number_format" when it is not 'General'.
            "dimensions": the worksheet's max_row and max_column.
    """
    # data_only=False: formula cells are loaded with their formula text as the value.
    book = openpyxl.load_workbook(filename, data_only=False)
    sheet = book[sheet_name] if sheet_name is not None else book.active

    cells = {}
    result = {
        "sheet_name": sheet.title,
        "cells": cells,
        "dimensions": {
            "max_row": sheet.max_row,
            "max_column": sheet.max_column,
        },
    }

    for cell in (c for row in sheet.iter_rows() for c in row):
        is_formula = cell.data_type == 'f'

        # Nothing to record for cells with neither a value nor a formula.
        if cell.value is None and not is_formula:
            continue

        entry = {
            "coordinate": cell.coordinate,
            "value": cell.value,
            "data_type": cell.data_type,
            "row": cell.row,
            "column": cell.column,
        }

        # For a formula cell the stored value is the formula itself.
        if is_formula:
            entry["formula"] = cell.value

        # Only non-default number formats are worth reporting.
        if cell.number_format != 'General':
            entry["number_format"] = cell.number_format

        cells[cell.coordinate] = entry

    return result

# Usage: serialise the active sheet of data.xlsx and wrap it in a prompt string.
# default=str makes json.dumps fall back to str() for cell values it cannot encode itself.
# NOTE(review): llm_prompt is only displayed here; it is not referenced by the other
# cells visible in this notebook.
excel_data = excel_to_structured_format('data.xlsx')
llm_prompt = f"Here is Excel data in structured format:\n{json.dumps(excel_data, indent=2, default=str)}"
llm_prompt

'Here is Excel data in structured format:\n{\n  "sheet_name": "Customer Data",\n  "cells": {\n    "A1": {\n      "coordinate": "A1",\n      "value": "CustomerID",\n      "data_type": "s",\n      "row": 1,\n      "column": 1\n    },\n    "B1": {\n      "coordinate": "B1",\n      "value": "FirstName",\n      "data_type": "s",\n      "row": 1,\n      "column": 2\n    },\n    "C1": {\n      "coordinate": "C1",\n      "value": "LastName",\n      "data_type": "s",\n      "row": 1,\n      "column": 3\n    },\n    "D1": {\n      "coordinate": "D1",\n      "value": "Email",\n      "data_type": "s",\n      "row": 1,\n      "column": 4\n    },\n    "E1": {\n      "coordinate": "E1",\n      "value": "Phone",\n      "data_type": "s",\n      "row": 1,\n      "column": 5\n    },\n    "F1": {\n      "coordinate": "F1",\n      "value": "DateJoined",\n      "data_type": "s",\n      "row": 1,\n      "column": 6\n    },\n    "G1": {\n      "coordinate": "G1",\n      "value": "Status",\n      "data_type": 

In [5]:
@function_tool
def read_pdf_specs(pdf_path: str) -> str:
    """Read PDF specification file and extract text content.
    
    Args:
        pdf_path: Path to the PDF specification file
    """
    # The docstring above is left verbatim: function_tool presumably exposes it
    # to the model as the tool description — confirm before rewording it.
    with open(pdf_path, 'rb') as pdf_file:
        # Extraction has to happen while the file is still open.
        page_texts = [page.extract_text() for page in PyPDF2.PdfReader(pdf_file).pages]
    # One text chunk per page, separated by newlines.
    return "\n".join(page_texts)


def detect_date_format_inconsistency(series):
    """
    Streamlined date format consistency check for most common datetime formats.

    Every non-null value is parsed against a fixed list of day/month/year
    layouts and the per-format results are compared. Three kinds of problem
    are reported:

    - 'DMY_VS_MDY': a value parses to different dates when read day-first and
      month-first, or the column holds some values that are valid only
      day-first together with others that are valid only month-first.
    - 'MIXED_SEPARATORS': the matching formats use more than one of '/', '-', '.'.
    - 'ISO_VS_TRADITIONAL': the summed match counts of the year-first formats
      and of the remaining formats both exceed one.

    Args:
        series: pandas Series containing potential datetime strings. A series
            whose dtype is neither object nor a pandas string dtype is
            reported as consistent without being parsed.

    Returns:
        tuple: (has_inconsistency, inconsistency_types, warnings)
            has_inconsistency: True when any of the checks above fired.
            inconsistency_types: set of the type codes listed above.
            warnings: list of human-readable messages, one per finding.
    """
    # Accept both classic object columns and pandas' dedicated string dtype.
    dtype = series.dtype
    if dtype != 'object' and not isinstance(dtype, pd.StringDtype):
        return False, set(), []

    sample = series.dropna()
    if len(sample) < 2:
        return False, set(), []

    messages = []
    inconsistency_types = set()

    # Most common date formats to test
    common_formats = [
        '%d/%m/%Y',    # DD/MM/YYYY (European)
        '%m/%d/%Y',    # MM/DD/YYYY (US)
        '%Y-%m-%d',    # YYYY-MM-DD (ISO)
        '%d-%m-%Y',    # DD-MM-YYYY
        '%m-%d-%Y',    # MM-DD-YYYY
        '%Y/%m/%d',    # YYYY/MM/DD
        '%d.%m.%Y',    # DD.MM.YYYY (German)
        '%m.%d.%Y',    # MM.DD.YYYY
    ]

    # format -> (parsed series, number of values that matched); formats with
    # no match at all are left out.
    results = {}
    for fmt in common_formats:
        try:
            parsed = pd.to_datetime(sample, format=fmt, errors='coerce')
            valid_count = int(parsed.notna().sum())
        except Exception:
            # Best effort: a format that cannot be applied to this column is
            # skipped. Not a bare except, so KeyboardInterrupt still propagates.
            continue
        if valid_count > 0:
            results[fmt] = (parsed, valid_count)

    if len(results) < 2:
        return False, set(), messages

    conflicts_found = False

    # Day-first / month-first pairs that share a separator.
    conflict_pairs = [
        ('%d/%m/%Y', '%m/%d/%Y'),
        ('%d-%m-%Y', '%m-%d-%Y'),
        ('%d.%m.%Y', '%m.%d.%Y'),
    ]

    for day_first, month_first in conflict_pairs:
        if day_first not in results or month_first not in results:
            continue

        as_dmy = results[day_first][0]
        as_mdy = results[month_first][0]
        separator = day_first[2]  # the character right after '%d'

        # Values that are valid under both readings but mean different dates.
        ambiguous = as_dmy.notna() & as_mdy.notna() & (as_dmy != as_mdy)
        if ambiguous.any():
            conflicts_found = True
            inconsistency_types.add('DMY_VS_MDY')
            examples = sample[ambiguous].head(2).tolist()
            messages.append(f"DD{separator}MM vs MM{separator}DD format confusion detected. Examples: {examples}")

        # Values that fit exactly one reading: if both kinds occur, the column
        # provably mixes the two layouts (e.g. '15/03/2023' next to '12/25/2023').
        only_dmy = as_dmy.notna() & as_mdy.isna()
        only_mdy = as_mdy.notna() & as_dmy.isna()
        if only_dmy.any() and only_mdy.any():
            conflicts_found = True
            inconsistency_types.add('DMY_VS_MDY')
            examples = sample[only_dmy].head(1).tolist() + sample[only_mdy].head(1).tolist()
            messages.append(f"Mixed DD{separator}MM and MM{separator}DD values detected. Examples: {examples}")

    # Check for mixed separators across the formats that matched anything.
    separator_names = {'/': 'slash', '-': 'dash', '.': 'dot'}
    active_separators = [
        name for char, name in separator_names.items()
        if any(char in fmt for fmt in results)
    ]
    if len(active_separators) > 1:
        inconsistency_types.add('MIXED_SEPARATORS')
        messages.append(f"Mixed date separators detected: {', '.join(active_separators)}")
        conflicts_found = True

    # Check for ISO vs non-ISO mixing; only flagged when both styles matched
    # more than once in total.
    iso_count = sum(count for fmt, (_, count) in results.items() if fmt.startswith('%Y'))
    non_iso_count = sum(count for fmt, (_, count) in results.items() if not fmt.startswith('%Y'))
    if iso_count > 1 and non_iso_count > 1:
        inconsistency_types.add('ISO_VS_TRADITIONAL')
        messages.append("Mixed ISO (YYYY-MM-DD) and traditional date formats detected")
        conflicts_found = True

    return conflicts_found, inconsistency_types, messages



def _describe_column(col, values):
    """Build the text summary of one column for read_excel_data.

    Args:
        col: Column label, used only in the heading line.
        values: The column as a pandas Series (missing values included).

    Returns:
        str: newline-joined lines with dtype, counts and dtype-specific
        details (text warnings, numeric statistics or date range).
    """
    lines = [
        f"Column '{col}':",
        f"  - Type: {values.dtype}",
        f"  - Non-null count: {values.count()}",
        f"  - Unique values: {values.nunique()}",
    ]

    dtype = values.dtype
    if dtype == 'object' or isinstance(dtype, pd.StringDtype):
        # Text analysis — every check below works on non-null values only, so
        # a missing cell can no longer trigger a formatting warning by itself.
        present = values.dropna()
        unique_vals = present.unique()
        if len(unique_vals) <= 10:
            lines.append(f"  - Unique values: {list(unique_vals)}")
        else:
            lines.append(f"  - Sample unique values: {list(unique_vals[:10])}")

        # Values that collapse together once lower-cased differ only by case.
        if len({str(v).lower() for v in unique_vals}) != len(unique_vals):
            lines.append("  - WARNING: Mixed case values detected")

        # Flag any value that changes when stripped, not just those that
        # happen to collide with another value after stripping.
        as_text = present.astype(str)
        if (as_text != as_text.str.strip()).any():
            lines.append("  - WARNING: Leading/trailing spaces detected")

        # Check for date format inconsistency
        inconsistent, _, date_warnings = detect_date_format_inconsistency(values)
        if inconsistent or date_warnings:
            lines.append("  - DATE FORMAT ISSUES:")
            lines.extend(f"    - WARNING: {message}" for message in date_warnings)

    elif dtype in ['int64', 'float64', 'int32', 'float32']:
        # Numeric analysis
        lines.append(f"  - Min: {values.min()}")
        lines.append(f"  - Max: {values.max()}")
        lines.append(f"  - Mean: {values.mean():.2f}")
        lines.append(f"  - Median: {values.median()}")

        # Check for outliers (simple IQR method: beyond 1.5 * IQR from the quartiles)
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        outliers = values[(values < (q1 - 1.5 * iqr)) | (values > (q3 + 1.5 * iqr))]
        if len(outliers) > 0:
            lines.append(f"  - Potential outliers: {len(outliers)} values")
            lines.append(f"  - Outlier range: {outliers.min()} to {outliers.max()}")

    elif dtype in ['datetime64[ns]', 'datetime64']:
        # Date analysis
        lines.append(f"  - Date range: {values.min()} to {values.max()}")

    return "\n".join(lines)


@function_tool
def read_excel_data(excel_path: str) -> str:
    """Read Excel file and return structure and sample data.
    
    Args:
        excel_path: Path to the Excel file to analyze
    """
    # The docstring above is left verbatim: function_tool presumably exposes it
    # to the model as the tool description — confirm before rewording it.
    #
    # Output per sheet: shape/columns/dtypes, missing-value and duplicate
    # counts, row samples, then one summary block per column.
    # NOTE(review): row labels in the samples are pandas' default 0-based index,
    # not Excel row numbers, while the agent is asked for cell references.
    excel_data = pd.read_excel(excel_path, sheet_name=None)

    analysis = []
    for sheet_name, df in excel_data.items():
        analysis.append(f"=== SHEET: {sheet_name} ===")
        analysis.append(f"Shape: {df.shape[0]} rows, {df.shape[1]} columns")
        analysis.append(f"Columns: {list(df.columns)}")
        analysis.append(f"Data types:\n{df.dtypes}")

        # Enhanced data quality metrics
        analysis.append("\n--- DATA QUALITY SUMMARY ---")
        analysis.append(f"Missing values per column:\n{df.isnull().sum()}")
        analysis.append(f"Missing values percentage:\n{(df.isnull().sum() / len(df) * 100).round(2)}%")
        analysis.append(f"Duplicate rows: {df.duplicated().sum()}")

        # More comprehensive sampling
        analysis.append("\n--- DATA SAMPLES ---")
        analysis.append(f"First 10 rows:\n{df.head(10)}")
        if len(df) > 10:
            # With 10 rows or fewer the head already shows the whole sheet, so
            # the tail and random sample are only added for longer sheets.
            analysis.append(f"Last 10 rows:\n{df.tail(10)}")
            analysis.append(f"Random sample (10 rows):\n{df.sample(min(10, len(df)-10), random_state=42)}")

        # Column-specific analysis
        analysis.append("\n--- COLUMN ANALYSIS ---")
        for col in df.columns:
            analysis.append(_describe_column(col, df[col]))

        analysis.append("")

    return "\n".join(analysis)


@function_tool
def read_excel_data_raw(excel_path: str) -> str:
    """Read Excel file and return raw data.
    
    Args:
        excel_path: Path to the Excel file to analyze
    """
    # The docstring above is left verbatim: function_tool presumably exposes it
    # to the model as the tool description — confirm before rewording it.
    excel_data = pd.read_excel(excel_path, sheet_name=None)

    # Return real text, as the signature promises, instead of the dict of
    # DataFrames: to_string() renders every row and column, whereas the default
    # DataFrame repr may elide rows/columns depending on pandas display options.
    sections = [
        f"=== SHEET: {sheet_name} ===\n{df.to_string()}"
        for sheet_name, df in excel_data.items()
    ]
    return "\n\n".join(sections)

In [6]:
# Smoke test: one column that deliberately mixes several date layouts
test_data = pd.Series([
    '01/02/2023',  # readable as 1 Feb or 2 Jan
    '15/03/2023',  # day-first only (no month 15)
    '2023-04-05',  # ISO, year first
    '12/25/2023',  # month-first only (no month 25)
    '06.07.2023',  # dot-separated
    None,          # missing value
    '09-10-2023',  # dash-separated, readable both ways
])

has_inconsistency, types, warnings = detect_date_format_inconsistency(test_data)

# Report the verdict, the type codes, then one line per warning message
print(f"Has inconsistency: {has_inconsistency}")
print(f"Inconsistency types: {types}")
for warning in warnings:
    print(f"Warning: {warning}")

Has inconsistency: True
Inconsistency types: {'MIXED_SEPARATORS', 'DMY_VS_MDY'}


In [7]:
# First QC agent: pairs the PDF reader with the summarising Excel tool (read_excel_data)
# and uses the INSTRUCTIONS prompt as its system instructions.
# NOTE(review): tool_choice="required" presumably forces a tool call on each model turn —
# confirm the SDK relaxes it after a tool has run, otherwise a run can only end via max_turns.
data_qc_agent = Agent(
    name="data_qc_agent",
    instructions=INSTRUCTIONS,
    tools=[read_pdf_specs, read_excel_data], 
    model="gpt-4o-mini",
    model_settings=ModelSettings(
        tool_choice="required"
    )
)

In [8]:
# Display the agent's repr to confirm the configuration it was built with.
data_qc_agent 

Agent(name='data_qc_agent', instructions='\nYou are a data analyst reviewing an Excel file against PDF specifications. Provide a comprehensive analysis with precise issue locations and actionable recommendations.\n\n## Analysis Scope\n\n1. **Structure Compliance**: Verify sheets, columns, headers, data types, and field requirements match PDF specs\n2. **Format Validation**: Check dates, numbers, text, codes, and boolean fields for correct formatting\n3. **Data Consistency**: Validate temporal logic, numerical relationships, business rules, and data integrity\n4. **Quality Checks**: Identify missing data, duplicates, outliers, and formatting issues\n\n## For Each Issue Report\n\n- **Location**: Sheet name, cell reference (e.g., "C15"), column name, row number\n- **Type**: Format violation | Consistency error | Missing data | Invalid value | Structural problem\n- **Severity**: Critical (blocks processing) | Major (significant deviation) | Minor (formatting)\n- **Details**: Current value 

In [9]:
async def test_data_qc():
    """Run data_qc_agent through the PDF-then-Excel workflow and render its report.

    The prompt names the two tools and the fixed file paths ('spec.pdf' and
    'data.xlsx'); the run is limited to 5 turns and the agent's final output
    is displayed as Markdown. Nothing is returned.
    """
    workflow_prompt = """
        Execute this data quality analysis workflow:
        
        1. FIRST: Call read_pdf_specs with file path 'spec.pdf' to get the specifications
        2. SECOND: Call read_excel_data with file path 'data.xlsx' to analyze the Excel file
        3. THIRD: Compare findings and provide detailed quality assessment
        
        Start by using the read_pdf_specs tool now.
        """
    run_result = await Runner.run(data_qc_agent, workflow_prompt, max_turns=5)

    report = Markdown(run_result.final_output)
    display(report)

In [10]:
# Run the test inside a named trace so the run can be identified as "Data QC Agent Test 1".
# Top-level `await` relies on the notebook already running an event loop.
with trace("Data QC Agent Test 1"):
    await test_data_qc()

### EXECUTIVE SUMMARY

- **Total Issues by Severity**:
  - Critical: 10
  - Major: 5
  - Minor: 8

- **Key Problem Areas**:
  - Date formatting issues in all sheets.
  - Missing values in required columns (CustomerID, OrderID, ProductCode).
  - Invalid values detected (e.g., incorrect email format, negative prices).

---

### DETAILED FINDINGS

#### SHEET: Customer Data

**Issue #1: Format Violation - Major**
- **Location**: Cell B7, Column "DateJoined"
- **Found**: "2024/13/45"
- **Expected**: Valid date (DD/MM/YYYY)
- **Description**: Invalid date format.
- **Fix**: Correct the date entry to a valid day and month (e.g., "15/01/2024").

**Issue #2: Missing Data - Major**
- **Location**: Cell A3, Column "CustomerID"
- **Found**: NaN
- **Expected**: Required CustomerID
- **Description**: Missing entry in required field.
- **Fix**: Add a valid CustomerID (e.g., "CUST-xxxx").

**Issue #3: Invalid Value - Major**
- **Location**: Cell C3, Column "FirstName"
- **Found**: NaN
- **Expected**: Required FirstName
- **Description**: Missing entry in required field.
- **Fix**: Input a valid name (e.g., "Bob").

**Issue #4: Format Violation - Minor**
- **Location**: Column "Email"
- **Found**: "alice.w@email"
- **Expected**: Valid email format.
- **Description**: Invalid email format.
- **Fix**: Correct the entry to "alice.w@email.com".

**Issue #5: Duplicate Data - Major**
- **Location**: Row 0, Column "CustomerID"
- **Found**: "CUST-12345" (repeated)
- **Description**: Duplicate CustomerID violating uniqueness rule.
- **Fix**: Remove duplicate entry.

---

#### SHEET: Orders

**Issue #1: Missing Data - Major**
- **Location**: Cell A5, Column "OrderID"
- **Found**: NaN
- **Expected**: Required OrderID
- **Description**: Missing entry in required field.
- **Fix**: Add a valid OrderID (e.g., "ORD-xxxx").

**Issue #2: Invalid Value - Major**
- **Location**: Cell E4, Column "PaymentMethod"
- **Found**: "Bitcoin"
- **Expected**: Values "Credit", "Debit", "Cash", "PayPal"
- **Description**: Invalid payment method.
- **Fix**: Change entry to one of the valid options.

**Issue #3: Format Violation - Critical**
- **Location**: Cell C5, Column "OrderDate"
- **Found**: "32/13/2023"
- **Expected**: Valid date format (DD/MM/YYYY)
- **Description**: Invalid date entry.
- **Fix**: Correct to a valid date, e.g., "15/01/2023".

**Issue #4: Invalid Value - Major**
- **Location**: Cell D3, Column "TotalAmount"
- **Found**: "450000"
- **Expected**: Positive decimal number under 99999.99
- **Description**: Exceeds maximum limit.
- **Fix**: Adjust the amount to be within the allowed range.

---

#### SHEET: Products

**Issue #1: Missing Data - Major**
- **Location**: Cell A5, Column "ProductCode"
- **Found**: NaN
- **Expected**: Required ProductCode
- **Description**: Missing entry in required field.
- **Fix**: Add a valid ProductCode (e.g., "PROD-xxx").

**Issue #2: Invalid Value - Major**
- **Location**: Cell E2, Column "StockQuantity"
- **Found**: "-10"
- **Expected**: Non-negative integer
- **Description**: Invalid stock quantity.
- **Fix**: Change to a non-negative value (e.g., "10").

**Issue #3: Format Violation - Minor**
- **Location**: Cell A2, Column "ProductCode"
- **Found**: "PRD-JKL-012"
- **Expected**: Should follow format "PROD-XXX-YYY"
- **Description**: Incorrect format.
- **Fix**: Adjust format, e.g., "PROD-JKL-012".

**Issue #4: Date Format Issue - Major**
- **Location**: Column "LastUpdated"
- **Found**: "2024-07-01" and "30/02/2024"
- **Expected**: Valid dates in DD/MM/YYYY format.
- **Description**: Format not compliant with requirements.
- **Fix**: Change to DD/MM/YYYY format (e.g., "01/07/2024").

---

### CONCLUSION

The analysis reveals significant data quality issues across the sheets, particularly in required fields, formatting, and invalid entries. Addressing these issues will enhance the reliability and compliance of the data with the specified requirements. Focus on correcting date formats, ensuring all required fields are populated, and maintaining unique identifiers.

In [11]:
# Second QC agent: same prompt, model and settings as data_qc_agent, but it is given
# read_excel_data_raw instead of read_excel_data.
# NOTE(review): name= is still "data_qc_agent", so both agents carry the same name.
data_qc_agent_2 = Agent(
    name="data_qc_agent",
    instructions=INSTRUCTIONS,
    tools=[read_pdf_specs, read_excel_data_raw], 
    model="gpt-4o-mini",
    model_settings=ModelSettings(
        tool_choice="required"
    )
)

async def test_data_qc():
    """Run data_qc_agent_2 through the PDF-then-raw-Excel workflow and render its report.

    Note: this reuses the name of the test_data_qc defined in an earlier cell,
    so once this cell has run, every later call uses data_qc_agent_2.

    The prompt names the two tools and the fixed file paths ('spec.pdf' and
    'data.xlsx'); the run is limited to 5 turns and the agent's final output
    is displayed as Markdown. Nothing is returned.
    """
    workflow_prompt = """
        Execute this data quality analysis workflow:
        
        1. FIRST: Call read_pdf_specs with file path 'spec.pdf' to get the specifications
        2. SECOND: Call read_excel_data_raw with file path 'data.xlsx' to analyze the Excel file
        3. THIRD: Compare findings and provide detailed quality assessment
        
        Start by using the read_pdf_specs tool now.
        """
    run_result = await Runner.run(data_qc_agent_2, workflow_prompt, max_turns=5)

    report = Markdown(run_result.final_output)
    display(report)

In [12]:
# Run the test inside a named trace so the run can be identified as "Data QC Agent Test 2".
# test_data_qc here resolves to the most recently executed definition.
with trace("Data QC Agent Test 2"):
    await test_data_qc()

### EXECUTIVE SUMMARY

- **Total Issues by Severity**:
  - Critical: 6
  - Major: 7
  - Minor: 5
- **Key Problem Areas**:
  - Customer Data Sheet: Multiple format violations and duplicates.
  - Orders Sheet: Compatibility issues with IDs, negative total amounts, and invalid formats.
  - Products Sheet: Incorrect formatting and invalid stock quantities.

---

### DETAILED FINDINGS

#### Sheet: Customer Data
**Issue #1: Format violation - Major**
- **Location**: Cell A2, Column "CustomerID"
- **Found**: CUST12348
- **Expected**: Format "CUST-XXXXX" where X is a digit
- **Description**: Missing hyphen in CustomerID format.
- **Fix**: Change to "CUST-12348".

**Issue #2: Invalid value - Critical**
- **Location**: Cell B3, Column "FirstName"
- **Found**: `NaN`
- **Expected**: Valid text (not empty)
- **Description**: Missing first name for customer.
- **Fix**: Enter a valid name or remove row if not applicable.

**Issue #3: Format violation - Major**
- **Location**: Cell F2, Column "DateJoined"
- **Found**: 2024/13/45
- **Expected**: Valid date in DD/MM/YYYY
- **Description**: Invalid date format.
- **Fix**: Correct to a valid date format (e.g., 15/04/2024).

**Issue #4: Duplicate entry - Critical**
- **Location**: Cell A4, Column "CustomerID"
- **Found**: CUST-12345
- **Expected**: Unique entry
- **Description**: Duplicate CustomerID found.
- **Fix**: Remove or update to a unique ID.

**Issue #5: Format violation - Minor**
- **Location**: Cell E5, Column "Phone"
- **Found**: `NaN`
- **Expected**: Format "(XXX) XXX-XXXX" or valid phone
- **Description**: Missing phone number.
- **Fix**: Input a valid phone number or leave blank if optional.

**Issue #6: Invalid email - Critical**
- **Location**: Cell D3, Column "Email"
- **Found**: alice.w@email
- **Expected**: Valid email format
- **Description**: Email is improperly formatted.
- **Fix**: Correct to a valid email format (e.g., alice.w@example.com).

---

#### Sheet: Orders
**Issue #1: Format violation - Major**
- **Location**: Cell A3, Column "OrderID"
- **Found**: ORD12345681 
- **Expected**: Format "ORD-XXXXXXXX"
- **Description**: Missing hyphen in OrderID format.
- **Fix**: Change to "ORD-12345681".

**Issue #2: Invalid value - Critical**
- **Location**: Cell C2, Column "TotalAmount"
- **Found**: -50
- **Expected**: Positive decimal value
- **Description**: TotalAmount cannot be negative.
- **Fix**: Correct to a positive amount.

**Issue #3: Inconsistent ID - Critical**
- **Location**: Cell B2, Column "CustomerID"
- **Found**: CUST-99999
- **Expected**: Must exist in Customer Data
- **Description**: CustomerID not found.
- **Fix**: Update to a valid CustomerID.

**Issue #4: Format violation - Major**
- **Location**: Cell C4, Column "OrderDate"
- **Found**: 32/13/2023
- **Expected**: Valid date in DD/MM/YYYY
- **Description**: Invalid date format.
- **Fix**: Correct to a valid date.

---

#### Sheet: Products
**Issue #1: Format violation - Major**
- **Location**: Cell A3, Column "ProductCode"
- **Found**: PROD-123-ABC
- **Expected**: Format "PROD-XXX-YYY"
- **Description**: Invalid ProductCode format.
- **Fix**: Correct to match expected format.

**Issue #2: Invalid value - Major**
- **Location**: Cell D3, Column "StockQuantity"
- **Found**: -10
- **Expected**: Non-negative integer
- **Description**: StockQuantity cannot be negative.
- **Fix**: Correct to a non-negative value.

**Issue #3: Format violation - Minor**
- **Location**: Cell E4, Column "Price"
- **Found**: -299.99
- **Expected**: Positive decimal value
- **Description**: Price cannot be negative.
- **Fix**: Change to a positive value.

**Issue #4: Invalid date - Critical**
- **Location**: Cell F3, Column "LastUpdated"
- **Found**: 2024-07-01
- **Expected**: DD/MM/YYYY
- **Description**: Incorrect date format.
- **Fix**: Change to a valid format (e.g., 01/07/2024).

**Issue #5: Missing value - Minor**
- **Location**: Cell A6, Column "ProductCode"
- **Found**: `NaN`
- **Expected**: Valid text
- **Description**: Missing product code.
- **Fix**: Provide a valid code.

---

### ACTIONS RECOMMENDED
1. Address all **critical** issues immediately to ensure data integrity.
2. Review and resolve **major** issues to avoid significant discrepancies.
3. Standardize formatting across sheets for better usability. 
4. Implement validation checks in Excel to prevent future data entry errors (e.g., data validation rules).