# Measuring Data Accuracy

**Activity Overview**: Assess data accuracy by comparing it to a trusted source and detecting incorrect values or mismatches.

## Title: Product Pricing

**Task**: Compare a dataset of product prices with the latest official price list.

**Steps**:
1. Obtain the latest product price list from the official company website.
2. Compare each product's price in the dataset against its price in the official price list.
3. Identify any discrepancies and mark them for correction.

In [1]:
# Write your code from here
import pandas as pd

def assess_product_price_accuracy(dataset_path, official_price_list_path, tolerance=1e-9):
    """Report products whose dataset price differs from the official price.

    Both CSV files are read with pandas and inner-joined on ``Product_ID``,
    so only products present in both files are compared; a product that is
    missing from either file is not reported.

    Args:
        dataset_path: Path to the CSV holding the prices to check. Must
            contain ``Product_ID`` and ``Price`` columns.
        official_price_list_path: Path to the CSV holding the reference
            prices. Must contain ``Product_ID`` and ``Price`` columns.
        tolerance: Largest absolute difference between two numeric prices
            that is still treated as a match, so floating-point rounding
            noise is not reported as a discrepancy.

    Returns:
        A DataFrame with the columns ``Product_ID``, ``Price_Dataset``,
        ``Price_Official`` and ``Discrepancy`` holding one row per mismatch
        (empty, with the same columns, when every compared price matches),
        or ``None`` when a file cannot be read or a required column is
        missing. Errors are reported with ``print`` rather than raised.
    """
    required_columns = ['Product_ID', 'Price']
    result_columns = ['Product_ID', 'Price_Dataset', 'Price_Official', 'Discrepancy']
    try:
        dataset_df = pd.read_csv(dataset_path)
        official_prices_df = pd.read_csv(official_price_list_path)
        if not all(col in dataset_df.columns for col in required_columns):
            print("Error: Dataset must contain 'Product_ID' and 'Price' columns.")
            return None
        if not all(col in official_prices_df.columns for col in required_columns):
            print("Error: Official price list must contain 'Product_ID' and 'Price' columns.")
            return None

        # Merge only the columns that are compared, so unrelated columns in
        # either file cannot collide with the suffixed price column names.
        merged_df = pd.merge(
            dataset_df[required_columns],
            official_prices_df[required_columns],
            on='Product_ID',
            suffixes=('_Dataset', '_Official'),
        )
        dataset_prices = merged_df['Price_Dataset']
        official_prices = merged_df['Price_Official']

        if (pd.api.types.is_numeric_dtype(dataset_prices)
                and pd.api.types.is_numeric_dtype(official_prices)):
            # Exactly equal prices always match; otherwise allow `tolerance`.
            # A NaN on either side fails both tests and is therefore flagged.
            matches = (dataset_prices == official_prices) | (
                (dataset_prices - official_prices).abs() <= tolerance
            )
            mismatch = ~matches
        else:
            # Non-numeric prices cannot be subtracted; compare them exactly.
            mismatch = dataset_prices != official_prices

        discrepancy_df = merged_df[mismatch].copy()
        if discrepancy_df.empty:
            print("No price discrepancies found.")
            # Keep the result schema identical to the non-empty case.
            return pd.DataFrame(columns=result_columns)

        discrepancy_df['Discrepancy'] = [
            f"Dataset Price: {dataset_price}, Official Price: {official_price}"
            for dataset_price, official_price in zip(
                discrepancy_df['Price_Dataset'], discrepancy_df['Price_Official']
            )
        ]
        return discrepancy_df[result_columns]
    except FileNotFoundError as e:
        print(f"Error: File not found: {e.filename}")
        return None
    except KeyError as e:
        print(f"Error: Column not found: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: callers rely on None instead of an exception.
        print(f"An unexpected error occurred: {e}")
        return None

if __name__ == '__main__':
    # Demo run: write two small CSV fixtures into the working directory and
    # compare them with assess_product_price_accuracy.
    dataset_file_path = 'dataset.csv'
    official_price_list_file_path = 'official_prices.csv'

    # Prices under test; P002 and P004 differ from the official list below.
    dataset_data = {
        'Product_ID': ['P001', 'P002', 'P003', 'P004', 'P005'],
        'Price': [10.00, 20.00, 30.00, 40.00, 50.00],
        'Other_Data': ['A', 'B', 'C', 'D', 'E'],
    }
    # Reference prices; P005 is absent here and P006 is absent from the dataset.
    official_price_list_data = {
        'Product_ID': ['P001', 'P002', 'P003', 'P004', 'P006'],
        'Price': [10.00, 25.00, 30.00, 45.00, 60.00],
    }

    dataset_df = pd.DataFrame(dataset_data)
    dataset_df.to_csv(dataset_file_path, index=False)
    official_price_list_df = pd.DataFrame(official_price_list_data)
    official_price_list_df.to_csv(official_price_list_file_path, index=False)

    discrepancies = assess_product_price_accuracy(
        dataset_file_path, official_price_list_file_path
    )

    # None signals a failure, an empty frame means every compared price matched.
    if discrepancies is None:
        print("An error occurred during the process.")
    elif discrepancies.empty:
        print("No discrepancies found.")
    else:
        print("Product Price Discrepancies:")
        print(discrepancies)


Product Price Discrepancies:
  Product_ID  Price_Dataset  Price_Official  \
1       P002           20.0            25.0   
3       P004           40.0            45.0   

                                 Discrepancy  
1  Dataset Price: 20.0, Official Price: 25.0  
3  Dataset Price: 40.0, Official Price: 45.0  
