## Check Accuracy & Completeness

**Objective**: Learn to assess data quality by checking for accuracy and completeness using Python.

For this, you will use a sample dataset, `students.csv`, that contains the following
columns: `ID`, `Name`, `Age`, `Grade`, `Email`.

**Steps**:
1. Check Accuracy
    - Verify Numerical Data Accuracy
    - Validate Email Format
    - Integer Accuracy Check for Age
2. Check Completeness
    - Identify Missing Values
    - Rows with Missing Data
    - Column Specific Missing Value Check

In [None]:
# Write your code from here
import pandas as pd
import numpy as np
import re
from io import StringIO
# Inline stand-in for the sample CSV. Several rows are deliberately dirty
# (blank cells, a fractional age, malformed e-mail addresses) so that the
# checks below have something to report.
data = (
    "ID,Name,Age,Grade,Email\n"
    "1,John Doe,20,A,john@example.com\n"
    "2,Jane Smith,21,B,jane@example.com\n"
    "3,Bob Johnson,,C,bob@example\n"
    "4,Alice Brown,22,,alice@example.com\n"
    "5,Tom Wilson,23.5,D,tom@example.com\n"
    "6,,24,E,not-an-email\n"
    "7,Mike Davis,25,F,mike@example.com\n"
)

# Parse the text through an in-memory file object; blank cells become NaN.
df = pd.read_csv(StringIO(data))
def check_accuracy(df):
    """Print basic accuracy diagnostics for ``df``.

    Three independent checks are run, each only when the relevant column
    exists in ``df``:

    * summary statistics (``describe()``) for the ``Age`` and ``Grade``
      columns;
    * the percentage of ``Email`` values that match a simple
      ``local@domain.tld`` pattern;
    * the percentage of ``Age`` values that are whole numbers.

    Missing values count as invalid / non-integer in the percentages, so
    the denominators are always the total number of rows.

    Args:
        df: pandas DataFrame to inspect. It is not modified.

    Returns:
        None. All results are written to standard output.
    """
    # describe() chooses its summary based on the column dtype, so it is
    # safe to call on a non-numeric column such as letter grades.
    for col in ('Age', 'Grade'):
        if col in df.columns:
            print(f"\nStatistics for {col}:")
            print(df[col].describe())

    if 'Email' in df.columns:
        email_pattern = re.compile(
            r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        )
        # fullmatch() is used because match() with a trailing ``$`` would
        # still accept an address followed by a newline. str() turns
        # missing values into text that never matches the pattern.
        valid_emails = df['Email'].apply(
            lambda x: bool(email_pattern.fullmatch(str(x)))
        )
        print(f"\nValid email percentage: {valid_emails.mean()*100:.2f}%")

    if 'Age' in df.columns:
        # Coerce instead of calling float() per cell: a non-numeric entry
        # (e.g. "abc") becomes NaN and is reported as non-integer rather
        # than raising ValueError and aborting the whole report.
        ages = pd.to_numeric(df['Age'], errors='coerce')
        is_integer = ages.notna() & (ages % 1 == 0)
        print(f"\nInteger age percentage: {is_integer.mean()*100:.2f}%")
def check_completeness(df):
    """Print a completeness report for ``df``.

    The report has three sections, in this order: the number of missing
    values in each column, every row that contains at least one missing
    value, and the percentage of missing values in each column.

    Args:
        df: pandas DataFrame to inspect. It is not modified.

    Returns:
        None. All results are written to standard output.
    """
    # Build the null mask once and derive all three views from it.
    missing = df.isnull()
    per_column_count = missing.sum()
    incomplete_rows = df[missing.any(axis=1)]
    per_column_pct = missing.mean() * 100

    print("\nMissing values per column:")
    print(per_column_count)
    print("\nRows with any missing data:")
    print(incomplete_rows)
    print("\nPercentage of missing values per column:")
    print(per_column_pct)
# Run both reports on the sample frame, each preceded by its heading.
for heading, run_check in (
    ("Accuracy Checks:", check_accuracy),
    ("\nCompleteness Checks:", check_completeness),
):
    print(heading)
    run_check(df)