In [1]:
import pandas as pd

# --- Part 1: Load a Dataset & Check Missing Values ---

# Task 1: Iris Dataset
# - Load the dataset using Pandas.
try:
    iris_df = pd.read_csv('iris.csv')  # Assuming 'iris.csv' is in the same directory
except FileNotFoundError:
    print("Error: 'iris.csv' not found. Please make sure the file is in the correct directory.")
    iris_df = None  # Set to None if not loaded

# - Check for missing values in the entire dataset.
print("--- Part 1: Load a Dataset & Check Missing Values ---")
if iris_df is not None:
    missing_values = iris_df.isnull().sum()
    print("\nMissing values per column in the Iris dataset:")
    print(missing_values)

    if missing_values.sum() == 0:
        print("\nNo missing values found in the Iris dataset.")
    else:
        columns_with_missing = missing_values[missing_values > 0]
        print("\nColumns with missing values:")
        print(columns_with_missing)

    # Display the first few rows to get a sense of the data
    print("\nFirst 5 rows of the Iris dataset:")
    print(iris_df.head())
else:
    print("\nIris dataset could not be loaded. Skipping missing value check.")

# --- Part 2: Identify Inconsistent Entries in a Sample Dataset ---

# Task 2: Inconsistent Entries in a Sample Dataset
# - Assume you have a dataset with a 'Gender' column. Identify inconsistent entries like 'M', 'Male', or 'male'.

# Create a sample DataFrame with a 'Gender' column
sample_data = {'ID': [1, 2, 3, 4, 5, 6, 7],
               'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace'],
               'Gender': ['Female', 'Male', 'M', 'male', 'FEMALE', 'MALE', 'f']}
gender_df = pd.DataFrame(sample_data)

print("\n--- Part 2: Identify Inconsistent Entries in a Sample Dataset ---")
print("\nOriginal Gender Column:")
print(gender_df['Gender'])

# Identify unique values in the 'Gender' column
unique_genders = gender_df['Gender'].unique()
print("\nUnique values in the 'Gender' column:")
print(unique_genders)

# Identify inconsistent entries (case variations and abbreviations)
consistent_genders = ['Male', 'Female']
inconsistent_entries = [g for g in unique_genders if g.lower() not in [cg.lower() for cg in consistent_genders]]
print("\nInconsistent entries in the 'Gender' column (compared to 'Male', 'Female'):")
print(inconsistent_entries)

# You can further investigate these inconsistent entries if needed
# For example, to see the rows where these inconsistencies occur:
inconsistent_rows = gender_df[gender_df['Gender'].isin(inconsistent_entries)]
print("\nRows with inconsistent 'Gender' entries:")
print(inconsistent_rows)

# --- Part 3: Generate a Data Quality Report ---

# Task 3: Iris Dataset Summary
# - Generate basic descriptive statistics for the Iris dataset.

print("\n--- Part 3: Iris Dataset Summary ---")

if iris_df is not None:
    print("\nBasic Descriptive Statistics for Numerical Columns:")
    print(iris_df.describe())

    print("\nValue Counts for the 'Species' Column (Categorical):")
    print(iris_df['Species'].value_counts())
else:
    print("\nIris dataset could not be loaded. Skipping dataset summary.")

print("\n--- End of Data Quality Report ---")

Error: 'iris.csv' not found. Please make sure the file is in the correct directory.
--- Part 1: Load a Dataset & Check Missing Values ---

Iris dataset could not be loaded. Skipping missing value check.

--- Part 2: Identify Inconsistent Entries in a Sample Dataset ---

Original Gender Column:
0    Female
1      Male
2         M
3      male
4    FEMALE
5      MALE
6         f
Name: Gender, dtype: object

Unique values in the 'Gender' column:
['Female' 'Male' 'M' 'male' 'FEMALE' 'MALE' 'f']

Inconsistent entries in the 'Gender' column (compared to 'Male', 'Female'):
['M', 'f']

Rows with inconsistent 'Gender' entries:
   ID     Name Gender
2   3  Charlie      M
6   7    Grace      f

--- Part 3: Iris Dataset Summary ---

Iris dataset could not be loaded. Skipping dataset summary.

--- End of Data Quality Report ---
