In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Step 2: Simulated daily data-quality measurements (one record per collection day).
# Each tuple is (date, completeness, accuracy, consistency); expanding the tuples
# into dicts keeps the column order date -> completeness -> accuracy -> consistency.
data_quality_records = [
    {'date': day, 'completeness': completeness, 'accuracy': accuracy, 'consistency': consistency}
    for day, completeness, accuracy, consistency in (
        ('2024-05-01', 95, 90, 92),
        ('2024-05-02', 93, 88, 91),
        ('2024-05-03', 96, 91, 94),
        ('2024-05-04', 92, 85, 89),
        ('2024-05-05', 94, 87, 90),
        ('2024-05-06', 97, 92, 95),
    )
]

# Step 3: Build the DataFrame and turn the ISO date strings into real timestamps.
# `assign` on an existing column replaces it in place, so the column order is kept.
df_trend = pd.DataFrame(data_quality_records).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Step 4: The overall quality score is the unweighted row-wise mean of the three metrics.
df_trend['overall_score'] = (
    df_trend.loc[:, ['completeness', 'accuracy', 'consistency']].mean(axis='columns')
)

# Step 5: Draw every metric against the collection date on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_trend['date'], df_trend['completeness'], label='Completeness')
ax.plot(df_trend['date'], df_trend['accuracy'], label='Accuracy')
ax.plot(df_trend['date'], df_trend['consistency'], label='Consistency')
# The aggregate score is drawn as a thick dashed black line so it stands apart
# from the three individual metrics.
ax.plot(
    df_trend['date'],
    df_trend['overall_score'],
    color='black',
    linewidth=2,
    linestyle='--',
    label='Overall Score',
)

ax.set(
    title='Data Quality Trends Over Time',
    xlabel='Date',
    ylabel='Score (%)',
)
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# Step 6: Print a heading followed by the full table of scores.
print("Data Quality Scores Over Time:", df_trend, sep="\n")