### Machine Learning for Data Quality Prediction
**Description**: Use a machine learning model to predict data quality issues.

**Steps**:
1. Create a mock dataset with binary feature columns and a label column, `quality_issue` (0 = good, 1 = issue).
2. Train a machine learning model.
3. Evaluate the model performance.

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
# Mock dataset: every column holds 0/1 flags, one value per record.
# 'quality_issue' is the label to predict (0: good, 1: issue); the other
# columns are the features.
data = {
    'missing_values': [1, 0, 0, 1, 1, 0, 0, 1, 0, 1],
    'duplicate_values': [0, 0, 1, 1, 0, 1, 0, 1, 0, 0],
    'inconsistent_format': [0, 1, 0, 1, 0, 0, 0, 1, 0, 1],
    'outliers': [1, 0, 1, 0, 1, 0, 0, 0, 1, 1],
    'valid_entries': [0, 1, 1, 0, 0, 1, 1, 0, 1, 1],
    'quality_issue': [1, 0, 1, 1, 1, 0, 0, 1, 0, 1],
}
df = pd.DataFrame(data)

# Separate the feature matrix from the target label.
X = df.drop('quality_issue', axis=1)
y = df['quality_issue']

# Hold out 20% of the rows (2 of 10) for evaluation. With so few rows an
# unstratified shuffle can put a single label value in the test set, which
# makes the metrics below meaningless for the other class; stratify=y keeps
# both labels represented. random_state fixes the shuffle for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit a 100-tree random forest; the seed makes the fitted model reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the model on the held-out rows only.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
# zero_division=0 reports 0 (without emitting a warning) for any class the
# model never predicts on this very small test set.
print(classification_report(y_test, y_pred, zero_division=0))
