diff --git a/README.md b/README.md
index f9023fc..13f8623 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,7 @@
 
 # python-synthpop
 
-Python implementation of the R package [synthpop](https://cran.r-project.org/web/packages/synthpop/index.html).
-
-```python-synthpop``` is an open-source library for synthetic data generation (SDG). The library includes robust implementations of Classification and Regression Trees (CART) and Gaussian Copula (GC) synthesizers, equipping users with an open-source python library to generate high-quality, privacy-preserving synthetic data.
+```python-synthpop``` is an open-source library for synthetic data generation (SDG). The library includes robust implementations of Classification and Regression Trees (CART) and Gaussian Copula (GC) synthesizers, equipping users with an open-source Python library to generate high-quality, privacy-preserving synthetic data. This library is a Python implementation of the CART method used in the R package [synthpop](https://cran.r-project.org/web/packages/synthpop/index.html).
 
 Synthetic data is generated in six steps:
 
@@ -56,23 +54,25 @@ Out[2]:
 
 ### python-synthpop
 
-Using default parameters the six steps are applied on the Social Diagnosis example tot generate synthetic data. See also [link](./example_notebooks/00_readme.ipynb).
+Using default parameters, the six steps are applied to the Social Diagnosis example to generate synthetic data. See also the [example notebook](./example_notebooks/00_readme.ipynb).
 
 ```
 In [1]:     from synthpop import MissingDataHandler, DataProcessor, CARTMethod
 
 In [2]:     # 1. Initiate metadata
-            metadata = MissingDataHandler()
+            md_handler = MissingDataHandler()
 
-            # 1.1 Detect data types
-            column_dtypes = metadata.get_column_dtypes(df)
-            print("Column Data Types:", column_dtypes)
+            # 1.1 Get data types
+            metadata = md_handler.get_column_dtypes(df)
+            print("Column Data Types:", metadata)
 
             Column Data Types: {'sex': 'categorical', 'age': 'numerical', 'marital': 'categorical', 'income': 'numerical', 'ls': 'categorical', 'smoke': 'categorical'}
 
-In [3]:     # 2. Missing data
+In [3]:     # 2. Process missing data
+            print("Missing data:")
             print(df.isnull().sum())
 
+            Missing data:
             sex          0
             age          0
             marital      9
@@ -82,17 +82,19 @@ In [3]:     # 2. Missing data
             dtype: int64
 
 In [4]:     # 2.1 Detect type of missingness
-            missingness_dict = metadata.detect_missingness(df)
-            print("Detected missingness yype:", missingness_dict)
+            missingness_dict = md_handler.detect_missingness(df)
+            print("Detected missingness type:", missingness_dict)
 
             Detected missingness type: {'marital': 'MAR', 'income': 'MAR', 'ls': 'MAR', 'smoke': 'MAR'}
 
 
 In [5]:     # 2.2 Impute missing values
-            df_imputed = metadata.apply_imputation(df, missingness_dict)
+            real_df = md_handler.apply_imputation(df, missingness_dict)
 
-            print(df_imputed.isnull().sum())
+            print("Missing data:")
+            print(real_df.isnull().sum())
 
+            Missing data:
             sex        0
             age        0
             marital    0
@@ -102,25 +104,73 @@ In [5]:     # 2.2 Impute missing values
             dtype: int64
 
 
-In [6]:     # 3. Instantiate the DataProcessor with column types
-            processor = DataProcessor(column_dtypes)
+In [6]:     # 3. Preprocessing: Instantiate the DataProcessor with the column data types (metadata)
+            processor = DataProcessor(metadata)
 
             # 3.1 Preprocess the data: transforms raw data into a numerical format
-            processed_data = processor.preprocess(df)
-            print("Processed Data:")
+            processed_data = processor.preprocess(real_df)
+            print("Processed data:")
             display(processed_data.head())
 
-            Processed Data:
+            Processed data:
             sex	age	marital	income	ls	smoke
-            0	0	0.503625	3	-0.480608	4	0
-            1	1	-1.495187	4	-0.834521	3	0
-            2	0	-1.603231	4	NaN	4	0
-            3	0	1.638086	5	-0.401961	1	0
-            4	0	0.341559	3	0.069923	3	1
+            0	0	0.503625	3	-0.517232	4	0
+            1	1	-1.495187	4	-0.898113	3	0
+            2	0	-1.603231	4	0.000000	4	0
+            3	0	1.638086	5	-0.432591	1	0
+            4	0	0.341559	3	0.075251	3	1
+
 
 In [7]:     # 4. Fit the CART method
             cart = CARTMethod(metadata, smoothing=True, proper=True, minibucket=5, random_state=42)
             cart.fit(processed_data)
 
+In [8]:     # 4.1 Preview generated synthetic data
+            synthetic_processed = cart.sample(100)
+            print("Synthetic processed data:")
+            display(synthetic_processed.head())
+
+            Synthetic processed data:
+            sex	age	marital	income	ls	smoke
+            0	1	-1.087360	3	-1.201126	4	0
+            1	1	-0.882289	3	1.182255	4	0
+            2	0	1.449201	5	-0.255936	2	0
+            3	0	0.890598	3	0.220739	4	1
+            4	0	0.313502	3	1.395039	4	0
+
+In [9]:     # 5. Postprocessing: back to the original format and preview of data
+            synthetic_df = processor.postprocess(synthetic_processed)
+            print("Synthetic data in original format:")
+            display(synthetic_df.head())
+
+            Synthetic data in original format:
+            sex	age	marital	income	ls	smoke
+            0	FEMALE	30.377064	SINGLE	-8.000000	MOSTLY DISSATISFIED	NO
+            1	MALE	54.823585	MARRIED	1861.809802	PLEASED	YES
+            2	FEMALE	78.641244	MARRIED	771.239134	MOSTLY DISSATISFIED	NO
+            3	MALE	53.458122	MARRIED	1758.942347	PLEASED	NO
+            4	FEMALE	60.354551	SINGLE	1024.351794	PLEASED	NO
+
+In [10]:    from synthpop.metrics import (
+                MetricsReport,
+                EfficacyMetrics,
+                DisclosureProtection
+            )
+
+In [11]:    # 6. Evaluate the synthetic data
+
+            # 6.1 Diagnostic report
+            report = MetricsReport(real_df, synthetic_df, metadata)
+            report_df = report.generate_report()
+            print("=== Diagnostic Report ===")
+            display(report_df)
+
+            	column	type	missing_value_similarity	range_coverage	boundary_adherence	ks_complement	tv_complement	statistic_similarity	category_coverage	category_adherence
+                0	sex	categorical	1.0	N/A	N/A	N/A	0.9764	N/A	1.0	1.0
+                1	age	numerical	1.0	0.94757	1.0	0.9142	N/A	0.962239	N/A	N/A
+                2	marital	categorical	1.0	N/A	N/A	N/A	0.967	N/A	0.666667	1.0
+                3	income	numerical	1.0	0.408926	1.0	0.9056	N/A	0.948719	N/A	N/A
+                4	ls	categorical	1.0	N/A	N/A	N/A	0.9224	N/A	0.857143	1.0
+                5	smoke	categorical	1.0	N/A	N/A	N/A	0.9754	N/A	1.0	1.0
 
 ```
\ No newline at end of file
diff --git a/example_notebooks/00_readme.ipynb b/example_notebooks/00_readme.ipynb
index 1f1be22..241334d 100644
--- a/example_notebooks/00_readme.ipynb
+++ b/example_notebooks/00_readme.ipynb
@@ -155,6 +155,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Missing data:\n",
       "sex          0\n",
       "age          0\n",
       "marital      9\n",
@@ -166,7 +167,8 @@
     }
    ],
    "source": [
-    "# 2. Missing data\n",
+    "# 2. Process missing data\n",
+    "print(\"Missing data:\")\n",
     "print(df.isnull().sum())"
    ]
   },
@@ -198,6 +200,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Missing data:\n",
       "sex        0\n",
       "age        0\n",
       "marital    0\n",
@@ -210,9 +213,10 @@
    ],
    "source": [
     "# 2.2 Impute missing values\n",
-    "df_imputed = md_handler.apply_imputation(df, missingness_dict)\n",
+    "real_df = md_handler.apply_imputation(df, missingness_dict)\n",
     "\n",
-    "print(df_imputed.isnull().sum())"
+    "print(\"Missing data:\")\n",
+    "print(real_df.isnull().sum())"
    ]
   },
   {
@@ -224,7 +228,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processed Data:\n"
+      "Processed data:\n"
      ]
     },
     {
@@ -262,7 +266,7 @@
        "      <td>0</td>\n",
        "      <td>0.503625</td>\n",
        "      <td>3</td>\n",
-       "      <td>-0.480608</td>\n",
+       "      <td>-0.517232</td>\n",
        "      <td>4</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -271,7 +275,7 @@
        "      <td>1</td>\n",
        "      <td>-1.495187</td>\n",
        "      <td>4</td>\n",
-       "      <td>-0.834521</td>\n",
+       "      <td>-0.898113</td>\n",
        "      <td>3</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -280,7 +284,7 @@
        "      <td>0</td>\n",
        "      <td>-1.603231</td>\n",
        "      <td>4</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>4</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -289,7 +293,7 @@
        "      <td>0</td>\n",
        "      <td>1.638086</td>\n",
        "      <td>5</td>\n",
-       "      <td>-0.401961</td>\n",
+       "      <td>-0.432591</td>\n",
        "      <td>1</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -298,7 +302,7 @@
        "      <td>0</td>\n",
        "      <td>0.341559</td>\n",
        "      <td>3</td>\n",
-       "      <td>0.069923</td>\n",
+       "      <td>0.075251</td>\n",
        "      <td>3</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -308,11 +312,11 @@
       ],
       "text/plain": [
        "   sex       age  marital    income  ls  smoke\n",
-       "0    0  0.503625        3 -0.480608   4      0\n",
-       "1    1 -1.495187        4 -0.834521   3      0\n",
-       "2    0 -1.603231        4       NaN   4      0\n",
-       "3    0  1.638086        5 -0.401961   1      0\n",
-       "4    0  0.341559        3  0.069923   3      1"
+       "0    0  0.503625        3 -0.517232   4      0\n",
+       "1    1 -1.495187        4 -0.898113   3      0\n",
+       "2    0 -1.603231        4  0.000000   4      0\n",
+       "3    0  1.638086        5 -0.432591   1      0\n",
+       "4    0  0.341559        3  0.075251   3      1"
       ]
      },
      "metadata": {},
@@ -320,12 +324,12 @@
     }
    ],
    "source": [
-    "# 3. Instantiate the DataProcessor with column_dtypes\n",
+    "# 3. Preprocessing: Instantiate the DataProcessor with the column data types (metadata)\n",
     "processor = DataProcessor(metadata)\n",
     "\n",
     "# 3.1 Preprocess the data: transforms raw data into a numerical format\n",
-    "processed_data = processor.preprocess(df)\n",
-    "print(\"Processed Data:\")\n",
+    "processed_data = processor.preprocess(real_df)\n",
+    "print(\"Processed data:\")\n",
     "display(processed_data.head())"
    ]
   },
@@ -333,15 +337,7 @@
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "ERROR:synthpop.method.cart:Error fitting model for column 'income': Input y contains NaN.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# 4. Fit the CART method\n",
     "cart = CARTMethod(metadata, smoothing=True, proper=True, minibucket=5, random_state=42)\n",
@@ -350,14 +346,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Synthetic Processed Data:\n"
+      "Synthetic processed data:\n"
      ]
     },
     {
@@ -384,6 +380,7 @@
        "      <th>sex</th>\n",
        "      <th>age</th>\n",
        "      <th>marital</th>\n",
+       "      <th>income</th>\n",
        "      <th>ls</th>\n",
        "      <th>smoke</th>\n",
        "    </tr>\n",
@@ -391,42 +388,47 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>-1.123252</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-0.716885</td>\n",
        "      <td>4</td>\n",
+       "      <td>-1.189097</td>\n",
        "      <td>2</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>1</td>\n",
-       "      <td>0.704909</td>\n",
+       "      <td>-1.066729</td>\n",
        "      <td>3</td>\n",
+       "      <td>0.057878</td>\n",
        "      <td>4</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>0</td>\n",
-       "      <td>1.583713</td>\n",
-       "      <td>5</td>\n",
+       "      <td>1.552391</td>\n",
        "      <td>3</td>\n",
+       "      <td>-0.754037</td>\n",
+       "      <td>2</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>0</td>\n",
-       "      <td>-0.127991</td>\n",
+       "      <td>0.522026</td>\n",
        "      <td>3</td>\n",
+       "      <td>0.337329</td>\n",
        "      <td>4</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>0</td>\n",
-       "      <td>0.868010</td>\n",
+       "      <td>0.262577</td>\n",
+       "      <td>3</td>\n",
+       "      <td>-1.179427</td>\n",
        "      <td>3</td>\n",
-       "      <td>4</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -434,12 +436,12 @@
        "</div>"
       ],
       "text/plain": [
-       "  sex       age marital ls smoke\n",
-       "0   0 -1.123252       4  2     0\n",
-       "1   1  0.704909       3  4     1\n",
-       "2   0  1.583713       5  3     0\n",
-       "3   0 -0.127991       3  4     1\n",
-       "4   0  0.868010       3  4     0"
+       "  sex       age marital    income ls smoke\n",
+       "0   1 -0.716885       4 -1.189097  2     0\n",
+       "1   1 -1.066729       3  0.057878  4     1\n",
+       "2   0  1.552391       3 -0.754037  2     0\n",
+       "3   0  0.522026       3  0.337329  4     1\n",
+       "4   0  0.262577       3 -1.179427  3     0"
       ]
      },
      "metadata": {},
@@ -449,41 +451,124 @@
    "source": [
     "# 4.1 Preview generated synthetic data\n",
     "synthetic_processed = cart.sample(100)\n",
-    "print(\"Synthetic Processed Data:\")\n",
+    "print(\"Synthetic processed data:\")\n",
     "display(synthetic_processed.head())"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
-     "ename": "KeyError",
-     "evalue": "\"None of [Index(['income'], dtype='object')] are in the [columns]\"",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[12], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# 4.2 Postprocess the synthetic data back to the original format\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m synthetic_data \u001b[38;5;241m=\u001b[39m \u001b[43mprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpostprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43msynthetic_processed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSynthetic Data in Original Format:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      4\u001b[0m display(synthetic_data\u001b[38;5;241m.\u001b[39mhead())\n",
-      "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/synthpop/processor/data_processor.py:90\u001b[0m, in \u001b[0;36mDataProcessor.postprocess\u001b[0;34m(self, synthetic_data)\u001b[0m\n\u001b[1;32m     88\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumerical\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscalers:\n\u001b[1;32m     89\u001b[0m     scaler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscalers[col]\n\u001b[0;32m---> 90\u001b[0m     synthetic_data[col] \u001b[38;5;241m=\u001b[39m scaler\u001b[38;5;241m.\u001b[39minverse_transform(\u001b[43msynthetic_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[1;32m     92\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mboolean\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m     93\u001b[0m     synthetic_data[col] \u001b[38;5;241m=\u001b[39m synthetic_data[col]\u001b[38;5;241m.\u001b[39mround()\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mbool\u001b[39m)\n",
-      "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/frame.py:4108\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   4106\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m   4107\u001b[0m         key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 4108\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m   4110\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m   4111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n",
-      "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/indexes/base.py:6200\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m   6197\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   6198\u001b[0m     keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 6200\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   6202\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m   6203\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m   6204\u001b[0m     \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
-      "File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/indexes/base.py:6249\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m   6247\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nmissing:\n\u001b[1;32m   6248\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m nmissing \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(indexer):\n\u001b[0;32m-> 6249\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   6251\u001b[0m     not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[1;32m   6252\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[0;31mKeyError\u001b[0m: \"None of [Index(['income'], dtype='object')] are in the [columns]\""
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Synthetic data in original format:\n"
      ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sex</th>\n",
+       "      <th>age</th>\n",
+       "      <th>marital</th>\n",
+       "      <th>income</th>\n",
+       "      <th>ls</th>\n",
+       "      <th>smoke</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>MALE</td>\n",
+       "      <td>34.407146</td>\n",
+       "      <td>SINGLE</td>\n",
+       "      <td>6.211859</td>\n",
+       "      <td>MOSTLY DISSATISFIED</td>\n",
+       "      <td>NO</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>MALE</td>\n",
+       "      <td>27.931190</td>\n",
+       "      <td>MARRIED</td>\n",
+       "      <td>1479.474304</td>\n",
+       "      <td>PLEASED</td>\n",
+       "      <td>YES</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>FEMALE</td>\n",
+       "      <td>76.413698</td>\n",
+       "      <td>MARRIED</td>\n",
+       "      <td>520.222172</td>\n",
+       "      <td>MOSTLY DISSATISFIED</td>\n",
+       "      <td>NO</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>FEMALE</td>\n",
+       "      <td>57.340625</td>\n",
+       "      <td>MARRIED</td>\n",
+       "      <td>1809.637340</td>\n",
+       "      <td>PLEASED</td>\n",
+       "      <td>YES</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>FEMALE</td>\n",
+       "      <td>52.537967</td>\n",
+       "      <td>MARRIED</td>\n",
+       "      <td>17.637157</td>\n",
+       "      <td>MOSTLY SATISFIED</td>\n",
+       "      <td>NO</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      sex        age  marital       income                   ls smoke\n",
+       "0    MALE  34.407146   SINGLE     6.211859  MOSTLY DISSATISFIED    NO\n",
+       "1    MALE  27.931190  MARRIED  1479.474304              PLEASED   YES\n",
+       "2  FEMALE  76.413698  MARRIED   520.222172  MOSTLY DISSATISFIED    NO\n",
+       "3  FEMALE  57.340625  MARRIED  1809.637340              PLEASED   YES\n",
+       "4  FEMALE  52.537967  MARRIED    17.637157     MOSTLY SATISFIED    NO"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "# 4.2 Postprocess the synthetic data back to the original format and give preview of generated synthetic data\n",
-    "synthetic_data = processor.postprocess(synthetic_processed)\n",
-    "print(\"Synthetic Data in Original Format:\")\n",
-    "display(synthetic_data.head())"
+    "# 5. Postprocessing: back to the original format and preview of data\n",
+    "synthetic_df = processor.postprocess(synthetic_processed)\n",
+    "print(\"Synthetic data in original format:\")\n",
+    "display(synthetic_df.head())"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -493,6 +578,259 @@
     "    DisclosureProtection\n",
     ")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Diagnostic Report ===\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>column</th>\n",
+       "      <th>type</th>\n",
+       "      <th>missing_value_similarity</th>\n",
+       "      <th>range_coverage</th>\n",
+       "      <th>boundary_adherence</th>\n",
+       "      <th>ks_complement</th>\n",
+       "      <th>tv_complement</th>\n",
+       "      <th>statistic_similarity</th>\n",
+       "      <th>category_coverage</th>\n",
+       "      <th>category_adherence</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>sex</td>\n",
+       "      <td>categorical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.9764</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>age</td>\n",
+       "      <td>numerical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.94757</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.9142</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.962239</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>marital</td>\n",
+       "      <td>categorical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.967</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.666667</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>income</td>\n",
+       "      <td>numerical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.408926</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.9056</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.948719</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ls</td>\n",
+       "      <td>categorical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.9224</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.857143</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>smoke</td>\n",
+       "      <td>categorical</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>0.9754</td>\n",
+       "      <td>N/A</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    column         type  missing_value_similarity range_coverage  \\\n",
+       "0      sex  categorical                       1.0            N/A   \n",
+       "1      age    numerical                       1.0        0.94757   \n",
+       "2  marital  categorical                       1.0            N/A   \n",
+       "3   income    numerical                       1.0       0.408926   \n",
+       "4       ls  categorical                       1.0            N/A   \n",
+       "5    smoke  categorical                       1.0            N/A   \n",
+       "\n",
+       "  boundary_adherence ks_complement tv_complement statistic_similarity  \\\n",
+       "0                N/A           N/A        0.9764                  N/A   \n",
+       "1                1.0        0.9142           N/A             0.962239   \n",
+       "2                N/A           N/A         0.967                  N/A   \n",
+       "3                1.0        0.9056           N/A             0.948719   \n",
+       "4                N/A           N/A        0.9224                  N/A   \n",
+       "5                N/A           N/A        0.9754                  N/A   \n",
+       "\n",
+       "  category_coverage category_adherence  \n",
+       "0               1.0                1.0  \n",
+       "1               N/A                N/A  \n",
+       "2          0.666667                1.0  \n",
+       "3               N/A                N/A  \n",
+       "4          0.857143                1.0  \n",
+       "5               1.0                1.0  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# 6. Evaluate the synthetic data\n",
+    "\n",
+    "# 6.1 Diagnostic report\n",
+    "report = MetricsReport(real_df, synthetic_df, metadata)\n",
+    "report_df = report.generate_report()\n",
+    "print(\"=== Diagnostic Report ===\")\n",
+    "display(report_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "could not convert string to float: 'MALE'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m/var/folders/v8/64zc34sn3b95klfm660085h80000gn/T/ipykernel_9335/3414886545.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# 6.2 Efficacy metrics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# regression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mreg_efficacy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mEfficacyMetrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'regression'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_column\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"income\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mreg_metrics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreg_efficacy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreal_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msynthetic_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"=== Regression Efficacy Metrics ===\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreg_metrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/synthpop/metrics/efficacy_metrics.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, real_df, synthetic_df)\u001b[0m\n\u001b[1;32m     88\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     89\u001b[0m         \u001b[0;31m# Model Training and Evaluation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     90\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtask\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'regression'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     91\u001b[0m             \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m             \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_syn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_syn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     93\u001b[0m             \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_real\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     94\u001b[0m             \u001b[0mmse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmean_squared_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_real\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m             \u001b[0mmae\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmean_absolute_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_real\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1470\u001b[0m                 skip_parameter_validation=(\n\u001b[1;32m   1471\u001b[0m                     \u001b[0mprefer_skip_nested_validation\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mglobal_skip_validation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1472\u001b[0m                 )\n\u001b[1;32m   1473\u001b[0m             ):\n\u001b[0;32m-> 1474\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mfit_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/linear_model/_base.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    574\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    576\u001b[0m         \u001b[0maccept_sparse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpositive\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"csr\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"coo\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m         X, y = self._validate_data(\n\u001b[0m\u001b[1;32m    579\u001b[0m             \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_numeric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    580\u001b[0m         )\n\u001b[1;32m    581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[1;32m    646\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0;34m\"estimator\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcheck_y_params\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    647\u001b[0m                     \u001b[0mcheck_y_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mdefault_check_params\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    648\u001b[0m                 \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"y\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    649\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 650\u001b[0;31m                 \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_X_y\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcheck_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    651\u001b[0m             \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    652\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    653\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mno_val_X\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcheck_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ensure_2d\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[1;32m   1259\u001b[0m         raise ValueError(\n\u001b[1;32m   1260\u001b[0m             \u001b[0;34mf\"{estimator_name} requires y to be passed, but the target y is None\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1261\u001b[0m         )\n\u001b[1;32m   1262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1263\u001b[0;31m     X = check_array(\n\u001b[0m\u001b[1;32m   1264\u001b[0m         \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1265\u001b[0m         \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1266\u001b[0m         \u001b[0maccept_large_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maccept_large_sparse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[1;32m    994\u001b[0m                         )\n\u001b[1;32m    995\u001b[0m                     \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    996\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    997\u001b[0m                     \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_asarray_with_order\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mxp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 998\u001b[0;31m             \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    999\u001b[0m                 raise ValueError(\n\u001b[1;32m   1000\u001b[0m                     \u001b[0;34m\"Complex data not 
supported\\n{}\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1001\u001b[0m                 ) from complex_warning\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/sklearn/utils/_array_api.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(array, dtype, order, copy, xp)\u001b[0m\n\u001b[1;32m    517\u001b[0m         \u001b[0;31m# Use NumPy API to support order\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    518\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    519\u001b[0m             \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    520\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m             \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    522\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    523\u001b[0m         \u001b[0;31m# At this point array is a NumPy ndarray. We convert it to an array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    524\u001b[0m         \u001b[0;31m# container that is consistent with the input's namespace.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, dtype, copy)\u001b[0m\n\u001b[1;32m   2149\u001b[0m     def __array__(\n\u001b[1;32m   2150\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnpt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDTypeLike\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool_t\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2151\u001b[0m     ) -> np.ndarray:\n\u001b[1;32m   2152\u001b[0m         \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2153\u001b[0;31m         \u001b[0marr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2154\u001b[0m         if (\n\u001b[1;32m   2155\u001b[0m             \u001b[0mastype_is_view\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2156\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0musing_copy_on_write\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'MALE'"
+     ]
+    }
+   ],
+   "source": [
+    "# 6.2 Efficacy metrics\n",
+    "\n",
+    "# regression\n",
+    "reg_efficacy = EfficacyMetrics(task='regression', target_column=\"income\")\n",
+    "reg_metrics = reg_efficacy.evaluate(real_df, synthetic_df)\n",
+    "print(\"=== Regression Efficacy Metrics ===\")\n",
+    "print(reg_metrics)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Classification Efficacy Metrics ===\n",
+      "{'accuracy': 0.6392, 'f1_score': 0.6481509447474609}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# classification\n",
+    "clf_efficacy = EfficacyMetrics(task='classification', target_column=\"smoke\")\n",
+    "clf_metrics = clf_efficacy.evaluate(real_df, synthetic_df)\n",
+    "print(\"\\n=== Classification Efficacy Metrics ===\")\n",
+    "print(clf_metrics)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Disclosure Protection ===\n",
+      "Score: 1.000\n",
+      "Detailed Report: {'threshold': 0.0, 'risk_rate': 0.0, 'disclosure_protection_score': 1.0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# privacy\n",
+    "dp = DisclosureProtection(real_df, synthetic_df)\n",
+    "dp_score = dp.score()\n",
+    "dp_report = dp.report()\n",
+    "\n",
+    "print(\"\\n=== Disclosure Protection ===\")\n",
+    "print(f\"Score: {dp_score:.3f}\")\n",
+    "print(\"Detailed Report:\", dp_report)"
+   ]
   }
  ],
  "metadata": {