diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..0249621
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,60 @@
+repos:
+  # 1. Code Formatter: Black (Ensures uniform formatting)
+  - repo: https://github.com/psf/black
+    rev: 24.3.0  # pin a released tag; pre-commit needs an immutable rev and the old 'stable' branch no longer exists
+    hooks:
+      - id: black
+
+  # 2. Code Formatter: isort (Sorts imports)
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+
+  # 3. Linter: Flake8 (Finds style & syntax issues)
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+
+  # 4. Security: Bandit (Finds security vulnerabilities)
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.7.5  # pin a released tag; 'stable' is not a valid immutable rev
+    hooks:
+      - id: bandit
+        args: ["-r", "."]
+
+  # 5. Security: detect-secrets (Prevents committing secrets)
+  - repo: https://github.com/Yelp/detect-secrets
+    rev: v1.3.0
+    hooks:
+      - id: detect-secrets
+
+  # 6. Type Checker: mypy (Checks for type errors)
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.0.0
+    hooks:
+      - id: mypy
+
+  # 7. Tests: Pytest (Runs test cases before commit)
+  - repo: local
+    hooks:
+      - id: pytest
+        name: Run Pytest
+        entry: pytest
+        language: system
+        types: [python]
+
+  # 8. Dependency Check: pip-audit (Checks for vulnerable dependencies)
+  - repo: https://github.com/pypa/pip-audit
+    rev: v2.4.0
+    hooks:
+      - id: pip-audit
+
+  # 9. File Cleanup: Remove trailing whitespace
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+
+
diff --git a/.venv/Scripts/Activate.ps1 b/.venv/Scripts/Activate.ps1
new file mode 100644
index 0000000..b63e7b7
--- /dev/null
+++ b/.venv/Scripts/Activate.ps1
@@ -0,0 +1,502 @@
+<#
+.Synopsis
+Activate a Python virtual environment for the current PowerShell session.
+
+.Description
+Pushes the python executable for a virtual environment to the front of the
+$Env:PATH environment variable and sets the prompt to signify that you are
+in a Python virtual environment. Makes use of the command line switches as
+well as the `pyvenv.cfg` file values present in the virtual environment.
+
+.Parameter VenvDir
+Path to the directory that contains the virtual environment to activate. The
+default value for this is the parent of the directory that the Activate.ps1
+script is located within.
+
+.Parameter Prompt
+The prompt prefix to display when this virtual environment is activated. By
+default, this prompt is the name of the virtual environment folder (VenvDir)
+surrounded by parentheses and followed by a single space (ie. '(.venv) ').
+
+.Example
+Activate.ps1
+Activates the Python virtual environment that contains the Activate.ps1 script.
+
+.Example
+Activate.ps1 -Verbose
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and shows extra information about the activation as it executes.
+
+.Example
+Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
+Activates the Python virtual environment located in the specified location.
+
+.Example
+Activate.ps1 -Prompt "MyPython"
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and prefixes the current prompt with the specified string (surrounded in
+parentheses) while the virtual environment is active.
+
+.Notes
+On Windows, it may be required to enable this Activate.ps1 script by setting the
+execution policy for the user.
You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. + +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. 
+ if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. 
+$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" + +# SIG # Begin signature block +# MIIvIwYJKoZIhvcNAQcCoIIvFDCCLxACAQExDzANBglghkgBZQMEAgEFADB5Bgor +# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG +# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCBnL745ElCYk8vk +# dBtMuQhLeWJ3ZGfzKW4DHCYzAn+QB6CCE8MwggWQMIIDeKADAgECAhAFmxtXno4h +# MuI5B72nd3VcMA0GCSqGSIb3DQEBDAUAMGIxCzAJBgNVBAYTAlVTMRUwEwYDVQQK +# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xITAfBgNV +# BAMTGERpZ2lDZXJ0IFRydXN0ZWQgUm9vdCBHNDAeFw0xMzA4MDExMjAwMDBaFw0z +# ODAxMTUxMjAwMDBaMGIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ +# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xITAfBgNVBAMTGERpZ2lDZXJ0 +# IFRydXN0ZWQgUm9vdCBHNDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIB +# AL/mkHNo3rvkXUo8MCIwaTPswqclLskhPfKK2FnC4SmnPVirdprNrnsbhA3EMB/z +# G6Q4FutWxpdtHauyefLKEdLkX9YFPFIPUh/GnhWlfr6fqVcWWVVyr2iTcMKyunWZ +# anMylNEQRBAu34LzB4TmdDttceItDBvuINXJIB1jKS3O7F5OyJP4IWGbNOsFxl7s +# Wxq868nPzaw0QF+xembud8hIqGZXV59UWI4MK7dPpzDZVu7Ke13jrclPXuU15zHL +# 2pNe3I6PgNq2kZhAkHnDeMe2scS1ahg4AxCN2NQ3pC4FfYj1gj4QkXCrVYJBMtfb +# BHMqbpEBfCFM1LyuGwN1XXhm2ToxRJozQL8I11pJpMLmqaBn3aQnvKFPObURWBf3 +# JFxGj2T3wWmIdph2PVldQnaHiZdpekjw4KISG2aadMreSx7nDmOu5tTvkpI6nj3c +# AORFJYm2mkQZK37AlLTSYW3rM9nF30sEAMx9HJXDj/chsrIRt7t/8tWMcCxBYKqx +# YxhElRp2Yn72gLD76GSmM9GJB+G9t+ZDpBi4pncB4Q+UDCEdslQpJYls5Q5SUUd0 +# viastkF13nqsX40/ybzTQRESW+UQUOsxxcpyFiIJ33xMdT9j7CFfxCBRa2+xq4aL +# T8LWRV+dIPyhHsXAj6KxfgommfXkaS+YHS312amyHeUbAgMBAAGjQjBAMA8GA1Ud +# EwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBTs1+OC0nFdZEzf +# Lmc/57qYrhwPTzANBgkqhkiG9w0BAQwFAAOCAgEAu2HZfalsvhfEkRvDoaIAjeNk +# aA9Wz3eucPn9mkqZucl4XAwMX+TmFClWCzZJXURj4K2clhhmGyMNPXnpbWvWVPjS +# PMFDQK4dUPVS/JA7u5iZaWvHwaeoaKQn3J35J64whbn2Z006Po9ZOSJTROvIXQPK +# 7VB6fWIhCoDIc2bRoAVgX+iltKevqPdtNZx8WorWojiZ83iL9E3SIAveBO6Mm0eB +# cg3AFDLvMFkuruBx8lbkapdvklBtlo1oepqyNhR6BvIkuQkRUNcIsbiJeoQjYUIp +# 5aPNoiBB19GcZNnqJqGLFNdMGbJQQXE9P01wI4YMStyB0swylIQNCAmXHE/A7msg +# dDDS4Dk0EIUhFQEI6FUy3nFJ2SgXUE3mvk3RdazQyvtBuEOlqtPDBURPLDab4vri +# RbgjU2wGb2dVf0a1TD9uKFp5JtKkqGKX0h7i7UqLvBv9R0oN32dmfrJbQdA75PQ7 +# 9ARj6e/CVABRoIoqyc54zNXqhwQYs86vSYiv85KZtrPmYQ/ShQDnUBrkG5WdGaG5 +# nLGbsQAe79APT0JsyQq87kP6OnGlyE0mpTX9iV28hWIdMtKgK1TtmlfB2/oQzxm3 +# i0objwG2J5VT6LaJbVu8aNQj6ItRolb58KaAoNYes7wPD1N1KarqE3fk3oyBIa0H +# EEcRrYc9B9F1vM/zZn4wggawMIIEmKADAgECAhAIrUCyYNKcTJ9ezam9k67ZMA0G +# CSqGSIb3DQEBDAUAMGIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ +# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xITAfBgNVBAMTGERpZ2lDZXJ0 +# 
HOodLCA9h+91Iqdc+uSz3Sg9/+Ns4zCp4BonvnsPYTlWTitiB5cpfPe/v4lBvCNu +# x0ha6whvKMdRLZJgXsiDXo2NwwB55kkWEBwD3a1RnBJQmyJxFEGpSXOrhmdcEWPg +# fjoHVIfowKBrIgINdWJbvIu+pLzQRMkVhuJzB32xpiZBIvbzkPETYQMOmKIu40I9 +# 5EAL0xNakPxYiT3nTkncn6woLOhiOXFm7crE+gO4IzDNauYuT9Vfe36K1CqtuYSy +# JesLIey9Z81OQqOo6n2/lW110MKMEV2PkPU7YW/bYO2uKsZ3OAjUWr63nMT+M2wk +# VdUAcqm0QdZsELY75Q3ekRxHje/B9ePP4Q4RMQGOZvmgqdtEeFhsmRwufR4fzfqx +# WMttmOHelTd8Sc0sfA9B+1dxtiC9GFn3de5/o+T2s/jQn6eNp2hvlCqGV0iFzSQp +# InPTBa9Na/+5UeXZ3NBWRvarfZ62TVM= +# SIG # End signature block diff --git a/.venv/Scripts/activate b/.venv/Scripts/activate new file mode 100644 index 0000000..cffeeaa --- /dev/null +++ b/.venv/Scripts/activate @@ -0,0 +1,70 @@ +# This file must be used with "source bin/activate" *from bash* +# You cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # Call hash to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + hash -r 2> /dev/null + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +# on Windows, a path can contain colons and backslashes and has to be converted: +if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then + # transform D:\path\to\venv to /d/path/to/venv on MSYS + # and to /cygdrive/d/path/to/venv on Cygwin + export VIRTUAL_ENV=$(cygpath "c:\Users\valen\OneDrive\Documents\Jordy Projects\AlgorithmAudit\python_synthpop\.venv") +else + # use the path as-is + export VIRTUAL_ENV="c:\Users\valen\OneDrive\Documents\Jordy Projects\AlgorithmAudit\python_synthpop\.venv" +fi + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/Scripts:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1="(.venv) ${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT="(.venv) " + export VIRTUAL_ENV_PROMPT +fi + +# Call hash to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +hash -r 2> /dev/null diff --git a/.venv/Scripts/activate.bat b/.venv/Scripts/activate.bat new file mode 100644 index 0000000..2d6b787 --- /dev/null +++ b/.venv/Scripts/activate.bat @@ -0,0 +1,34 @@ +@echo off + +rem This file is UTF-8 encoded, so we need to update the current code page while executing it +for /f "tokens=2 delims=:." 
%%a in ('"%SystemRoot%\System32\chcp.com"') do ( + set _OLD_CODEPAGE=%%a +) +if defined _OLD_CODEPAGE ( + "%SystemRoot%\System32\chcp.com" 65001 > nul +) + +set VIRTUAL_ENV=c:\Users\valen\OneDrive\Documents\Jordy Projects\AlgorithmAudit\python_synthpop\.venv + +if not defined PROMPT set PROMPT=$P$G + +if defined _OLD_VIRTUAL_PROMPT set PROMPT=%_OLD_VIRTUAL_PROMPT% +if defined _OLD_VIRTUAL_PYTHONHOME set PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME% + +set _OLD_VIRTUAL_PROMPT=%PROMPT% +set PROMPT=(.venv) %PROMPT% + +if defined PYTHONHOME set _OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME% +set PYTHONHOME= + +if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH% +if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH% + +set PATH=%VIRTUAL_ENV%\Scripts;%PATH% +set VIRTUAL_ENV_PROMPT=(.venv) + +:END +if defined _OLD_CODEPAGE ( + "%SystemRoot%\System32\chcp.com" %_OLD_CODEPAGE% > nul + set _OLD_CODEPAGE= +) diff --git a/.venv/Scripts/deactivate.bat b/.venv/Scripts/deactivate.bat new file mode 100644 index 0000000..62a39a7 --- /dev/null +++ b/.venv/Scripts/deactivate.bat @@ -0,0 +1,22 @@ +@echo off + +if defined _OLD_VIRTUAL_PROMPT ( + set "PROMPT=%_OLD_VIRTUAL_PROMPT%" +) +set _OLD_VIRTUAL_PROMPT= + +if defined _OLD_VIRTUAL_PYTHONHOME ( + set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%" + set _OLD_VIRTUAL_PYTHONHOME= +) + +if defined _OLD_VIRTUAL_PATH ( + set "PATH=%_OLD_VIRTUAL_PATH%" +) + +set _OLD_VIRTUAL_PATH= + +set VIRTUAL_ENV= +set VIRTUAL_ENV_PROMPT= + +:END diff --git a/.venv/Scripts/f2py.exe b/.venv/Scripts/f2py.exe new file mode 100644 index 0000000..dd6da59 Binary files /dev/null and b/.venv/Scripts/f2py.exe differ diff --git a/.venv/Scripts/numpy-config.exe b/.venv/Scripts/numpy-config.exe new file mode 100644 index 0000000..69d7106 Binary files /dev/null and b/.venv/Scripts/numpy-config.exe differ diff --git a/.venv/Scripts/pip.exe b/.venv/Scripts/pip.exe new file mode 100644 index 0000000..d2007fb Binary files /dev/null and b/.venv/Scripts/pip.exe differ diff --git a/.venv/Scripts/pip3.12.exe b/.venv/Scripts/pip3.12.exe new file mode 100644 index 0000000..d2007fb Binary files /dev/null and b/.venv/Scripts/pip3.12.exe differ diff --git a/.venv/Scripts/pip3.exe b/.venv/Scripts/pip3.exe new file mode 100644 index 0000000..d2007fb Binary files /dev/null and b/.venv/Scripts/pip3.exe differ diff --git a/.venv/Scripts/py.test.exe b/.venv/Scripts/py.test.exe new file mode 100644 index 0000000..296a973 Binary files /dev/null and b/.venv/Scripts/py.test.exe differ diff --git a/.venv/Scripts/pytest.exe b/.venv/Scripts/pytest.exe new file mode 100644 index 0000000..296a973 Binary files /dev/null and b/.venv/Scripts/pytest.exe differ diff --git a/.venv/Scripts/python.exe b/.venv/Scripts/python.exe new file mode 100644 index 0000000..53121ae Binary files /dev/null and b/.venv/Scripts/python.exe differ diff --git a/.venv/Scripts/pythonw.exe b/.venv/Scripts/pythonw.exe new file mode 100644 index 0000000..a09f6e9 Binary files /dev/null and b/.venv/Scripts/pythonw.exe differ diff --git a/example_notebooks/01_missing_data_handler_example.ipynb b/example_notebooks/01_missing_data_handler_example.ipynb new file mode 100644 index 0000000..a27c58f --- /dev/null +++ b/example_notebooks/01_missing_data_handler_example.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import timedelta\n", + "from synthpop import MissingDataHandler" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dates = pd.date_range(\"2023-01-01\", periods=50, freq=\"D\")\n", + "bool_values = np.random.choice([True, False], size=50)\n", + "timedeltas = timedeltas = [timedelta(days=int(i)) for i in np.random.randint(1, 100, 50)]\n", + "df_custom = pd.DataFrame({\n", + " \"numeric_col1\": np.random.normal(50, 10, 50),\n", + " \"numeric_col2\": np.random.randint(0, 100, 50),\n", + " \"categorical_col\": np.random.choice([\"Red\", \"Green\", \"Blue\"], size=50),\n", + " \"boolean_col\": bool_values,\n", + " \"datetime_col\": dates,\n", + " \"timedelta_col\": timedeltas,\n", + " \"float_col\": np.random.uniform(0.0, 1.0, 50)\n", + "})\n", + "\n", + "\n", + "df = df_custom.copy()\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42) # For reproducibility\n", + "\n", + "def introduce_missingness(dataframe, missing_frac=0.1):\n", + " \"\"\"Randomly set a fraction of each column's values to NaN.\"\"\"\n", + " df_with_nans = dataframe.copy()\n", + " rows = len(df_with_nans)\n", + " for col in df_with_nans.columns:\n", + " n_missing = int(rows * missing_frac)\n", + " missing_indices = np.random.choice(df_with_nans.index, n_missing, replace=False)\n", + " df_with_nans.loc[missing_indices, col] = np.nan\n", + " return df_with_nans\n", + "\n", + "df_missing = introduce_missingness(df, missing_frac=0.2) # 20% missingness\n", + "df_missing.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "md_handler = MissingDataHandler()\n", + "\n", + "# Check the data types\n", + "column_dtypes = md_handler.get_column_dtypes(df_missing)\n", + "print(\"Column Data Types:\", column_dtypes)\n", + "\n", + "# Detect missingness\n", + "missingness_dict = md_handler.detect_missingness(df_missing)\n", + "print(\"Detected Missingness Type:\", missingness_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_imputed = md_handler.apply_imputation(df_missing, missingness_dict)\n", + "\n", + "print(\"Before Imputation:\\n\", df_missing.head(10))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\nAfter Imputation:\\n\", df_imputed.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "AAdev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/example_notebooks/02_data_processor_example.ipynb b/example_notebooks/02_data_processor_example.ipynb new file mode 100644 index 0000000..609ab40 --- /dev/null +++ b/example_notebooks/02_data_processor_example.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import timedelta\n", + "from synthpop import DataProcessor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# Sample data generation using the provided data\n", + "dates = pd.date_range(\"2023-01-01\", periods=50, freq=\"D\")\n", + "bool_values = np.random.choice([True, False], size=50)\n", + "timedeltas = [timedelta(days=int(i)) for i in np.random.randint(1, 100, 50)]\n", + "\n", + "df_custom = pd.DataFrame({\n", + " \"numeric_col1\": np.random.normal(50, 10, 50),\n", + " \"numeric_col2\": np.random.randint(0, 100, 50),\n", + " \"categorical_col\": np.random.choice([\"Red\", \"Green\", \"Blue\"], size=50),\n", + " \"boolean_col\": bool_values,\n", + " \"datetime_col\": dates,\n", + " \"timedelta_col\": timedeltas,\n", + " \"float_col\": np.random.uniform(0.0, 1.0, 50)\n", + "})\n", + "\n", + "df = df_custom.copy()\n", + "print(\"Original Data:\")\n", + "display(df.head())\n", + "\n", + "# Define metadata for each column (update these types as needed)\n", + "metadata = {\n", + " \"numeric_col1\": \"numerical\",\n", + " \"numeric_col2\": \"numerical\",\n", + " \"categorical_col\": \"categorical\",\n", + " \"boolean_col\": \"boolean\",\n", + " \"datetime_col\": \"datetime\",\n", + " \"timedelta_col\": \"timedelta\",\n", + " \"float_col\": \"numerical\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the DataProcessor with the metadata\n", + "processor = DataProcessor(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess the data: transforms raw data into a numerical format\n", + "processed_data = processor.preprocess(df)\n", + "print(\"Processed Data:\")\n", + "display(processed_data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Simulate synthetic data generation by copying the processed data\n", + "# (Replace this step with your synthetic data generation method if available)\n", + "synthetic_data = processed_data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Postprocess the synthetic data to revert it back to its original data types\n", + "#the post processing makes sure to have the columns in their original order. 
\n", + "recovered_data = processor.postprocess(synthetic_data)\n", + "print(\"Recovered Data:\")\n", + "display(recovered_data.head())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "AAdev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/example_notebooks/03_gaussian_copula.ipynb b/example_notebooks/03_gaussian_copula.ipynb new file mode 100644 index 0000000..49bcc03 --- /dev/null +++ b/example_notebooks/03_gaussian_copula.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import timedelta\n", + "from synthpop import DataProcessor, GaussianCopulaMethod, MissingDataHandler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Sample Data Generation with 100 observations\n", + "dates = pd.date_range(\"2023-01-01\", periods=100, freq=\"D\")\n", + "bool_values = np.random.choice([True, False], size=100)\n", + "timedeltas = [timedelta(days=int(i)) for i in np.random.randint(1, 100, 100)]\n", + "\n", + "df_custom = pd.DataFrame({\n", + " \"numeric_col1\": np.random.normal(50, 10, 100),\n", + " \"numeric_col2\": np.random.randint(0, 100, 100),\n", + " \"categorical_col\": np.random.choice([\"Red\", \"Green\", \"Blue\"], size=100),\n", + " \"boolean_col\": bool_values,\n", + " \"datetime_col\": dates,\n", + " \"timedelta_col\": timedeltas,\n", + " \"float_col\": np.random.uniform(0.0, 1.0, 100)\n", + "})\n", + "\n", + "df = df_custom.copy()\n", + "print(\"Original Data:\")\n", + "display(df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#get the metadata from df \n", + "metadata = MissingDataHandler.get_column_dtypes(df)\n", + "print(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate DataProcessor and preprocess the data\n", + "processor = DataProcessor(metadata)\n", + "processed_data = processor.preprocess(df)\n", + "print(\"Processed Data:\")\n", + "display(processed_data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate GaussianCopulaMethod with the same metadata\n", + "gaussian_copula = GaussianCopulaMethod(metadata)\n", + "# Fit the Gaussian Copula model on the processed data\n", + "gaussian_copula.fit(processed_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate synthetic processed data (e.g., 100 synthetic observations)\n", + "synthetic_processed = gaussian_copula.sample(100)\n", + "print(\"Synthetic Processed Data:\")\n", + "display(synthetic_processed.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Postprocess the synthetic data back to the original format\n", + "synthetic_data = processor.postprocess(synthetic_processed)\n", + "print(\"Synthetic Data in Original Format:\")\n", + "display(synthetic_data.head())" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "AAdev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/example_notebooks/04_cart_method.ipynb b/example_notebooks/04_cart_method.ipynb new file mode 100644 index 0000000..5896447 --- /dev/null +++ b/example_notebooks/04_cart_method.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import timedelta\n", + "from synthpop import DataProcessor, CARTMethod, MissingDataHandler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Sample Data Generation with 100 observations\n", + "dates = pd.date_range(\"2023-01-01\", periods=100, freq=\"D\")\n", + "bool_values = np.random.choice([True, False], size=100)\n", + "timedeltas = [timedelta(days=int(i)) for i in np.random.randint(1, 100, 100)]\n", + "\n", + "df_custom = pd.DataFrame({\n", + " \"numeric_col1\": np.random.normal(50, 10, 100),\n", + " \"numeric_col2\": np.random.randint(0, 100, 100),\n", + " \"categorical_col\": np.random.choice([\"Red\", \"Green\", \"Blue\"], size=100),\n", + " \"boolean_col\": bool_values,\n", + " \"datetime_col\": dates,\n", + " \"timedelta_col\": timedeltas,\n", + " \"float_col\": np.random.uniform(0.0, 1.0, 100)\n", + "})\n", + "\n", + "df = df_custom.copy()\n", + "print(\"Original Data:\")\n", + "display(df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#get the metadata from df \n", + "metadata = MissingDataHandler.get_column_dtypes(df)\n", + "print(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate DataProcessor and preprocess the data\n", + "processor = DataProcessor(metadata)\n", + "processed_data = processor.preprocess(df)\n", + "print(\"Processed Data:\")\n", + "display(processed_data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate and fit the CART method\n", + "cart = CARTMethod(metadata, smoothing=True, proper=True, minibucket=5, random_state=42)\n", + "cart.fit(processed_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " # For prediction, we might use the same data (or new preprocessed data)\n", + "synthetic_processed = cart.sample(100)\n", + "print(\"Synthetic Processed Data:\")\n", + "display(synthetic_processed.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Postprocess the synthetic data back to the original format\n", + "synthetic_data = processor.postprocess(synthetic_processed)\n", + "print(\"Synthetic Data in Original Format:\")\n", + "display(synthetic_data.head())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "AAdev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/example_notebooks/05_metrics.ipynb b/example_notebooks/05_metrics.ipynb new file mode 100644 index 0000000..923234c --- /dev/null +++ b/example_notebooks/05_metrics.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from synthpop.metrics import (\n", + " MetricsReport,\n", + " EfficacyMetrics,\n", + " DisclosureProtection\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a small real DataFrame\n", + "real_df = pd.DataFrame({\n", + " \"numeric_col\": [1, 2, 3, 4, 5, np.nan],\n", + " \"categorical_col\": [\"a\", \"b\", \"a\", \"c\", \"b\", \"b\"],\n", + " \"datetime_col\": pd.date_range(\"2023-01-01\", periods=6),\n", + " \"boolean_col\": [True, False, True, False, True, False]\n", + "})\n", + "\n", + "# Create a corresponding synthetic DataFrame\n", + "#can come from one of the SDG methods \n", + "synthetic_df = pd.DataFrame({\n", + " \"numeric_col\": [1.1, 2.1, 2.9, 3.8, 5.2, np.nan],\n", + " \"categorical_col\": [\"a\", \"b\", \"b\", \"c\", \"d\", \"b\"],\n", + " \"datetime_col\": pd.date_range(\"2023-01-01\", periods=6),\n", + " \"boolean_col\": [True, True, True, False, True, False]\n", + "})\n", + "\n", + "# Optional metadata\n", + "#can be obtained also by the missing data handler \n", + "metadata = {\n", + " \"numeric_col\": \"numerical\",\n", + " \"categorical_col\": \"categorical\",\n", + " \"datetime_col\": \"datetime\",\n", + " \"boolean_col\": \"boolean\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate and generate a diagnostic report\n", + "report = MetricsReport(real_df, synthetic_df, metadata)\n", + "report_df = report.generate_report()\n", + "print(\"=== Diagnostic Report ===\")\n", + "display(report_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
DEMO: EfficacyMetrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EXAMPLE A: Regression\n", + "real_reg = pd.DataFrame({\n", + " \"feat1\": np.random.normal(0, 1, 100),\n", + " \"feat2\": np.random.normal(5, 2, 100),\n", + " \"target\": np.random.normal(10, 3, 100)\n", + "})\n", + "synthetic_reg = pd.DataFrame({\n", + " \"feat1\": np.random.normal(0, 1, 100),\n", + " \"feat2\": np.random.normal(5, 2, 100),\n", + " \"target\": np.random.normal(10, 3, 100)\n", + "})\n", + "\n", + "reg_efficacy = EfficacyMetrics(task='regression', target_column=\"target\")\n", + "reg_metrics = reg_efficacy.evaluate(real_reg, synthetic_reg)\n", + "print(\"=== Regression Efficacy Metrics ===\")\n", + "print(reg_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EXAMPLE B: Classification\n", + "real_clf = pd.DataFrame({\n", + " \"feat1\": np.random.normal(0, 1, 100),\n", + " \"feat2\": np.random.normal(5, 2, 100),\n", + " \"target\": np.random.choice([\"A\", \"B\"], size=100)\n", + "})\n", + "synthetic_clf = pd.DataFrame({\n", + " \"feat1\": np.random.normal(0, 1, 100),\n", + " \"feat2\": np.random.normal(5, 2, 100),\n", + " \"target\": np.random.choice([\"A\", \"B\"], size=100)\n", + "})\n", + "\n", + "clf_efficacy = EfficacyMetrics(task='classification', target_column=\"target\")\n", + "clf_metrics = clf_efficacy.evaluate(real_clf, synthetic_clf)\n", + "print(\"\\n=== Classification Efficacy Metrics ===\")\n", + "print(clf_metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. DEMO: Privacy metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example numeric real vs. 
synthetic data\n", + "real_privacy = pd.DataFrame({\n", + " \"col1\": np.random.normal(0, 1, 100),\n", + " \"col2\": np.random.normal(5, 2, 100)\n", + "})\n", + "synthetic_privacy = real_privacy + np.random.normal(0, 0.3, real_privacy.shape)\n", + "\n", + "dp = DisclosureProtection(real_privacy, synthetic_privacy)\n", + "dp_score = dp.score()\n", + "dp_report = dp.report()\n", + "\n", + "print(\"\\n=== Disclosure Protection ===\")\n", + "print(f\"Score: {dp_score:.3f}\")\n", + "print(\"Detailed Report:\", dp_report)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "AAdev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 8ab67d4..446d46d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,8 @@ dependencies = [ "numpy>=1.20.0", "pandas>=1.3.0", "scikit-learn>=1.0.0", + "copulas>=0.1.0", + ] readme = "README.md" license = {file = "LICENSE"} diff --git a/requirements.txt b/requirements.txt index 3c8fd34..8fc59a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ numpy>=1.20.0 pandas>=1.3.0 scikit-learn>=1.0.0 -pytest>=7.0.0 \ No newline at end of file +pytest>=7.0.0 +copulas>=0.1.0 diff --git a/setup.py b/setup.py index 76a7d88..9b3ab5d 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ "numpy>=1.20.0", "pandas>=1.3.0", "scikit-learn>=1.0.0", + "copulas>=0.1.0", ], extras_require={ "dev": [ diff --git a/synthpop/__init__.py b/synthpop/__init__.py index 8e76743..69edef5 100644 --- a/synthpop/__init__.py +++ b/synthpop/__init__.py @@ -1,5 +1,25 @@ -NUM_COLS_DTYPES = ['int', 'float', 'datetime'] -CAT_COLS_DTYPES = ['category', 'bool'] +from .method import CARTMethod, GaussianCopulaMethod, proper, smooth +from .processor import DataProcessor, MissingDataHandler +from .validator import Validator +from .constants import NUM_COLS_DTYPES, CAT_COLS_DTYPES +from .metrics import MetricsReport, EfficacyMetrics, DisclosureProtection +# from .metrics import , compute_TSComplement # if needed + + +__all__ = [ + "CARTMethod", + "GaussianCopulaMethod", + "proper", + "smooth", + "DataProcessor", + "MissingDataHandler", + "Validator", + "MetricsReport", + "EfficacyMetrics", + "DisclosureProtection", + "NUM_COLS_DTYPES", + "CAT_COLS_DTYPES", + # "compute_TSComplement", +] + -from synthpop.synthpop import Synthpop -print('yes') \ No newline at end of file diff --git a/synthpop/constants.py b/synthpop/constants.py new file mode 100644 index 0000000..bfcb761 --- /dev/null +++ b/synthpop/constants.py @@ -0,0 +1,2 @@ +NUM_COLS_DTYPES = ['int', 'float', 'datetime'] +CAT_COLS_DTYPES = ['category', 'bool'] \ No newline at end of file diff --git a/synthpop/method/GC.py b/synthpop/method/GC.py new file mode 100644 index 0000000..5f902a4 --- /dev/null +++ b/synthpop/method/GC.py @@ -0,0 +1,368 @@ +import inspect +import logging +import warnings +from copy import deepcopy +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import pandas as pd +import scipy +import copulas.univariate +from copulas import multivariate +from sklearn.preprocessing import OneHotEncoder +from synthpop.method.helpers import ( + validate_numerical_distributions, + warn_missing_numerical_distributions, + flatten_dict, + 
    unflatten_dict,
+)
+
+LOGGER = logging.getLogger(__name__)
+
+class BaseSingleTableSynthesizer:
+    """
+    Base class for single table synthesizers.
+
+    Args:
+        metadata (dict): Dictionary mapping column names to their types.
+        enforce_min_max_values (bool): Whether to clip reversed numerical values to the observed min/max. Defaults to True.
+        enforce_rounding (bool): Whether to round numerical columns during reverse transformation. Defaults to True.
+        locales (Union[List[str], str]): Default locale(s) to use. Defaults to "en_US".
+    """
+    def __init__(
+        self,
+        metadata: Dict[str, str],
+        enforce_min_max_values: bool = True,
+        enforce_rounding: bool = True,
+        locales: Union[List[str], str] = "en_US",
+    ) -> None:
+        self.metadata = metadata
+        self.enforce_min_max_values = enforce_min_max_values
+        self.enforce_rounding = enforce_rounding
+        if isinstance(locales, str):
+            self.locales = [locales]
+        else:
+            self.locales = locales
+
+
+class GaussianCopulaMethod(BaseSingleTableSynthesizer):
+    # Mapping of distribution name (lowercase) to copulas univariate classes.
+    _DISTRIBUTIONS: Dict[str, Any] = {
+        "norm": copulas.univariate.GaussianUnivariate,
+        "beta": copulas.univariate.BetaUnivariate,
+        "truncnorm": copulas.univariate.TruncatedGaussian,
+        "gamma": copulas.univariate.GammaUnivariate,
+        "uniform": copulas.univariate.UniformUnivariate,
+        "gaussian_kde": copulas.univariate.GaussianKDE,
+    }
+    # Maximum iterations for correlation matrix adjustment
+    _MAX_CORR_ITERATIONS: int = 10
+
+    @classmethod
+    def get_distribution_class(cls, distribution: str) -> Any:
+        """
+        Return the corresponding distribution class from copulas.univariate.
+
+        Args:
+            distribution (str): A string representing a copulas univariate distribution.
+
+        Returns:
+            The corresponding copulas univariate class.
+        """
+        if not isinstance(distribution, str):
+            raise ValueError(f"Distribution specification must be a string, got {type(distribution)}")
+        # Allow case-insensitive matching.
+        distribution_key = distribution.lower()
+        if distribution_key not in cls._DISTRIBUTIONS:
+            error_message = (
+                f"Invalid distribution specification '{distribution}'. "
+                f"Valid options: {list(cls._DISTRIBUTIONS.keys())}"
+            )
+            raise ValueError(error_message)
+        return cls._DISTRIBUTIONS[distribution_key]
+
+    def __init__(
+        self,
+        metadata: Dict[str, str],
+        enforce_min_max_values: bool = True,
+        enforce_rounding: bool = True,
+        locales: Union[List[str], str] = "en_US",
+        numerical_distributions: Optional[Dict[str, str]] = None,
+        default_distribution: Optional[str] = None,
+    ) -> None:
+        super().__init__(metadata, enforce_min_max_values, enforce_rounding, locales)
+        # Validate numerical distributions using metadata keys.
+        validate_numerical_distributions(numerical_distributions, list(self.metadata.keys()))
+        self.default_distribution: str = default_distribution or "beta"
+        self._default_distribution = self.get_distribution_class(self.default_distribution)
+        self._set_numerical_distributions(numerical_distributions)
+        self._num_rows: Optional[int] = None
+        self._model: Optional[Any] = None
+        self._fitted: bool = False
+
+    def _set_numerical_distributions(self, numerical_distributions: Optional[Dict[str, str]]) -> None:
+        """
+        Sets the numerical distributions to be used during model initialization.
+        """
+        self.numerical_distributions = numerical_distributions or {}
+        self._numerical_distributions = {
+            field: self.get_distribution_class(distribution)
+            for field, distribution in self.numerical_distributions.items()
+        }
+
+    def _learn_num_rows(self, processed_data: pd.DataFrame) -> int:
+        """
+        Learn the number of rows from the processed data.
+        """
+        return len(processed_data)
+
+    def _get_numerical_distributions(self, processed_data: pd.DataFrame) -> Dict[str, Any]:
+        """
+        Get a complete dictionary of numerical distributions for all columns in the data.
+        """
+        numerical_distributions = deepcopy(self._numerical_distributions)
+        for column in processed_data.columns:
+            if column not in numerical_distributions:
+                numerical_distributions[column] = self._default_distribution
+        return numerical_distributions
+
+    def _initialize_model(self, numerical_distributions: Dict[str, Any]) -> Any:
+        """
+        Initialize the GaussianMultivariate model with the given numerical distributions.
+        """
+        return multivariate.GaussianMultivariate(distribution=numerical_distributions)
+
+    def _fit_model(self, processed_data: pd.DataFrame) -> None:
+        """
+        Fit the GaussianMultivariate model on the processed data.
+        """
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", module="scipy")
+            self._model.fit(processed_data)
+
+
+    def fit(self, processed_data: pd.DataFrame) -> None:
+        """
+        Public API method to fit the Gaussian Copula model on processed data.
+
+        Args:
+            processed_data (pd.DataFrame): Data that has been preprocessed.
+        """
+        warn_missing_numerical_distributions(self.numerical_distributions, list(processed_data.columns))
+        self._num_rows = self._learn_num_rows(processed_data)
+        numerical_distributions = self._get_numerical_distributions(processed_data)
+        self._model = self._initialize_model(numerical_distributions)
+        self._fit_model(processed_data)
+        self._fitted = True
+
+    def sample(self, num_rows: int, conditions: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
+        """
+        Public API method to sample synthetic data from the fitted model.
+
+        Args:
+            num_rows (int): Number of rows to sample.
+            conditions (Optional[Dict[str, Any]]): Optional conditions for sampling.
+
+        Returns:
+            pd.DataFrame: A DataFrame containing the synthetic samples.
+        """
+        if not self._fitted or self._model is None:
+            raise ValueError("Model is not fitted yet. Please call fit() before sampling.")
+        return self._model.sample(num_rows, conditions=conditions)
+
+    def get_learned_distributions(self) -> Dict[str, Any]:
+        """
+        Get the marginal distributions used by the Gaussian Copula.
+
+        Returns:
+            Dict[str, Any]: A dictionary mapping column names to the distribution name and learned parameters.
+
+        Raises:
+            ValueError: If the model has not been fitted.
+        """
+        if not self._fitted or self._model is None:
+            raise ValueError("Distributions have not been learned yet. Please fit your model first using 'fit()'.")
+        if not hasattr(self._model, "to_dict") or not self._model.to_dict():
+            return {}
+        parameters = self._model.to_dict()
+        columns = parameters.get("columns", [])
+        univariates = deepcopy(parameters.get("univariates", []))
+        learned_distributions: Dict[str, Any] = {}
+        valid_columns = self._get_valid_columns_from_metadata(columns)
+        for column, learned_params in zip(columns, univariates):
+            if column in valid_columns:
+                distribution = self.numerical_distributions.get(column, self.default_distribution)
+                learned_params.pop("type", None)
+                learned_distributions[column] = {
+                    "distribution": distribution,
+                    "learned_parameters": learned_params,
+                }
+        return learned_distributions
+
+    def _get_valid_columns_from_metadata(self, columns: List[str]) -> List[str]:
+        """
+        Extract valid columns based on the metadata.
+
+        Args:
+            columns (List[str]): List of column names.
+
+        Returns:
+            List[str]: Valid column names found in metadata.
+        """
+        valid_columns: List[str] = []
+        for column in columns:
+            for valid_column in self.metadata.keys():
+                if column.startswith(valid_column):
+                    valid_columns.append(column)
+                    break
+        return valid_columns
+
+    def _get_parameters(self) -> Dict[str, Any]:
+        """
+        Get the parameters of the copula model.
+
+        Returns:
+            Dict[str, Any]: A flattened dictionary containing copula parameters.
+        """
+        # Ensure univariates are in their base instance form if applicable.
+        for univariate in self._model.univariates:
+            if isinstance(univariate, copulas.univariate.Univariate):
+                univariate = univariate._instance
+        params = self._model.to_dict()
+        correlation = []
+        for index, row in enumerate(params.get("correlation", [])[1:]):
+            correlation.append(row[: index + 1])
+        params["correlation"] = correlation
+        params["univariates"] = dict(zip(params.get("columns", []), params.get("univariates", [])))
+        params["num_rows"] = self._num_rows
+        return flatten_dict(params)
+
+    @classmethod
+    def _get_nearest_correlation_matrix(cls, matrix: np.ndarray) -> np.ndarray:
+        """
+        Find the nearest Positive Semi-definite (PSD) correlation matrix.
+        Iteratively adjust negative eigenvalues up to a maximum number of iterations.
+
+        Args:
+            matrix (np.ndarray): Input correlation matrix.
+
+        Returns:
+            np.ndarray: Adjusted correlation matrix that is PSD and has ones on the diagonal.
+        """
+        eigenvalues, eigenvectors = scipy.linalg.eigh(matrix)
+        iterations = 0
+        identity = np.identity(len(matrix))
+        while np.any(eigenvalues < 0) and iterations < cls._MAX_CORR_ITERATIONS:
+            # Set negative eigenvalues to zero.
+            eigenvalues[eigenvalues < 0] = 0
+            matrix = eigenvectors @ np.diag(eigenvalues) @ eigenvectors.T
+            # Force ones on the diagonal.
+            matrix = matrix - np.diag(np.diag(matrix)) + np.identity(len(matrix))
+            max_value = np.abs(matrix).max()
+            if max_value > 1:
+                matrix /= max_value
+            eigenvalues, eigenvectors = scipy.linalg.eigh(matrix)
+            iterations += 1
+        if iterations >= cls._MAX_CORR_ITERATIONS and np.any(eigenvalues < 0):
+            LOGGER.warning("Correlation matrix did not converge to PSD within maximum iterations.")
+        return matrix
+
+    def _set_parameters(self, parameters: Dict[str, Any], default_params: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Set copula model parameters based on a flattened parameter dictionary.
+
+        Args:
+            parameters (Dict[str, Any]): Flattened dictionary of model parameters.
+            default_params (Optional[Dict[str, Any]]): Default parameters to fall back on if provided.
+ """ + if default_params is not None: + default_params = unflatten_dict(default_params) + else: + default_params = {} + parameters = unflatten_dict(parameters) + if "num_rows" in parameters: + num_rows = parameters.pop("num_rows") + self._num_rows = 0 if pd.isna(num_rows) else max(0, int(round(num_rows))) + if parameters: + parameters = self._rebuild_gaussian_copula(parameters, default_params) + self._model = multivariate.GaussianMultivariate.from_dict(parameters) + self._fitted = True + + def _rebuild_gaussian_copula(self, model_parameters: Dict[str, Any], default_params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Rebuild the model parameters to recreate a Gaussian Multivariate instance. + + Args: + model_parameters (Dict[str, Any]): Restructured model parameters. + default_params (Optional[Dict[str, Any]]): Fallback parameters if sampled parameters are invalid. + + Returns: + Dict[str, Any]: Model parameters ready for GaussianMultivariate instantiation. + """ + if default_params is None: + default_params = {} + columns: List[str] = [] + univariates: List[Dict[str, Any]] = [] + for column, univariate in model_parameters.get("univariates", {}).items(): + columns.append(column) + if column in self._numerical_distributions: + univariate_type = self._numerical_distributions[column] + else: + univariate_type = self.get_distribution_class(self.default_distribution) + univariate["type"] = univariate_type + model = getattr(univariate_type, "MODEL_CLASS", None) + if model and hasattr(model, "_argcheck"): + try: + # Extract the parameters required for _argcheck. + arg_names = list(inspect.signature(model._argcheck).parameters.keys()) + to_check = {parameter: univariate[parameter] for parameter in arg_names if parameter in univariate} + if not model._argcheck(**to_check): + if "univariates" in default_params and column in default_params["univariates"]: + LOGGER.info( + f"Invalid parameters for column '{column}', falling back to default parameters." + ) + univariate = default_params["univariates"][column] + univariate["type"] = univariate_type + else: + LOGGER.debug(f"Column '{column}' has invalid parameters.") + except Exception as e: + LOGGER.error(f"Error during parameter check for column '{column}': {e}") + else: + LOGGER.debug(f"Univariate for column '{column}' does not have an _argcheck method.") + if "scale" in univariate: + univariate["scale"] = max(0, univariate["scale"]) + univariates.append(univariate) + model_parameters["univariates"] = univariates + model_parameters["columns"] = columns + correlation = model_parameters.get("correlation") + if correlation: + model_parameters["correlation"] = self._rebuild_correlation_matrix(correlation) + else: + model_parameters["correlation"] = [[1.0]] + return model_parameters + + @classmethod + def _rebuild_correlation_matrix(cls, triangular_correlation: List[List[float]]) -> List[List[float]]: + """ + Rebuild a valid correlation matrix from its lower triangular part. + + Args: + triangular_correlation (List[List[float]]): Lower triangular values (excluding the diagonal). + + Returns: + List[List[float]]: Reconstructed and adjusted full correlation matrix. 
+ """ + size = len(triangular_correlation) + 1 + left = np.zeros((size, size)) + right = np.zeros((size, size)) + for idx, values in enumerate(triangular_correlation): + extended_values = values + [0.0] * (size - idx - 1) + left[idx + 1, :] = extended_values + right[:, idx + 1] = extended_values + correlation = left + right + max_value = np.abs(correlation).max() + if max_value > 1: + correlation /= max_value + correlation += np.identity(size) + adjusted_corr = cls._get_nearest_correlation_matrix(correlation) + return adjusted_corr.tolist() diff --git a/synthpop/method/__init__.py b/synthpop/method/__init__.py index e3db882..c338d6e 100644 --- a/synthpop/method/__init__.py +++ b/synthpop/method/__init__.py @@ -1,83 +1,10 @@ -from synthpop.method.base import Method -from synthpop.method.helpers import proper, smooth -from synthpop.method.empty import EmptyMethod -from synthpop.method.sample import SampleMethod -from synthpop.method.cart import CARTMethod -from synthpop.method.norm import NormMethod -from synthpop.method.normrank import NormRankMethod -from synthpop.method.polyreg import PolyregMethod -from synthpop.method.gaussian_copula import GaussianCopulaMethod - -EMPTY_METHOD = '' -SAMPLE_METHOD = 'sample' -# non-parametric methods -CART_METHOD = 'cart' -# parametric methods -PARAMETRIC_METHOD = 'parametric' -NORM_METHOD = 'norm' -NORMRANK_METHOD = 'normrank' -POLYREG_METHOD = 'polyreg' -GC_METHOD = 'gaussian copula' - - -METHODS_MAP = {EMPTY_METHOD: EmptyMethod, - SAMPLE_METHOD: SampleMethod, - CART_METHOD: CARTMethod, - NORM_METHOD: NormMethod, - NORMRANK_METHOD: NormRankMethod, - POLYREG_METHOD: PolyregMethod, - GC_METHOD: GaussianCopulaMethod - } - - -ALL_METHODS = (EMPTY_METHOD, SAMPLE_METHOD, CART_METHOD, PARAMETRIC_METHOD, NORM_METHOD, NORMRANK_METHOD, POLYREG_METHOD, GC_METHOD) -DEFAULT_METHODS = (CART_METHOD, PARAMETRIC_METHOD, GC_METHOD) -INIT_METHODS = (SAMPLE_METHOD, CART_METHOD, PARAMETRIC_METHOD, GC_METHOD) -NA_METHODS = (CART_METHOD, NORM_METHOD, NORMRANK_METHOD, POLYREG_METHOD, GC_METHOD) - - -# method maps -PARAMETRIC_METHOD_MAP = {'int': NORMRANK_METHOD, - 'float': NORMRANK_METHOD, - 'datetime': NORMRANK_METHOD, - 'bool': POLYREG_METHOD, - 'category': POLYREG_METHOD - } - -CART_METHOD_MAP = {'int': CART_METHOD, - 'float': CART_METHOD, - 'datetime': CART_METHOD, - 'bool': CART_METHOD, - 'category': CART_METHOD - } - -GC_METHOD_MAP = {'int': GC_METHOD, - 'float': GC_METHOD, - 'datetime': GC_METHOD, - 'bool': GC_METHOD, - 'category': GC_METHOD - } - -SAMPLE_METHOD_MAP = {'int': SAMPLE_METHOD, - 'float': SAMPLE_METHOD, - 'datetime': SAMPLE_METHOD, - 'bool': SAMPLE_METHOD, - 'category': SAMPLE_METHOD - } - -DEFAULT_METHODS_MAP = {CART_METHOD: CART_METHOD_MAP, - PARAMETRIC_METHOD: PARAMETRIC_METHOD_MAP, - GC_METHOD: GC_METHOD_MAP - } - - -INIT_METHODS_MAP = DEFAULT_METHODS_MAP.copy() -INIT_METHODS_MAP[SAMPLE_METHOD] = SAMPLE_METHOD_MAP - - -CONT_TO_CAT_METHODS_MAP = {CART_METHOD: CART_METHOD, - NORM_METHOD: POLYREG_METHOD, - NORMRANK_METHOD: POLYREG_METHOD, - POLYREG_METHOD: POLYREG_METHOD, - GC_METHOD: GC_METHOD - } +from .cart import CARTMethod +from .GC import GaussianCopulaMethod # or from .gaussian_copula import GaussianCopulaMethod +from .helpers import proper, smooth + +__all__ = [ + "CARTMethod", + "GaussianCopulaMethod", + "proper", + "smooth", +] \ No newline at end of file diff --git a/synthpop/method/cart.py b/synthpop/method/cart.py index c5f580a..eaf227d 100644 --- a/synthpop/method/cart.py +++ b/synthpop/method/cart.py @@ -1,56 +1,151 @@ import numpy as np 
import pandas as pd from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +import logging +from synthpop.method.helpers import proper, smooth +from synthpop.constants import NUM_COLS_DTYPES, CAT_COLS_DTYPES -from synthpop.method import Method, proper, smooth -# global variables -from synthpop import NUM_COLS_DTYPES, CAT_COLS_DTYPES +# Set up logging +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) -class CARTMethod(Method): - def __init__(self, dtype, smoothing=False, proper=False, minibucket=5, random_state=None, *args, **kwargs): - self.dtype = dtype +class CARTMethod: + """ + + + Attributes: + metadata (dict): Mapping of column names to abstract data types + (e.g., "numerical", "categorical", "boolean", "datetime", "timedelta"). + smoothing (bool): Whether to apply smoothing to numerical predictions. + proper (bool): Whether to apply a resampling (proper) step during fitting. + minibucket (int): Minimum samples per leaf in the decision tree. + random_state (int or None): Random seed. + tree_params (dict): Additional parameters to pass to the decision tree constructors. + """ + def __init__(self, metadata, smoothing=False, proper=False, minibucket=5, random_state=None, tree_params=None): + self.metadata = metadata self.smoothing = smoothing self.proper = proper self.minibucket = minibucket self.random_state = random_state + self.tree_params = tree_params or {} + self.models = {} # Dict: column -> fitted decision tree model + self.leaf_values = {} # Dict: column -> dict mapping leaf id -> array of training y values + self.y_bounds = {} # Dict: column -> (y_real_min, y_real_max) for numerical columns + self.fitted = False + self._train_data = None # Copy of preprocessed training data - if self.dtype in CAT_COLS_DTYPES: - self.cart = DecisionTreeClassifier(min_samples_leaf=self.minibucket, random_state=self.random_state) - if self.dtype in NUM_COLS_DTYPES: - self.cart = DecisionTreeRegressor(min_samples_leaf=self.minibucket, random_state=self.random_state) - - def fit(self, X_df, y_df): - if self.proper: - X_df, y_df = proper(X_df=X_df, y_df=y_df, random_state=self.random_state) - - X_df, y_df = self.prepare_dfs(X_df=X_df, y_df=y_df, normalise_num_cols=False, one_hot_cat_cols=True) - if self.dtype in NUM_COLS_DTYPES: - self.y_real_min, self.y_real_max = np.min(y_df), np.max(y_df) - - X = X_df.to_numpy() - y = y_df.to_numpy() - self.cart.fit(X, y) - - # save the y distribution wrt trained tree nodes - leaves = self.cart.apply(X) - leaves_y_df = pd.DataFrame({'leaves': leaves, 'y': y}) - self.leaves_y_dict = leaves_y_df.groupby('leaves').apply(lambda x: x.to_numpy()[:, -1]).to_dict() - - def predict(self, X_test_df): - X_test_df, _ = self.prepare_dfs(X_df=X_test_df, normalise_num_cols=False, one_hot_cat_cols=True, fit=False) - - # predict the leaves and for each leaf randomly sample from the observed values - X_test = X_test_df.to_numpy() - leaves_pred = self.cart.apply(X_test) - y_pred = np.zeros(len(leaves_pred), dtype=object) + def fit(self, data: pd.DataFrame) -> None: + """ + Fit a CART model for each column using the remaining columns as predictors. + For numerical (and related) columns, stores the min and max of y for smoothing. + Uses the 'proper' function to optionally resample the data. + + Args: + data (pd.DataFrame): Preprocessed data. 
+ """ + self._train_data = data.copy() + for col in data.columns: + # Prepare predictors (X) and target (y) + X = data.drop(columns=[col]) + y = data[col] + if self.proper: + X, y = proper(X_df=X, y_df=y, random_state=self.random_state) + dtype = self.metadata.get(col, "numerical") + # Choose the appropriate decision tree + if dtype in ["numerical", "datetime", "timedelta"]: + model = DecisionTreeRegressor(min_samples_leaf=self.minibucket, random_state=self.random_state, **self.tree_params) + # Store bounds for smoothing + self.y_bounds[col] = (np.min(y.to_numpy()), np.max(y.to_numpy())) + elif dtype in ["categorical", "boolean"]: + model = DecisionTreeClassifier(min_samples_leaf=self.minibucket, random_state=self.random_state, **self.tree_params) + else: + warnings.warn(f"Unknown data type for column '{col}', defaulting to regressor.") + model = DecisionTreeRegressor(min_samples_leaf=self.minibucket, random_state=self.random_state, **self.tree_params) + try: + X_np = X.to_numpy() + y_np = y.to_numpy() + model.fit(X_np, y_np) + self.models[col] = model + # Compute leaf indices for training data and group target values by leaf. + leaves = model.apply(X_np) + df_leaves = pd.DataFrame({'leaf': leaves, 'y': y_np}) + leaf_dict = df_leaves.groupby('leaf')['y'].apply(lambda arr: arr.values).to_dict() + self.leaf_values[col] = leaf_dict + except Exception as e: + LOGGER.error(f"Error fitting model for column '{col}': {e}") + self.fitted = True - leaves_pred_index_df = pd.DataFrame({'leaves_pred': leaves_pred, 'index': range(len(leaves_pred))}) - leaves_pred_index_dict = leaves_pred_index_df.groupby('leaves_pred').apply(lambda x: x.to_numpy()[:, -1]).to_dict() - for leaf, indices in leaves_pred_index_dict.items(): - y_pred[indices] = np.random.choice(self.leaves_y_dict[leaf], size=len(indices), replace=True) + def predict(self, X_test: pd.DataFrame) -> pd.DataFrame: + """ + Generate synthetic predictions using leaf-based sampling. + For each column, the method predicts the leaf for each test row and then samples + randomly from the training values associated with that leaf. + Optionally applies smoothing to numerical columns. + + Args: + X_test (pd.DataFrame): Preprocessed predictors (should contain same columns as training data). + + Returns: + pd.DataFrame: A DataFrame with synthetic predictions for each column. + """ + if not self.fitted: + raise ValueError("The model must be fitted before prediction.") + + predictions = {} + for col, model in self.models.items(): + dtype = self.metadata.get(col, "numerical") + # Prepare predictors for this column (drop the target if present) + X = X_test.drop(columns=[col], errors='ignore') + X_np = X.to_numpy() + # Get leaf indices for test data + leaves_pred = model.apply(X_np) + y_pred = np.empty(len(leaves_pred), dtype=object) + # Group indices by leaf + leaf_indices = pd.DataFrame({'leaf': leaves_pred, 'index': range(len(leaves_pred))}) \ + .groupby('leaf')['index'].apply(list).to_dict() + for leaf, indices in leaf_indices.items(): + if leaf in self.leaf_values[col]: + samples = np.random.choice(self.leaf_values[col][leaf], size=len(indices), replace=True) + else: + # Fallback: if unseen leaf, use direct prediction. 
+ samples = model.predict(X_np[indices]) + for i, idx in enumerate(indices): + y_pred[idx] = samples[i] + y_pred = np.array(y_pred) + # Apply smoothing if enabled and if numeric/datetime/timedelta + if self.smoothing and dtype in ["numerical", "datetime", "timedelta"]: + y_real_min, y_real_max = self.y_bounds[col] + y_pred = smooth(dtype, y_pred, y_real_min, y_real_max) + predictions[col] = y_pred + return pd.DataFrame(predictions) + + def sample(self, num_rows: int) -> pd.DataFrame: + """ + Generate synthetic data with a specified number of rows. + + The predictor sampling uses the maximum of the requested number of rows + and the size of the original training data (to ensure the trees see as much data + as possible). However, the returned DataFrame has the user-specified number of rows. + + Args: + num_rows (int): The number of synthetic samples to generate. + + Returns: + pd.DataFrame: A DataFrame containing synthetic data with num_rows rows. + """ + if not self.fitted: + raise ValueError("The model must be fitted before generating synthetic data.") + + # Use the maximum between num_rows and the original data size for predictor sampling + sample_size = max(num_rows, len(self._train_data)) + synthetic_input = self._train_data.sample(n=sample_size, replace=True, random_state=self.random_state) + + # Generate synthetic data using the predict method + synthetic_full = self.predict(synthetic_input) + + # Return only the first num_rows synthetic observations + return synthetic_full.iloc[:num_rows].reset_index(drop=True) - if self.smoothing and self.dtype in NUM_COLS_DTYPES: - y_pred = smooth(self.dtype, y_pred, self.y_real_min, self.y_real_max) - return y_pred diff --git a/synthpop/method/gaussian_copula.py b/synthpop/method/gaussian_copula.py deleted file mode 100644 index f1e60cd..0000000 --- a/synthpop/method/gaussian_copula.py +++ /dev/null @@ -1,106 +0,0 @@ -import numpy as np -import pandas as pd -from scipy.stats import norm, ks_2samp -# from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor - -from synthpop.method import Method, proper, smooth -# global variables -# from synthpop import NUM_COLS_DTYPES, CAT_COLS_DTYPES - - -class GaussianCopulaMethod(Method): - def __init__(self, dtype, smoothing=False, proper=False, minibucket=5, random_state=None, *args, **kwargs): - self.dtype = dtype - self.proper = proper - self.random_state = random_state - self.smoothing = smoothing - self.minibucket = minibucket - - # learnt parameters - self.means = None - self.cov_matrix = None - self.scaler = None - self.data_marginals = None - - def fit(self, data): - """ - Fit the Gaussian Copula model to the given data. - """ - # Step 1: Store data marginals (quantiles for each feature) - self.data_marginals = [] - for col in data.columns: - sorted_data = np.sort(data[col]) - quantiles = np.linspace(0, 1, len(sorted_data)) - self.data_marginals.append((sorted_data, quantiles, col)) - - # Step 2: Convert data to normal distribution using CDF (Gaussianization) - uniform_data = data.rank(pct=True) # Get percentile rank for each column (empirical CDF) - gaussian_data = norm.ppf(uniform_data) # Convert uniform to standard normal - - # Step 3: Fit a multivariate Gaussian to the normalized data - self.means = gaussian_data.mean(axis=0) - self.cov_matrix = np.cov(gaussian_data, rowvar=False) - - def predict(self, n_samples): - """ - Generate synthetic data using the fitted Gaussian Copula model. 
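The rewritten CARTMethod above follows the same fit/sample pattern, fitting one decision tree per column and resampling observed leaf values at generation time. A minimal sketch under illustrative assumptions (fully numeric columns; note that the `warnings.warn` fallback in `fit` presumes an `import warnings` that this hunk does not add, though the path is not exercised here):

import numpy as np
import pandas as pd
from synthpop.method.cart import CARTMethod

data = pd.DataFrame({
    "x1": np.random.normal(0, 1, 300),
    "x2": np.random.normal(5, 2, 300),
})
metadata = {"x1": "numerical", "x2": "numerical"}

cart = CARTMethod(metadata, smoothing=False, minibucket=5, random_state=0)
cart.fit(data)
synthetic = cart.sample(num_rows=200)
print(synthetic.describe())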
- """ - # Step 1: Sample from the multivariate normal distribution - synthetic_gaussian = np.random.multivariate_normal(self.means, self.cov_matrix, n_samples) - - # Step 2: Convert back to uniform distribution using CDF (normal -> uniform) - synthetic_uniform = norm.cdf(synthetic_gaussian) - - # Step 3: Map uniform data back to the original marginals - synthetic_data = pd.DataFrame() - for i, (sorted_data, quantiles, col) in enumerate(self.data_marginals): - synthetic_data[col] = np.interp(synthetic_uniform[:, i], quantiles, sorted_data) - - return synthetic_data - -def evaluate_distribution(real_data, synthetic_data): - """ - Compare the distribution of each column in the real and synthetic data using - the Kolmogorov-Smirnov (KS) test. - """ - results = {} - for column in real_data.columns: - real_col = real_data[column].dropna() - synthetic_col = synthetic_data[column].dropna() - - # Perform the KS test - ks_stat, p_value = ks_2samp(real_col, synthetic_col) - - # Store the result - results[column] = {'ks_stat': ks_stat, 'p_value': p_value} - return results - -def evaluate_correlations(real_data, synthetic_data): - """ - Compare the pairwise correlation matrices of the real and synthetic data. - """ - real_corr = real_data.corr() - synthetic_corr = synthetic_data.corr() - - # Compute the difference between the correlation matrices - corr_diff = np.abs(real_corr - synthetic_corr) - return corr_diff.mean().mean() # Average correlation difference - -def run_diagnostic(real_data, synthetic_data, target_column): - """ - Run diagnostics on synthetic data by evaluating distribution, correlations, and - classification model performance. - """ - # Step 1: Evaluate distributions - distribution_results = evaluate_distribution(real_data, synthetic_data) - - # Step 2: Evaluate correlations - correlation_diff = evaluate_correlations(real_data, synthetic_data) - - # Aggregate results - diagnostics = { - 'distribution_results': distribution_results, - 'correlation_diff': correlation_diff - } - - return diagnostics \ No newline at end of file diff --git a/synthpop/method/helpers.py b/synthpop/method/helpers.py index e4c1c93..7f227e1 100644 --- a/synthpop/method/helpers.py +++ b/synthpop/method/helpers.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from scipy.stats import mode, iqr @@ -15,31 +16,26 @@ def proper(X_df=None, y_df=None, random_state=None): def smooth(dtype, y_synth, y_real_min, y_real_max): + # Ensure y_synth is numeric (float) before proceeding. 
+ y_synth = np.asarray(y_synth, dtype=float) + indices = [True for _ in range(len(y_synth))] - # exclude from smoothing if freq for a single value higher than 70% + # Exclude from smoothing if frequency for a single value is higher than 70% y_synth_mode = mode(y_synth) if y_synth_mode.count / len(y_synth) > 0.7: indices = np.logical_and(indices, y_synth != y_synth_mode.mode) - # exclude from smoothing if data are top-coded - approximate check + # Exclude from smoothing if data are top-coded - approximate check y_synth_sorted = np.sort(y_synth) - top_coded = 10 * np.abs(y_synth_sorted[-2]) < np.abs(y_synth_sorted[-1]) - np.abs(y_synth_sorted[-2]) + top_coded = 10 * np.abs(y_synth_sorted[-2]) < np.abs(y_synth_sorted[-1] - y_synth_sorted[-2]) if top_coded: indices = np.logical_and(indices, y_synth != y_real_max) - # R version - # http://www.bagualu.net/wordpress/wp-content/uploads/2015/10/Modern_Applied_Statistics_With_S.pdf - # R default (ned0) - [link eq5.5 in p127] - this is used as the other one is not a closed formula - # R recommended (SJ) - [link eq5.6 in p129] - bw = 0.9 * len(y_synth[indices]) ** -1/5 * np.minimum(np.std(y_synth[indices]), iqr(y_synth[indices]) / 1.34) - - # # Python version - much slower as it's not a closed formula and requires a girdsearch - # bandwidths = 10 ** np.linspace(-1, 1, 10) - # grid = GridSearchCV(KernelDensity(kernel='gaussian'), {'bandwidth': bandwidths}, cv=3, iid=False) - # grid.fit(y_synth[indices, None]) - # bw = grid.best_estimator_.bandwidth + # Compute bandwidth using the provided formula + bw = 0.9 * len(y_synth[indices]) ** (-1/5) * np.minimum(np.std(y_synth[indices]), iqr(y_synth[indices]) / 1.34) + # Apply smoothing: for values flagged by indices, sample from a normal distribution y_synth[indices] = np.array([np.random.normal(loc=value, scale=bw) for value in y_synth[indices]]) if not top_coded: y_real_max += bw @@ -48,3 +44,173 @@ def smooth(dtype, y_synth, y_real_min, y_real_max): y_synth[indices] = y_synth[indices].astype(int) return y_synth + + + +def validate_numerical_distributions(numerical_distributions, metadata_columns): + """Validate ``numerical_distributions``. + + Raise an error if it's not None or dict, or if its columns are not present in the metadata. + + Args: + numerical_distributions (dict): + Dictionary that maps field names from the table that is being modeled with + the distribution that needs to be used. + metadata_columns (list): + Columns present in the metadata. + """ + if numerical_distributions: + if not isinstance(numerical_distributions, dict): + raise TypeError('numerical_distributions can only be None or a dict instance.') + + invalid_columns = numerical_distributions.keys() - set(metadata_columns) + if invalid_columns: + raise SynthesizerInputError( + 'Invalid column names found in the numerical_distributions dictionary ' + f'{invalid_columns}. The column names you provide must be present ' + 'in the metadata.' + ) + +def warn_missing_numerical_distributions(numerical_distributions, processed_data_columns): + """Raise an `UserWarning` when numerical distribution columns don't exist anymore.""" + unseen_columns = numerical_distributions.keys() - set(processed_data_columns) + for column in unseen_columns: + warnings.warn( + f"Cannot use distribution '{numerical_distributions[column]}' for column " + f"'{column}' because the column is not statistically modeled.", + UserWarning, + ) + +def flatten_array(nested, prefix=''): + """Flatten an array as a dict. 
+ + Args: + nested (list, numpy.array): + Iterable to flatten. + prefix (str): + Name to append to the array indices. Defaults to ``''``. + + Returns: + dict: + Flattened array. + """ + result = {} + for index in range(len(nested)): + prefix_key = '__'.join([prefix, str(index)]) if len(prefix) else str(index) + + value = nested[index] + if isinstance(value, (list, np.ndarray)): + result.update(flatten_array(value, prefix=prefix_key)) + + elif isinstance(value, dict): + result.update(flatten_dict(value, prefix=prefix_key)) + + else: + result[prefix_key] = value + + return result + + +def flatten_dict(nested, prefix=''): + """Flatten a dictionary. + + This method returns a flatten version of a dictionary, concatenating key names with + double underscores. + + Args: + nested (dict): + Original dictionary to flatten. + prefix (str): + Prefix to append to key name. Defaults to ``''``. + + Returns: + dict: + Flattened dictionary. + """ + result = {} + + for key, value in nested.items(): + prefix_key = '__'.join([prefix, str(key)]) if len(prefix) else key + + if key in IGNORED_DICT_KEYS and not isinstance(value, (dict, list)): + continue + + elif isinstance(value, dict): + result.update(flatten_dict(value, prefix_key)) + + elif isinstance(value, (np.ndarray, list)): + result.update(flatten_array(value, prefix_key)) + + else: + result[prefix_key] = value + + return result + +def unflatten_dict(flat): + """Transform a flattened dict into its original form. + + Args: + flat (dict): + Flattened dict. + + Returns: + dict: + Nested dict (if corresponds) + """ + unflattened = {} + + for key, value in sorted(flat.items(), key=_key_order): + if '__' in key: + key, subkey = key.split('__', 1) + subkey, name = subkey.rsplit('__', 1) + + if name.isdigit(): + column_index = int(name) + row_index = int(subkey) + + array = unflattened.setdefault(key, []) + + if len(array) == row_index: + row = [] + array.append(row) + elif len(array) == row_index + 1: + row = array[row_index] + else: + # This should never happen + raise ValueError('There was an error unflattening the extension.') + + if len(row) == column_index: + row.append(value) + else: + # This should never happen + raise ValueError('There was an error unflattening the extension.') + + else: + subdict = unflattened.setdefault(key, {}) + if subkey.isdigit() and key != 'univariates': + subkey = int(subkey) + + inner = subdict.setdefault(subkey, {}) + inner[name] = value + + else: + unflattened[key] = value + + return unflattened + + + +def extract_metadata(df: pd.DataFrame) -> dict: + """ + Extract metadata from a pandas DataFrame. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + dict: A dictionary where keys are column names and values are column types. 
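One caveat on helpers.py: `validate_numerical_distributions`, `warn_missing_numerical_distributions`, `flatten_dict`, and `unflatten_dict` reference `SynthesizerInputError`, `warnings`, `IGNORED_DICT_KEYS`, and `_key_order`, none of which this hunk imports or defines. A minimal sketch of definitions that would make the module importable, modeled on the SDV-style utilities these helpers appear to mirror (treat the exact names and values as assumptions):

import warnings

# Keys skipped while flattening copulas parameter dicts (assumed values).
IGNORED_DICT_KEYS = ['fitted', 'distribution', 'type']


class SynthesizerInputError(ValueError):
    """Raised when user-supplied synthesizer arguments are invalid."""


def _key_order(key_value):
    """Sort key for flattened keys: numeric path segments compare as integers."""
    parts = []
    for part in key_value[0].split('__'):
        if part.isdigit():
            part = int(part)
        parts.append(part)
    return parts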
+ """ + return {col: str(df[col].dtype) for col in df.columns} + + + diff --git a/synthpop/metrics/__init__.py b/synthpop/metrics/__init__.py new file mode 100644 index 0000000..b1599ca --- /dev/null +++ b/synthpop/metrics/__init__.py @@ -0,0 +1,29 @@ +# __init__.py + +from .diagnostic_report import MetricsReport +from .efficacy_metrics import EfficacyMetrics +from .privacy_metrics import DisclosureProtection +from .single_columns_metrics import ( + category_coverage, + range_coverage, + boundary_adherence, + category_adherence, + ks_complement, + tv_complement, + statistic_similarity, + missing_value_similarity +) + +__all__ = [ + "MetricsReport", + "EfficacyMetrics", + "DisclosureProtection", + "category_coverage", + "range_coverage", + "boundary_adherence", + "category_adherence", + "ks_complement", + "tv_complement", + "statistic_similarity", + "missing_value_similarity" +] diff --git a/synthpop/metrics/diagnostic_report.py b/synthpop/metrics/diagnostic_report.py new file mode 100644 index 0000000..8c0ce4f --- /dev/null +++ b/synthpop/metrics/diagnostic_report.py @@ -0,0 +1,107 @@ +# metrics_report.py + +import pandas as pd +import numpy as np +from .single_columns_metrics import ( + category_coverage, + range_coverage, + boundary_adherence, + category_adherence, + ks_complement, + tv_complement, + statistic_similarity, + missing_value_similarity +) + +class MetricsReport: + """ + A class to produce a report comparing real and synthetic datasets with respect + to data validity and data structure. + + The report computes the following metrics for each column: + - For numerical (or datetime/timedelta) columns: + * Range Coverage: Proportion of the real data's range covered by the synthetic data. + * Boundary Adherence: Fraction of synthetic values within the real data's min/max. + * KS Complement: 1 minus the Kolmogorov-Smirnov statistic. + * TV Complement: 1 minus the Total Variation distance computed over histograms. + * Statistic Similarity: Similarity of mean, std, and median. + * Missing Value Similarity: Similarity in the proportion of missing values. + - For categorical (or boolean) columns: + * Category Coverage: Proportion of real categories found in synthetic data. + * Category Adherence: Fraction of synthetic values that are valid real categories. + * Missing Value Similarity. + + Optionally, you may provide a metadata dictionary mapping column names to abstract types. + If metadata is not provided, the type is inferred from the pandas dtype. + """ + + def __init__(self, real_df: pd.DataFrame, synthetic_df: pd.DataFrame, metadata: dict = None): + """ + Args: + real_df (pd.DataFrame): The real dataset. + synthetic_df (pd.DataFrame): The synthetic dataset. + metadata (dict, optional): Mapping from column names to types (e.g., "numerical", + "categorical", "boolean", "datetime", "timedelta"). If not provided, types are inferred. + """ + self.real_df = real_df + self.synthetic_df = synthetic_df + # If no metadata is provided, infer types based on the dtype string. + if metadata is None: + metadata = {} + for col in real_df.columns: + dtype = str(real_df[col].dtype) + if "float" in dtype or "int" in dtype: + metadata[col] = "numerical" + elif "datetime" in dtype: + metadata[col] = "datetime" + elif "timedelta" in dtype: + metadata[col] = "timedelta" + elif "bool" in dtype: + metadata[col] = "boolean" + else: + metadata[col] = "categorical" + self.metadata = metadata + + def generate_report(self) -> pd.DataFrame: + """ + Generate a report comparing the real and synthetic datasets. 
+ + Returns: + pd.DataFrame: A DataFrame where each row corresponds to a column in the data and + contains computed metrics. Non-applicable metrics are marked as 'N/A'. + """ + report_data = [] + for col in self.real_df.columns: + col_type = self.metadata.get(col, "numerical") + real = self.real_df[col] + synthetic = self.synthetic_df[col] + col_report = {"column": col, "type": col_type} + + # Missing value similarity applies to all columns. + col_report["missing_value_similarity"] = missing_value_similarity(real, synthetic) + + # For numerical/datetime/timedelta columns, compute numerical metrics and mark categorical metrics as 'N/A' + if col_type in ["numerical", "datetime", "timedelta"]: + col_report["range_coverage"] = range_coverage(real, synthetic) + col_report["boundary_adherence"] = boundary_adherence(real, synthetic) + col_report["ks_complement"] = ks_complement(real, synthetic) + col_report["tv_complement"] = tv_complement(real, synthetic) + col_report["statistic_similarity"] = statistic_similarity(real, synthetic) + col_report["category_coverage"] = "N/A" + col_report["category_adherence"] = "N/A" + + # For categorical/boolean columns, compute categorical metrics and mark numerical metrics as 'N/A' + elif col_type in ["categorical", "boolean"]: + col_report["range_coverage"] = "N/A" + col_report["boundary_adherence"] = "N/A" + col_report["ks_complement"] = "N/A" + col_report["tv_complement"] = "N/A" + col_report["statistic_similarity"] = "N/A" + col_report["category_coverage"] = category_coverage(real, synthetic) + col_report["category_adherence"] = category_adherence(real, synthetic) + + else: + col_report["note"] = "Unknown type; metrics not computed" + + report_data.append(col_report) + return pd.DataFrame(report_data) diff --git a/synthpop/metrics/efficacy_metrics.py b/synthpop/metrics/efficacy_metrics.py new file mode 100644 index 0000000..c0edcd1 --- /dev/null +++ b/synthpop/metrics/efficacy_metrics.py @@ -0,0 +1,101 @@ +# efficacy_metrics.py + +import numpy as np +import pandas as pd +from sklearn.linear_model import LinearRegression +from sklearn.tree import DecisionTreeClassifier +from sklearn.metrics import ( + mean_squared_error, + mean_absolute_error, + r2_score, + accuracy_score, + f1_score +) +from sklearn.model_selection import train_test_split + +class EfficacyMetrics: + """ + A class to compute efficacy metrics comparing real and synthetic datasets + for downstream predictive tasks. The idea is to train a predictive model on + synthetic data and evaluate its performance on real data. The type of metrics + computed depends on the task: + + - For regression (when the target is numerical): + * Mean Squared Error (MSE) + * Mean Absolute Error (MAE) + * R^2 Score + + - For classification (when the target is categorical/boolean): + * Accuracy Score + * Weighted F1 Score + + Parameters + ---------- + task : str, optional (default='regression') + The predictive task type. Must be either 'regression' or 'classification'. + target_column : str + The name of the target column. Must exist in both real and synthetic data. + test_size : float, optional (default=0.3) + (Optional) Proportion of the real data to be used for testing. + (Note: In the default approach we train on all synthetic data and test on full real data.) + random_state : int, optional (default=42) + Random seed for reproducibility. 
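A short usage sketch of the MetricsReport above; the column names and toy data are illustrative, and metadata is inferred from the pandas dtypes when it is not passed in:

import numpy as np
import pandas as pd
from synthpop.metrics import MetricsReport

real = pd.DataFrame({
    "age": np.random.normal(40, 10, 500),
    "group": np.random.choice(["a", "b", "c"], 500),
})
synthetic = pd.DataFrame({
    "age": np.random.normal(41, 11, 500),
    "group": np.random.choice(["a", "b"], 500),
})

report = MetricsReport(real, synthetic).generate_report()
print(report[["column", "type", "ks_complement", "category_coverage"]])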
+ """ + + def __init__(self, task='regression', target_column=None, test_size=0.3, random_state=42): + if task not in ['regression', 'classification']: + raise ValueError("Task must be either 'regression' or 'classification'.") + if target_column is None: + raise ValueError("A target column must be specified.") + + self.task = task + self.target_column = target_column + self.test_size = test_size + self.random_state = random_state + + def evaluate(self, real_df: pd.DataFrame, synthetic_df: pd.DataFrame) -> dict: + """ + Evaluate the efficacy of synthetic data by training a model on synthetic data + and testing its performance on real data. + + Args: + real_df (pd.DataFrame): The real dataset. + synthetic_df (pd.DataFrame): The synthetic dataset. + + Returns: + dict: A dictionary of performance metrics. + """ + # Verify that the target column exists in both datasets. + if self.target_column not in real_df.columns or self.target_column not in synthetic_df.columns: + raise ValueError("The target column must exist in both real and synthetic datasets.") + + # Separate features and target. + X_syn = synthetic_df.drop(columns=[self.target_column]) + y_syn = synthetic_df[self.target_column] + X_real = real_df.drop(columns=[self.target_column]) + y_real = real_df[self.target_column] + + # For the purposes of efficacy metrics, we train on synthetic data and test on real data. + if self.task == 'regression': + model = LinearRegression() + model.fit(X_syn, y_syn) + predictions = model.predict(X_real) + mse = mean_squared_error(y_real, predictions) + mae = mean_absolute_error(y_real, predictions) + r2 = r2_score(y_real, predictions) + return { + "mse": mse, + "mae": mae, + "r2": r2 + } + else: # classification + model = DecisionTreeClassifier(random_state=self.random_state) + model.fit(X_syn, y_syn) + predictions = model.predict(X_real) + accuracy = accuracy_score(y_real, predictions) + f1 = f1_score(y_real, predictions, average='weighted') + return { + "accuracy": accuracy, + "f1_score": f1 + } + diff --git a/synthpop/metrics/privacy_metrics.py b/synthpop/metrics/privacy_metrics.py new file mode 100644 index 0000000..211c315 --- /dev/null +++ b/synthpop/metrics/privacy_metrics.py @@ -0,0 +1,86 @@ +# privacy_metrics.py + +import numpy as np +import pandas as pd +from sklearn.neighbors import NearestNeighbors + +class DisclosureProtection: + """ + A class to compute the disclosure protection metric for synthetic data. + + The metric is defined as 1 minus the proportion of synthetic records that are too similar + (i.e. within a risk threshold) to a record in the real dataset. + + Parameters + ---------- + real_data : pd.DataFrame + A DataFrame containing the real data. The data should be numeric or preprocessed. + synthetic_data : pd.DataFrame + A DataFrame containing the synthetic data (with the same columns as real_data). + threshold : float, optional + A distance threshold under which a synthetic record is considered a potential disclosure risk. + If not provided, it is computed as the 10th percentile of the nearest-neighbor distances among real records. + """ + + def __init__(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame, threshold: float = None): + self.real_data = real_data.copy() + self.synthetic_data = synthetic_data.copy() + self.threshold = threshold + self._compute_threshold() + + def _compute_threshold(self): + """ + Compute the threshold if not provided. Uses the 10th percentile of the nearest-neighbor + distances among real records (excluding self-distance). 
+ """ + if self.threshold is None: + # Fit a nearest neighbor model on the real data. + # n_neighbors=2 because the closest neighbor of a record is itself. + nn = NearestNeighbors(n_neighbors=2) + nn.fit(self.real_data) + distances, _ = nn.kneighbors(self.real_data) + # distances[:, 1] are the distances to the closest distinct record. + self.threshold = np.percentile(distances[:, 1], 10) + + def score(self) -> float: + """ + Compute the disclosure protection score. + + For each synthetic record, compute its distance to the nearest real record. + The risk rate is the proportion of synthetic records with distance below the threshold. + The disclosure protection score is 1 - risk_rate (higher is better). + + Returns + ------- + float + Disclosure protection score between 0 and 1. + """ + nn = NearestNeighbors(n_neighbors=1) + nn.fit(self.real_data) + distances, _ = nn.kneighbors(self.synthetic_data) + distances = distances.flatten() + risk_count = np.sum(distances < self.threshold) + risk_rate = risk_count / len(distances) + return 1 - risk_rate + + def report(self) -> dict: + """ + Generate a detailed report of the disclosure protection metric. + + Returns + ------- + dict + A dictionary containing the threshold, risk rate, and the final disclosure protection score. + """ + nn = NearestNeighbors(n_neighbors=1) + nn.fit(self.real_data) + distances, _ = nn.kneighbors(self.synthetic_data) + distances = distances.flatten() + risk_count = np.sum(distances < self.threshold) + risk_rate = risk_count / len(distances) + score = 1 - risk_rate + return { + "threshold": self.threshold, + "risk_rate": risk_rate, + "disclosure_protection_score": score + } diff --git a/synthpop/metrics/single_columns_metrics.py b/synthpop/metrics/single_columns_metrics.py new file mode 100644 index 0000000..38fa5ed --- /dev/null +++ b/synthpop/metrics/single_columns_metrics.py @@ -0,0 +1,253 @@ +# metrics.py + +import numpy as np +import pandas as pd +from scipy.stats import ks_2samp, iqr + +# ------------------------------------------------------------------------------ +# Coverage Metrics +# ------------------------------------------------------------------------------ + +def category_coverage(real: pd.Series, synthetic: pd.Series) -> float: + """ + Measure the proportion of categories present in the real data that are + also present in the synthetic data. + + Args: + real (pd.Series): Real (categorical) data. + synthetic (pd.Series): Synthetic (categorical) data. + + Returns: + float: Ratio (0 to 1) of real categories that are found in the synthetic data. + """ + real_cats = set(real.dropna().unique()) + synth_cats = set(synthetic.dropna().unique()) + if not real_cats: + return 1.0 + return len(real_cats.intersection(synth_cats)) / len(real_cats) + + +def range_coverage(real: pd.Series, synthetic: pd.Series) -> float: + """ + Measure the proportion of the real data's numerical range that is covered by the + synthetic data. If the data is datetime or timedelta, convert it to seconds. + + Args: + real (pd.Series): Real numerical data. + synthetic (pd.Series): Synthetic numerical data. + + Returns: + float: The ratio of the intersection length of the ranges to the real range. + """ + real_min, real_max = real.min(), real.max() + synth_min, synth_max = synthetic.min(), synthetic.max() + + # If the data is datetime, convert to seconds since epoch. 
+ if isinstance(real_min, pd.Timestamp): + real_min = real_min.value / 1e9 # convert nanoseconds to seconds + real_max = real_max.value / 1e9 + synth_min = synth_min.value / 1e9 + synth_max = synth_max.value / 1e9 + # If the data is timedelta, convert to total seconds. + elif isinstance(real_min, pd.Timedelta): + real_min = real_min.total_seconds() + real_max = real_max.total_seconds() + synth_min = synth_min.total_seconds() + synth_max = synth_max.total_seconds() + + if real_max == real_min: + return 1.0 + intersection = max(0, min(real_max, synth_max) - max(real_min, synth_min)) + return intersection / (real_max - real_min) + + +# ------------------------------------------------------------------------------ +# Adherence Metrics +# ------------------------------------------------------------------------------ + +def boundary_adherence(real: pd.Series, synthetic: pd.Series) -> float: + """ + Measure the fraction of synthetic numerical values that lie within the boundaries + of the real data. + + Args: + real (pd.Series): Real numerical data. + synthetic (pd.Series): Synthetic numerical data. + + Returns: + float: The fraction (0 to 1) of synthetic values within [real_min, real_max]. + """ + real_min, real_max = real.min(), real.max() + adherence = ((synthetic >= real_min) & (synthetic <= real_max)).mean() + return adherence + + +def category_adherence(real: pd.Series, synthetic: pd.Series) -> float: + """ + Measure the fraction of synthetic categorical values that are present in the set + of real categories. + + Args: + real (pd.Series): Real categorical data. + synthetic (pd.Series): Synthetic categorical data. + + Returns: + float: The fraction (0 to 1) of synthetic values that are among the real categories. + """ + real_cats = set(real.dropna().unique()) + if not real_cats: + return 1.0 + adherence = synthetic.dropna().apply(lambda x: x in real_cats).mean() + return adherence + +# ------------------------------------------------------------------------------ +# Distribution/Shape Comparison Metrics +# ------------------------------------------------------------------------------ + +def ks_complement(real: pd.Series, synthetic: pd.Series) -> float: + """ + Compute the complement of the Kolmogorov-Smirnov statistic comparing the + real and synthetic data distributions. + + Args: + real (pd.Series): Real numerical data. + synthetic (pd.Series): Synthetic numerical data. + + Returns: + float: 1 - KS statistic (ranges between 0 and 1, where 1 means identical distributions). + """ + real_clean = real.dropna() + synthetic_clean = synthetic.dropna() + if len(real_clean) == 0 or len(synthetic_clean) == 0: + return 0.0 + ks_stat, _ = ks_2samp(real_clean, synthetic_clean) + return 1 - ks_stat + + +def tv_complement(real: pd.Series, synthetic: pd.Series, bins: int = 10) -> float: + """ + Compute the complement of the Total Variation (TV) distance between the histograms + of the real and synthetic data. A value of 1 indicates identical distributions. + + If the data is datetime or timedelta, convert it to numeric values (in seconds). + + Args: + real (pd.Series): Real numerical data. + synthetic (pd.Series): Synthetic numerical data. + bins (int, optional): Number of bins to use for the histograms. Defaults to 10. + + Returns: + float: 1 - TV distance, where TV is computed over the normalized histograms. 
+ """ + real_clean = real.dropna() + synthetic_clean = synthetic.dropna() + + if len(real_clean) == 0 or len(synthetic_clean) == 0: + return 0.0 + + # Convert datetime/timedelta to numeric values if necessary. + if np.issubdtype(real_clean.dtype, np.datetime64): + # Convert to seconds since epoch + real_clean = real_clean.astype('int64') / 1e9 + synthetic_clean = synthetic_clean.astype('int64') / 1e9 + elif np.issubdtype(real_clean.dtype, np.timedelta64): + # Convert to total seconds + if hasattr(real_clean, 'dt'): + real_clean = real_clean.dt.total_seconds() + synthetic_clean = synthetic_clean.dt.total_seconds() + else: + real_clean = real_clean.astype('int64') / 1e9 + synthetic_clean = synthetic_clean.astype('int64') / 1e9 + + all_data = pd.concat([real_clean, synthetic_clean]) + bin_edges = np.histogram_bin_edges(all_data, bins=bins) + real_hist, _ = np.histogram(real_clean, bins=bin_edges, density=True) + synth_hist, _ = np.histogram(synthetic_clean, bins=bin_edges, density=True) + + # Normalize the histograms + real_hist = real_hist / np.sum(real_hist) + synth_hist = synth_hist / np.sum(synth_hist) + + tv_distance = 0.5 * np.sum(np.abs(real_hist - synth_hist)) + return 1 - tv_distance + + +# ------------------------------------------------------------------------------ +# Statistical Similarity Metrics +# ------------------------------------------------------------------------------ + +def statistic_similarity(real: pd.Series, synthetic: pd.Series) -> float: + """ + Compare basic statistics (mean, standard deviation, and median) of the real and + synthetic data and return an average similarity score between 0 and 1 (1 means perfect similarity). + + If the data is datetime or timedelta, it is converted to a numeric representation (seconds). + + Args: + real (pd.Series): Real data. + synthetic (pd.Series): Synthetic data. + + Returns: + float: Similarity score between 0 and 1. + """ + real_clean = real.dropna() + synthetic_clean = synthetic.dropna() + if len(real_clean) == 0 or len(synthetic_clean) == 0: + return 0.0 + + eps = 1e-8 # small constant to avoid division by zero + + # Convert datetime/timedelta to numeric values (in seconds) + if np.issubdtype(real_clean.dtype, np.datetime64): + real_vals = real_clean.astype('int64') / 1e9 + synth_vals = synthetic_clean.astype('int64') / 1e9 + elif np.issubdtype(real_clean.dtype, np.timedelta64): + # Use the .dt accessor if available + if hasattr(real_clean, 'dt'): + real_vals = real_clean.dt.total_seconds() + synth_vals = synthetic_clean.dt.total_seconds() + else: + real_vals = real_clean.astype('int64') / 1e9 + synth_vals = synthetic_clean.astype('int64') / 1e9 + else: + real_vals = real_clean + synth_vals = synthetic_clean + + stats = ['mean', 'std', 'median'] + real_stats = { + 'mean': real_vals.mean(), + 'std': real_vals.std(), + 'median': real_vals.median() + } + synth_stats = { + 'mean': synth_vals.mean(), + 'std': synth_vals.std(), + 'median': synth_vals.median() + } + + similarities = [] + for stat in stats: + diff = abs(real_stats[stat] - synth_stats[stat]) + denom = abs(real_stats[stat]) + eps + sim = 1 - (diff / denom) + sim = max(0, min(1, sim)) + similarities.append(sim) + return np.mean(similarities) + + + +def missing_value_similarity(real: pd.Series, synthetic: pd.Series) -> float: + """ + Compare the proportion of missing values (NaNs) in the real and synthetic data. + + Args: + real (pd.Series): Real data. + synthetic (pd.Series): Synthetic data. 
+ + Returns: + float: 1 minus the absolute difference in missing value proportions (ranges from 0 to 1). + """ + real_missing = real.isna().mean() + synth_missing = synthetic.isna().mean() + return 1 - abs(real_missing - synth_missing) + diff --git a/synthpop/processor/__init__.py b/synthpop/processor/__init__.py index 3ef0883..b486b97 100644 --- a/synthpop/processor/__init__.py +++ b/synthpop/processor/__init__.py @@ -1,2 +1,4 @@ -from synthpop.processor.processor import Processor -from synthpop.processor.processor import NAN_KEY, NUMTOCAT_KEY +from synthpop.processor.data_processor import DataProcessor +from synthpop.processor.missing_data_handler import MissingDataHandler + +__all__ = ['DataProcessor', 'MissingDataHandler'] diff --git a/synthpop/processor/data_processor.py b/synthpop/processor/data_processor.py new file mode 100644 index 0000000..34e5a39 --- /dev/null +++ b/synthpop/processor/data_processor.py @@ -0,0 +1,141 @@ +import pandas as pd +import numpy as np +import warnings +import logging +from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler + +# Set up logging +LOGGER = logging.getLogger(__name__) + +class InvalidDataError(Exception): + """Custom exception for invalid data errors.""" + pass + +class DataProcessor: + """Preprocess and post-process data before and after synthetic data generation. + + Handles: + - Type conversions (categorical ↔ numerical). + - Feature transformations for Gaussian Copula. + - Reverse transformations to restore original data types. + """ + + def __init__(self, metadata, enforce_rounding=True, enforce_min_max_values=True, model_kwargs=None, table_name=None, locales=['en_US']): + self.metadata = metadata + self.enforce_rounding = enforce_rounding + self.enforce_min_max_values = enforce_min_max_values + self.model_kwargs = model_kwargs or {} + self.table_name = table_name + self.locales = locales + self._fitted = False + self._prepared_for_fitting = False + self.encoders = {} # Stores encoders for categorical columns + self.scalers = {} # Stores scalers for numerical columns + self.original_columns = None # To restore column order + + def preprocess(self, data: pd.DataFrame) -> pd.DataFrame: + """Transform the raw data into numerical space.""" + if self._fitted: + warnings.warn( + "This model has already been fitted. To use new preprocessed data, " + "please refit the model using 'fit'." 
+ ) + + self.validate(data) + self.original_columns = data.columns # Store original column order + processed_data = self._preprocess(data) + + return processed_data + + def _preprocess(self, data: pd.DataFrame) -> pd.DataFrame: + """Handles encoding, scaling.""" + data = data.copy() + + for col, dtype in self.metadata.items(): + if dtype == "categorical": + # Use Label Encoding for small categories, OneHot for larger + encoder = LabelEncoder() if len(data[col].unique()) < 10 else OneHotEncoder(sparse=False, drop="first") + transformed_data = self._encode_categorical(data[col], encoder) + self.encoders[col] = encoder + data.drop(columns=[col], inplace=True) + data = pd.concat([data, transformed_data], axis=1) + + elif dtype == "numerical": + scaler = StandardScaler() + data[col] = scaler.fit_transform(data[[col]]) + self.scalers[col] = scaler + + elif dtype == "boolean": + data[col] = data[col].astype(int) # Convert True/False to 1/0 + + elif dtype == "datetime": + data[col] = data[col].apply(lambda x: x.timestamp() if pd.notnull(x) else np.nan) # Convert to Unix timestamp + + elif dtype == "timedelta": + data[col] = pd.to_timedelta(data[col]).dt.total_seconds() + + return data + + def postprocess(self, synthetic_data: pd.DataFrame) -> pd.DataFrame: + """Transform numerical synthetic data back to its original format.""" + synthetic_data = synthetic_data.copy() + + for col, dtype in self.metadata.items(): + if dtype == "categorical" and col in self.encoders: + encoder = self.encoders[col] + synthetic_data[col] = self._decode_categorical(synthetic_data[col], encoder) + + elif dtype == "numerical" and col in self.scalers: + scaler = self.scalers[col] + synthetic_data[col] = scaler.inverse_transform(synthetic_data[[col]]) + + elif dtype == "boolean": + synthetic_data[col] = synthetic_data[col].round().astype(bool) + + elif dtype == "datetime": + synthetic_data[col] = pd.to_datetime(synthetic_data[col], unit='s') + + elif dtype == "timedelta": + synthetic_data[col] = pd.to_timedelta(synthetic_data[col], unit='s') + + return synthetic_data[self.original_columns] # Restore original column order + + def validate(self, data: pd.DataFrame): + """Validate input data.""" + if not isinstance(data, pd.DataFrame): + raise ValueError("Input data must be a pandas DataFrame.") + + missing_columns = set(self.metadata.keys()) - set(data.columns) + if missing_columns: + raise InvalidDataError(f"Missing columns: {missing_columns}") + + primary_keys = [col for col, dtype in self.metadata.items() if dtype == "primary_key"] + for key in primary_keys: + if data[key].duplicated().any(): + raise InvalidDataError(f"Primary key '{key}' is not unique.") + + def _encode_categorical(self, series: pd.Series, encoder): + """Encode categorical columns.""" + if isinstance(encoder, LabelEncoder): + return pd.DataFrame(encoder.fit_transform(series), columns=[series.name]) + elif isinstance(encoder, OneHotEncoder): + encoded_array = encoder.fit_transform(series.values.reshape(-1, 1)) + encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out([series.name])) + return encoded_df + + def _decode_categorical(self, series: pd.Series, encoder): + """Decode categorical columns.""" + if isinstance(encoder, LabelEncoder): + return encoder.inverse_transform(series.astype(int)) + elif isinstance(encoder, OneHotEncoder): + category_index = np.argmax(series.values, axis=1) + return encoder.categories_[0][category_index] + + def _handle_missing_values(self, series: pd.Series): + """Handle missing values based on column 
type.""" + if series.dtype in ["float64", "int64"]: + return series.fillna(series.median()) + elif series.dtype == "object": + return series.fillna(series.mode()[0]) + else: + return series.fillna(0) diff --git a/synthpop/processor/missing_data_handler.py b/synthpop/processor/missing_data_handler.py new file mode 100644 index 0000000..1f03885 --- /dev/null +++ b/synthpop/processor/missing_data_handler.py @@ -0,0 +1,277 @@ +import numpy as np +import pandas as pd +import scipy.stats as stats +from sklearn.experimental import enable_iterative_imputer # For MICE and EM +from sklearn.impute import SimpleImputer, IterativeImputer +from sklearn.linear_model import LogisticRegression +from sklearn.preprocessing import LabelEncoder +import warnings + + +class MissingDataHandler: + """Detects missingness type (MCAR, MAR, MNAR) and applies automatic imputation.""" + + def __init__(self): + self.imputers = {} + + @staticmethod + def get_column_dtypes(data) -> dict: + """ + Returns a dictionary mapping column names to abstract data types + that are compatible with the processor. + + The mapping is as follows: + - float64, float32, int64, int32 -> "numerical" + - bool -> "boolean" + - datetime64[...] -> "datetime" + - timedelta64[...] -> "timedelta" + - All others (e.g., object) -> "categorical" + """ + def map_dtype(dtype: str) -> str: + if dtype in ['float64', 'float32', 'int64', 'int32']: + return "numerical" + elif dtype == 'bool': + return "boolean" + elif 'datetime' in dtype: + return "datetime" + elif 'timedelta' in dtype: + return "timedelta" + else: + return "categorical" + + if isinstance(data, pd.DataFrame): + return {col: map_dtype(str(dtype)) for col, dtype in data.dtypes.items()} + elif isinstance(data, np.ndarray) and data.dtype.names is not None: + return {name: map_dtype(str(data.dtype.fields[name][0])) for name in data.dtype.names} + else: + raise TypeError("Data must be a pandas DataFrame or a structured numpy array.") + + def encode_predictors( + self, df: pd.DataFrame, drop_cols: list = None + ) -> pd.DataFrame: + """ + Encodes all columns in the DataFrame so that they are numeric. + Optionally, drops specified columns (e.g., the target column). + + Steps: + 1. Extract numeric columns. + 2. Convert datetime columns to Unix timestamp (numeric). + 3. Convert timedelta columns to total seconds as float. + 4. For categorical columns (object, category), create dummy variables. + 5. For boolean columns, convert to int (0/1). + 6. Concatenate everything and fill any remaining NaNs with each column's median. + + Args: + df (pd.DataFrame): Input DataFrame. + drop_cols (list): List of column names to drop (optional). + + Returns: + pd.DataFrame: DataFrame with only numeric values and no missing entries. + """ + df_work = df.copy() + if drop_cols is not None: + df_work = df_work.drop(columns=drop_cols) + + # 1. Extract numeric columns. + num_df = df_work.select_dtypes(include=[np.number], exclude = ["timedelta64[ns]"]).copy() + + # 2. Convert datetime columns to Unix timestamp (numeric). + datetime_cols = df_work.select_dtypes(include=["datetime64[ns]"]) + if not datetime_cols.empty: + datetime_numeric = datetime_cols.apply( + lambda col: col.astype(np.int64) // 10**9 + ) + num_df = pd.concat([num_df, datetime_numeric], axis=1) + + # 3. Convert timedelta columns to total seconds (as float). 
+ timedelta_cols = df_work.select_dtypes(include=["timedelta64[ns]"]) + if not timedelta_cols.empty: + timedelta_numeric = pd.DataFrame({ + col: timedelta_cols[col].dt.total_seconds() for col in timedelta_cols.columns + }, index=df_work.index) + num_df = pd.concat([num_df, timedelta_numeric], axis=1) + + + # 4. Encode categorical columns using get_dummies. + cat_df = df_work.select_dtypes(include=["object", "category"]) + if not cat_df.empty: + dummies = pd.get_dummies(cat_df, drop_first=True) + else: + dummies = pd.DataFrame(index=df_work.index) + + # 5. Handle boolean columns: convert them to int explicitly. + bool_df = df_work.select_dtypes(include=["bool"]).astype(int) + + # 6. Concatenate all predictors and fill any remaining missing values with the median. + result_df = pd.concat([num_df, dummies, bool_df], axis=1) + result_df = result_df.apply(lambda col: col.fillna(0), axis=0) + return result_df + + def detect_missingness(self, dfc: pd.DataFrame) -> dict: + """Detects missingness type for each column, handling multiple data types.""" + df = dfc.copy() + missingness = {} + for col in df.columns: + missing_values = df[col].isna().sum() + if missing_values == 0: + continue # No missing values → Skip detection + col_type = df[col].dtype + + # **Categorical Data Handling (object, category)** + if col_type == "object" or df[col].nunique() < 10: + observed_counts = df[col].dropna().value_counts() + if len(observed_counts) > 1: + _, p_value = stats.chisquare(observed_counts) + if p_value > 0.05: + missingness[col] = "MCAR" + continue + + missing_mask = df[col].isna().astype(int) + # Use our helper to encode all predictors (drop the target col) + encoded_data = self.encode_predictors(df, drop_cols=[col]) + model = LogisticRegression() + model.fit(encoded_data, missing_mask) + if model.score(encoded_data, missing_mask) > 0.6: + missingness[col] = "MAR" + continue + missingness[col] = "MNAR" + continue + + # **Numerical Data Handling (int, float)** + elif np.issubdtype(col_type, np.number): + _, p_value = stats.shapiro(df[col].dropna()) + if p_value > 0.05: + missingness[col] = "MCAR" + continue + missing_mask = df[col].isna().astype(int) + observed_data = self.encode_predictors(df, drop_cols=[col]) + model = LogisticRegression() + model.fit(observed_data, missing_mask) + if model.score(observed_data, missing_mask) > 0.6: + missingness[col] = "MAR" + continue + observed_values = df[col].dropna() + missing_rows = df[col].isna() + if missing_rows.sum() > 0: + encoded_missing_vals = self.encode_predictors(df.loc[missing_rows, df.columns != col]) + missing_vals = encoded_missing_vals.mean(axis=1) + _, p_value = stats.ks_2samp(observed_values, missing_vals) + if p_value < 0.05: + missingness[col] = "MNAR" + continue + missingness[col] = "MAR" + continue + + # **Boolean Data Handling (bool)** + elif np.issubdtype(col_type, np.bool_): + bool_as_int = df[col].astype(float) + _, p_value = stats.chisquare(bool_as_int.value_counts()) + if p_value > 0.05: + missingness[col] = "MCAR" + continue + missingness[col] = "MNAR" + continue + + # **Datetime Handling (datetime64)** + elif np.issubdtype(col_type, np.datetime64): + timestamps = df[col].dropna().astype(int) // 10**9 + _, p_value = stats.shapiro(timestamps) + if p_value > 0.05: + missingness[col] = "MCAR" + continue + missing_mask = df[col].isna().astype(int) + observed_data = self.encode_predictors(df, drop_cols=[col]) + model = LogisticRegression() + model.fit(observed_data, missing_mask) + if model.score(observed_data, missing_mask) > 0.6: + 
missingness[col] = "MAR" + continue + missingness[col] = "MNAR" + continue + + # **Timedelta Handling (timedelta64)** + elif np.issubdtype(col_type, np.timedelta64): + durations = df[col].dropna().dt.total_seconds() + _, p_value = stats.shapiro(durations) + if p_value > 0.05: + missingness[col] = "MCAR" + continue + missingness[col] = "MNAR" + continue + + return missingness + + def apply_imputation(self, df: pd.DataFrame, missingness: dict) -> pd.DataFrame: + """Automatically applies imputation based on missingness type and column data type.""" + df = df.copy() + for col, mtype in missingness.items(): + if df[col].isna().sum() == 0: + continue + + # --- Categorical Data (object, category or few unique values) --- + if ( + pd.api.types.is_object_dtype(df[col]) + or pd.api.types.is_categorical_dtype(df[col]) + or (df[col].nunique() < 10) + ): + if mtype == "MCAR": + df[col].fillna(df[col].mode()[0], inplace=True) + elif mtype == "MAR": + # Use get_dummies encoding for categorical data + dummies = pd.get_dummies(df[col], prefix=col, dummy_na=True) + imputer = IterativeImputer(random_state=42) + imputed = imputer.fit_transform(dummies) + imputed_rounded = np.rint(imputed).astype(int) + imputed_df = pd.DataFrame( + imputed_rounded, columns=dummies.columns, index=df.index + ) + # Convert back to a single categorical column by taking the column with the maximum value. + predicted_category = imputed_df.idxmax(axis=1) + df[col] = predicted_category.str.split(f"{col}_").str[-1] + elif mtype == "MNAR": + df[col].fillna("Missing", inplace=True) + + # --- Numerical Data --- + elif pd.api.types.is_numeric_dtype(df[col]): + if mtype == "MCAR": + imputer = SimpleImputer(strategy="mean") + df[col] = imputer.fit_transform(df[[col]]).ravel() + elif mtype in ["MAR", "MNAR"]: + imputer = IterativeImputer(random_state=42) + df[col] = imputer.fit_transform(df[[col]]).ravel() + + # --- Boolean Data --- + elif pd.api.types.is_bool_dtype(df[col]): + if mtype == "MCAR": + df[col].fillna(df[col].mode()[0], inplace=True) + elif mtype in ["MAR", "MNAR"]: + numeric_vals = df[col].astype(float) + imputer = IterativeImputer(random_state=42) + imputed = imputer.fit_transform(numeric_vals.values.reshape(-1, 1)) + df[col] = np.rint(imputed).astype(bool).flatten() + + # --- Datetime Data --- + elif pd.api.types.is_datetime64_any_dtype(df[col]): + print("entering here") + numeric_series = df[col].apply(lambda x: x.timestamp() if pd.notnull(x) else np.nan) + if mtype == "MCAR": + imputer = SimpleImputer(strategy="median") + elif mtype in ["MAR", "MNAR"]: + imputer = IterativeImputer(random_state=42) + imputed_numeric = imputer.fit_transform( + numeric_series.values.reshape(-1, 1) + ) + df[col] = pd.to_datetime(imputed_numeric.flatten(), unit='s') + + # --- Timedelta Data --- + elif pd.api.types.is_timedelta64_dtype(df[col]): + numeric_series = df[col].apply(lambda x: x.total_seconds() if pd.notnull(x) else np.nan).values.reshape(-1, 1) + if mtype == "MCAR": + imputer = SimpleImputer(strategy="median" ) + elif mtype in ["MAR", "MNAR"]: + imputer = IterativeImputer(random_state=42) + imputed_numeric = imputer.fit_transform(numeric_series) + df[col] = pd.to_timedelta(imputed_numeric.flatten(), unit="s") + else: + df[col].fillna(df[col].mode()[0], inplace=True) + return df diff --git a/synthpop/processor/processor.py b/synthpop/processor/processor.py deleted file mode 100644 index b2065c3..0000000 --- a/synthpop/processor/processor.py +++ /dev/null @@ -1,122 +0,0 @@ -import numpy as np -import pandas as pd - -# global variables 
-from synthpop import NUM_COLS_DTYPES, CAT_COLS_DTYPES - -NAN_KEY = 'nan' -NUMTOCAT_KEY = 'numtocat' - - -class Processor: - def __init__(self, spop): - self.spop = spop - self.processing_dict = {NUMTOCAT_KEY: {}, - NAN_KEY: {} - } - - - def preprocess(self, df, dtypes): - for col in self.spop.visited_columns: - col_nan_indices = df[col].isna() - cont_nan_indices = {v: df[col] == v for v in self.spop.cont_na.get(col, [])} - col_nan_series = [(np.nan, col_nan_indices)] + list(cont_nan_indices.items()) - - col_all_nan_indices = pd.DataFrame({index: value[1] for index, value in enumerate(col_nan_series)}).max(axis=1) - col_not_nan_indices = np.invert(col_all_nan_indices) - - # transform numerical columns in numtocat to categorical - if col in self.spop.numtocat: - self.processing_dict[NUMTOCAT_KEY][col] = {'dtype': self.spop.df_dtypes[col], - 'categories': {} - } - - # Dealing With Non-NaN Values - not_nan_values = df.loc[col_not_nan_indices, col].copy() - df.loc[col_not_nan_indices, col] = pd.cut(df.loc[col_not_nan_indices, col], self.spop.catgroups[col], labels=range(self.spop.catgroups[col]), include_lowest=True) - - grouped = pd.DataFrame({'grouped': df.loc[col_not_nan_indices, col], 'real': not_nan_values}).groupby('grouped') - self.processing_dict[NUMTOCAT_KEY][col]['categories'] = grouped['real'].apply(np.array).to_dict() - - # Dealing with NaN - for index, (_, bool_series) in enumerate(col_nan_series): - nan_cat = self.spop.catgroups[col] + index - self.processing_dict[NUMTOCAT_KEY][col]['categories'][nan_cat] = df.loc[bool_series, col].to_numpy() - df.loc[bool_series, col] = nan_cat - - df[col] = df[col].astype('category') - self.spop.df_dtypes[col] = 'category' - - else: - # NaNs in category columns - # need to process NaNs only as all other categories will be taken care automatically - if self.spop.df_dtypes[col] in CAT_COLS_DTYPES: - if col_nan_indices.any(): - # TODO beware of 'NaN_category' naming - col_nan_category = 'NaN_category' - self.processing_dict[NAN_KEY][col] = {'dtype': self.spop.df_dtypes[col], - 'nan_value': col_nan_category - } - - df[col] = df[col].cat.add_categories(col_nan_category) #argument 'inplace' is deprecated and removed - df[col].fillna(col_nan_category, inplace=True) - - # NaNs in numerical columns - elif self.spop.df_dtypes[col] in NUM_COLS_DTYPES: - if col_all_nan_indices.any(): - # insert new column in df - # TODO beware of '_NaN' naming - col_nan_name = col + '_NaN' - df.insert(df.columns.get_loc(col), col_nan_name, 0) #inserts columName_NaN in dataframe - - self.processing_dict[NAN_KEY][col] = {'col_nan_name': col_nan_name, - 'dtype': self.spop.df_dtypes[col], - 'nan_flags': {} - } - - for index, (cat, bool_series) in enumerate(col_nan_series): - cat_index = index + 1 - self.processing_dict[NAN_KEY][col]['nan_flags'][cat_index] = cat - df.loc[bool_series, col_nan_name] = cat_index - df.loc[col_all_nan_indices, col] = 0 - - df.loc[:,col_nan_name] = df[col_nan_name].astype('category') - self.spop.df_dtypes[col_nan_name] = 'category' - - return df - - def postprocess(self, synth_df): - #sex_NaN is not a column of synth_df - for col, processing_numtocat_col_dict in self.processing_dict[NUMTOCAT_KEY].items(): - synth_df[col] = synth_df[col].astype(object) - col_synth_df = synth_df[col].copy() - - for category, category_values in processing_numtocat_col_dict['categories'].items(): - category_indices = col_synth_df == category - synth_df.loc[category_indices, col] = np.random.choice(category_values, size=category_indices.sum(), replace=True) - - # cast 
dtype back to original (float for int column with NaNs) - if synth_df[col].isna().any() and processing_numtocat_col_dict['dtype'] == 'int': - synth_df[col] = synth_df[col].astype(float) - else: - synth_df[col] = synth_df[col].astype(processing_numtocat_col_dict['dtype']) - # self.spop.df_dtypes[col] = processing_numtocat_col_dict['dtype'] - - for col, processing_nan_col_dict in self.processing_dict[NAN_KEY].items(): - # NaNs in category columns - # need to postprocess NaNs only all other categories will be taken care automatically - if processing_nan_col_dict['dtype'] in CAT_COLS_DTYPES: - col_nan_value = processing_nan_col_dict['nan_value'] - synth_df[col] = synth_df[col].astype(object) - synth_df.loc[synth_df[col] == col_nan_value, col] = np.nan - synth_df[col] = synth_df[col].astype('category') - - # NaNs in numerical columns - #The code below sets changes NANs in numerical columns to a given value, and removes the NAN indicator column. - elif processing_nan_col_dict['dtype'] in NUM_COLS_DTYPES: - for col_nan_flag, col_nan_value in processing_nan_col_dict['nan_flags'].items(): - nan_flag_indices = synth_df[processing_nan_col_dict['col_nan_name']] == col_nan_flag #expects columnName_NAN in the synthetic data set - synth_df.loc[nan_flag_indices, col] = col_nan_value - synth_df.drop(columns=processing_nan_col_dict['col_nan_name'], inplace=True) - - return synth_df diff --git a/synthpop/synthpop.py b/synthpop/synthpop.py deleted file mode 100644 index 03a4051..0000000 --- a/synthpop/synthpop.py +++ /dev/null @@ -1,203 +0,0 @@ -import numpy as np -import pandas as pd - -# classes -from synthpop.validator import Validator -from synthpop.processor import Processor -# global variables -from synthpop import NUM_COLS_DTYPES -from synthpop.processor import NAN_KEY -from synthpop.method import CART_METHOD, GC_METHOD, METHODS_MAP, NA_METHODS - - -class Synthpop: - def __init__(self, - method=None, - visit_sequence=None, - # predictor_matrix=None, - proper=False, - cont_na=None, - smoothing=False, - default_method=CART_METHOD, - numtocat=None, - catgroups=None, - seed=None): - # initialise the validator and processor - self.validator = Validator(self) - self.processor = Processor(self) - - # initialise arguments - self.method = method - self.visit_sequence = visit_sequence - self.predictor_matrix = None - self.proper = proper - self.cont_na = cont_na - self.smoothing = smoothing - self.default_method = default_method - self.numtocat = numtocat - self.catgroups = catgroups - self.seed = seed - self.map_column_to_NaN_column = {} - # check init - self.validator.check_init() - - def include_nan_columns(self): - for (col,nan_col) in self.map_column_to_NaN_column.items(): - if col not in self.visit_sequence: - continue - - index_of_col = self.visit_sequence.index(col) - self.visit_sequence.insert(index_of_col,nan_col) - - def pre_preprocess(self,df,dtypes,nan_fill): - for column in df: - if dtypes[column] != 'float': - continue - maybe_nans = df[column].isnull() - if not maybe_nans.any(): - continue - - df.loc[maybe_nans,column] = nan_fill - - nan_col_name = column+"_NaN" - df.loc[:,nan_col_name] = maybe_nans - self.map_column_to_NaN_column[column] = nan_col_name - - dtypes[nan_col_name] = 'category' - - return df,dtypes - - def post_postprocessing(self,syn_df): - for column in syn_df: - if column in self.map_column_to_NaN_column.keys(): - nan_col_name = self.map_column_to_NaN_column[column] - column_NaN_at = syn_df[nan_col_name] - syn_df.loc[column_NaN_at,column] = None - syn_df = 
syn_df.drop(columns=nan_col_name) - - return syn_df - - def _infer_dtypes(self, df): - """Automatically infer data types from DataFrame. - - Args: - df: pandas DataFrame - - Returns: - dict: Mapping of column names to inferred types ('int', 'float', 'datetime', 'category', 'bool') - """ - dtypes = {} - for column in df.columns: - pd_dtype = str(df[column].dtype) - - if pd_dtype.startswith('int'): - dtypes[column] = 'int' - elif pd_dtype.startswith('float'): - dtypes[column] = 'float' - elif pd_dtype.startswith('datetime'): - dtypes[column] = 'datetime' - elif pd_dtype.startswith('bool'): - dtypes[column] = 'bool' - else: - # For object or string dtypes, check if it should be categorical - dtypes[column] = 'category' - - return dtypes - - def fit(self, df, dtypes=None): - """Fit the synthetic data generator. - - Args: - df: pandas DataFrame to learn from - dtypes: Optional dict mapping column names to types. If not provided, types will be inferred. - """ - # Infer dtypes if not provided - if dtypes is None: - dtypes = self._infer_dtypes(df) - - # Validate DataFrame - if not df.columns.is_unique: - raise ValueError("DataFrame column names must be unique") - - df,dtypes = self.pre_preprocess(df,dtypes,-8) - - self.df_columns = df.columns.tolist() - # Only set visit_sequence if not provided in init - if self.visit_sequence is None: - self.visit_sequence = df.columns.tolist() - elif isinstance(self.visit_sequence, list) and all(isinstance(x, int) for x in self.visit_sequence): - # Convert numeric indices to column names - self.visit_sequence = [df.columns[i] for i in self.visit_sequence] - - self.include_nan_columns() - self.n_df_rows, self.n_df_columns = np.shape(df) - self.df_dtypes = dtypes - - # check processor - self.validator.check_processor() - # preprocess - - #processor.preprocess has side effects on the processor object and on this (self) object - #processor.processing_dict[NAN_KEY][col] - #spop.df_dtypes[col_nan_name] - processed_df = self.processor.preprocess(df, self.df_dtypes) - print(processed_df) - self.processed_df_columns = processed_df.columns.tolist() - self.n_processed_df_columns = len(self.processed_df_columns) - - # check fit - self.validator.check_fit() - # fit - self._fit(processed_df) - - def _fit(self, df): - self.saved_methods = {} - - # train - self.predictor_matrix_columns = self.predictor_matrix.columns.to_numpy() - for col, visit_step in self.visit_sequence.sort_values().items(): - print('train_{}'.format(col)) - - # initialise the method - col_method = METHODS_MAP[self.method[col]](dtype=self.df_dtypes[col], smoothing=self.smoothing[col], proper=self.proper, random_state=self.seed) - # fit the method - col_predictors = self.predictor_matrix_columns[self.predictor_matrix.loc[col].to_numpy() == 1] - col_method.fit(X_df=df[col_predictors], y_df=df[col]) - # save the method - self.saved_methods[col] = col_method - - def generate(self, k=None): - self.k = k - - # check generate - self.validator.check_generate() - # generate - synth_df = self._generate() - # postprocess - processed_synth_df = self.processor.postprocess(synth_df) - - return self.post_postprocessing(processed_synth_df) - - def _generate(self): - # Only generate columns that were in the visit sequence - synth_df = pd.DataFrame(data=np.zeros([self.k, len(self.visit_sequence)]), columns=self.visit_sequence.index) - - for col, visit_step in self.visit_sequence.sort_values().items(): - print('generate_{}'.format(col)) - - # reload the method - col_method = self.saved_methods[col] - # predict with the method 
- col_predictors = self.predictor_matrix_columns[self.predictor_matrix.loc[col].to_numpy() == 1] - synth_df[col] = col_method.predict(synth_df[col_predictors]) - - # change all missing values to 0 - if col in self.processor.processing_dict[NAN_KEY] and self.df_dtypes[col] in NUM_COLS_DTYPES and self.method[col] in NA_METHODS: - nan_indices = synth_df[self.processor.processing_dict[NAN_KEY][col]['col_nan_name']] != 0 - synth_df.loc[nan_indices, col] = 0 - - # map dtype to original dtype (only excpetion if column is full of NaNs) - if synth_df[col].notna().any(): - synth_df[col] = synth_df[col].astype(self.df_dtypes[col]) - - return synth_df \ No newline at end of file diff --git a/synthpop/validator/__init__.py b/synthpop/validator/__init__.py index 3143690..d55577d 100644 --- a/synthpop/validator/__init__.py +++ b/synthpop/validator/__init__.py @@ -1 +1,5 @@ -from synthpop.validator.validator import Validator +from .validator import Validator + +__all__ = [ + "Validator", +] \ No newline at end of file diff --git a/synthpop/validator/validator.py b/synthpop/validator/validator.py index aa71005..f54852f 100644 --- a/synthpop/validator/validator.py +++ b/synthpop/validator/validator.py @@ -1,322 +1,5 @@ -import numpy as np -import pandas as pd - -# global variables -from synthpop import NUM_COLS_DTYPES -from synthpop.method import EMPTY_METHOD, SAMPLE_METHOD -from synthpop.method import DEFAULT_METHODS_MAP, INIT_METHODS_MAP, CONT_TO_CAT_METHODS_MAP -from synthpop.method import ALL_METHODS, INIT_METHODS, DEFAULT_METHODS, NA_METHODS -from synthpop.processor import NAN_KEY - - -INIT_STEP = 'init' -PROCESSOR_STEP = 'processor' -FIT_STEP = 'fit' -GENERATE_STEP = 'generate' - -NONE_TYPE = type(None) - -DENSITY = 'density' class Validator: - def __init__(self, spop): - self.spop = spop - self.attributes_types = {'method': (NONE_TYPE, str, list), - 'visit_sequence': (NONE_TYPE, np.ndarray, list), - # 'predictor_matrix': (NONE_TYPE,), - 'proper': (bool,), - 'cont_na': (NONE_TYPE, dict), - 'smoothing': (bool, str, dict), - 'default_method': (str,), - 'numtocat': (NONE_TYPE, list), - 'catgroups': (NONE_TYPE, int, dict), - 'seed': (NONE_TYPE, int), - 'k': (NONE_TYPE, int)} - - def check_init(self): - step = INIT_STEP - - self.default_method_validator(step=step) - self.method_validator(step=step) - self.visit_sequence_validator(step=step) - self.predictor_matrix_validator(step=step) - self.proper_validator(step=step) - self.cont_na_validator(step=step) - self.smoothing_validator(step=step) - self.numtocat_validator(step=step) - self.catgroups_validator(step=step) - self.seed_validator(step=step) - - def check_processor(self): - step = PROCESSOR_STEP - - self.visit_sequence_validator(step=step) - self.method_validator(step=step) - self.predictor_matrix_validator(step=step) - self.smoothing_validator(step=step) - - self.cont_na_validator(step=step) - self.numtocat_validator(step=step) - self.catgroups_validator(step=step) - - def check_fit(self): - step = FIT_STEP - - self.method_validator(step=step) - self.visit_sequence_validator(step=step) - self.predictor_matrix_validator(step=step) - self.smoothing_validator(step=step) - - def check_generate(self): - step = GENERATE_STEP - - self.k_validator(step=step) - - def check_valid_type(self, attribute_name, return_type=False): - attribute_type = getattr(self.spop, attribute_name) - expected_types = self.attributes_types[attribute_name] - assert isinstance(attribute_type, expected_types) - - if return_type: - return attribute_type - - def method_validator(self, 
step=None): - if step == INIT_STEP: - # validate method type is allowed - method_type = self.check_valid_type('method', return_type=True) - print(method_type) - - if isinstance(method_type, str): - # if method type is str - # validate method is in allowed init methods - print(method_type) - assert self.spop.method in INIT_METHODS - - elif isinstance(method_type, list): - # if method type is list - # validate all methods are allowed - assert all(m in ALL_METHODS for m in self.spop.method) - - if step == PROCESSOR_STEP: - first_visited_col = self.spop.visit_sequence.index[self.spop.visit_sequence == 0].values[0] - - if self.spop.method is None: - # if method is not specified - # for each column set method to default method according to its dtype (method for first visited column is sample_method) - self.spop.method = [DEFAULT_METHODS_MAP[self.spop.default_method][self.spop.df_dtypes[col]] if col != first_visited_col else SAMPLE_METHOD - for col in self.spop.df_columns] - - elif isinstance(self.spop.method, str): - # if method type is str - # for each column set method to the corresponding allowed method according to its dtype (method for first visited column is sample_method) - self.spop.method = [INIT_METHODS_MAP[self.spop.method][self.spop.df_dtypes[col]] if col != first_visited_col else SAMPLE_METHOD - for col in self.spop.df_columns] - - else: - # validate method for first visited column with non empty method is sample_method - for col, visit_order in self.spop.visit_sequence.sort_values().items(): - col_method = self.spop.method[self.spop.df_columns.index(col)] - if col_method != EMPTY_METHOD: - assert col_method == SAMPLE_METHOD - break - # assert all(self.spop.method[i] == SAMPLE_METHOD for i, col in enumerate(self.spop.df_columns) if col == first_visited_col) - - # validate all columns have specified methods - assert len(self.spop.method) == self.spop.n_df_columns - self.spop.method = pd.Series(self.spop.method, index=self.spop.df_columns) - - if step == FIT_STEP: - for col in self.spop.method.index: - if col in self.spop.numtocat: - self.spop.method[col] = CONT_TO_CAT_METHODS_MAP[self.spop.method[col]] - - elif col in self.spop.processor.processing_dict[NAN_KEY] and self.spop.df_dtypes[col] in NUM_COLS_DTYPES and self.spop.method[col] in NA_METHODS: - # TODO put in a function - nan_col_index = self.spop.method.index.get_loc(col) - index_list = self.spop.method.index.tolist() - index_list.insert(nan_col_index, self.spop.processed_df_columns[nan_col_index]) - self.spop.method = self.spop.method.reindex(index_list, fill_value=CONT_TO_CAT_METHODS_MAP[self.spop.method[col]]) - - def visit_sequence_validator(self, step=None): - if step == INIT_STEP: - print('A') - # validate visit_sequence type is allowed - visit_sequence_type = self.check_valid_type('visit_sequence', return_type=True) - - if isinstance(visit_sequence_type, np.ndarray): - # if visit_sequence type is numpy array - # transform visit_sequence into a list - self.spop.visit_sequence = [col.item() for col in self.spop.visit_sequence] - visit_sequence_type = list - - if isinstance(visit_sequence_type, list): - # if visit_sequence type is list - # validate all visits are unique - assert len(set(self.spop.visit_sequence)) == len(self.spop.visit_sequence) - # validate all visits are either type int or type str - assert all(isinstance(col, int) for col in self.spop.visit_sequence) or all(isinstance(col, str) for col in self.spop.visit_sequence) - - if step == PROCESSOR_STEP: - print('TestX') - if self.spop.visit_sequence is None: - # 
if visit_sequence is not specified - # visit all columns in a row - self.spop.visit_sequence = [col.item() for col in np.arange(self.spop.n_df_columns)] - - if isinstance(self.spop.visit_sequence[0], int): - # if visit_sequence is list of column indices - # validate every index in visit_sequence is a valid column index - assert set(self.spop.visit_sequence).issubset(set(np.arange(self.spop.n_df_columns))) - # transform visit_sequence into a list of column names - self.spop.visit_sequence = [self.spop.df_columns[i] for i in self.spop.visit_sequence] - else: - # validate every column name in visit_sequence is a valid column name - assert set(self.spop.visit_sequence).issubset(set(self.spop.df_columns)) - - self.spop.visited_columns = [col for col in self.spop.df_columns if col in self.spop.visit_sequence] - self.spop.visit_sequence = pd.Series([self.spop.visit_sequence.index(col) for col in self.spop.visited_columns], index=self.spop.visited_columns) - - if step == FIT_STEP: - for col in self.spop.visit_sequence.index: - if col in self.spop.processor.processing_dict[NAN_KEY] and self.spop.df_dtypes[col] in NUM_COLS_DTYPES and self.spop.method[col] in NA_METHODS: - visit_step = self.spop.visit_sequence[col] - self.spop.visit_sequence.loc[self.spop.visit_sequence >= visit_step] += 1 - - nan_col_index = self.spop.visit_sequence.index.get_loc(col) - index_list = self.spop.visit_sequence.index.tolist() - index_list.insert(nan_col_index, self.spop.processed_df_columns[nan_col_index]) - self.spop.visit_sequence = self.spop.visit_sequence.reindex(index_list, fill_value=visit_step) - - def predictor_matrix_validator(self, step=None): - # if step == INIT_STEP: - # # validate predictor_matrix type is allowed - # self.check_valid_type('predictor_matrix') - - if step == PROCESSOR_STEP: - # build predictor_matrix so all previously visited columns are used for the prediction of the currently visited - self.spop.predictor_matrix = np.zeros([len(self.spop.visit_sequence), len(self.spop.visit_sequence)], dtype=int) - self.spop.predictor_matrix = pd.DataFrame(self.spop.predictor_matrix, index=self.spop.visit_sequence.index, columns=self.spop.visit_sequence.index) - visited_columns = [] - for col, _ in self.spop.visit_sequence.sort_values().items(): - self.spop.predictor_matrix.loc[col, visited_columns] = 1 - visited_columns.append(col) - - if step == FIT_STEP: - for col in self.spop.predictor_matrix: - if col in self.spop.processor.processing_dict[NAN_KEY] and self.spop.df_dtypes[col] in NUM_COLS_DTYPES and self.spop.method[col] in NA_METHODS: - nan_col_index = self.spop.predictor_matrix.columns.get_loc(col) - self.spop.predictor_matrix.insert(nan_col_index, self.spop.processed_df_columns[nan_col_index], self.spop.predictor_matrix[col]) - - index_list = self.spop.predictor_matrix.index.tolist() - index_list.insert(nan_col_index, self.spop.processed_df_columns[nan_col_index]) - self.spop.predictor_matrix = self.spop.predictor_matrix.reindex(index_list, fill_value=0) - self.spop.predictor_matrix.loc[self.spop.processed_df_columns[nan_col_index]] = self.spop.predictor_matrix.loc[col] - - self.spop.predictor_matrix.loc[col, self.spop.processed_df_columns[nan_col_index]] = 1 - - def proper_validator(self, step=None): - if step == INIT_STEP: - # validate proper type is allowed - self.check_valid_type('proper') - - def cont_na_validator(self, step=None): - if step == INIT_STEP: - # validate cont_na type is allowed - self.check_valid_type('cont_na') - - if step == PROCESSOR_STEP: - if self.spop.cont_na is None: - 
self.spop.cont_na = {} - else: - # validate columns in cont_na are valid columns - assert all(col in self.spop.df_columns for col in self.spop.cont_na) - # assert all(col in self.spop.visited_columns for col in self.spop.cont_na) - # validate the type of columns in cont_na are valid types - assert all(self.spop.df_dtypes[col] in NUM_COLS_DTYPES for col in self.spop.cont_na) - self.spop.cont_na = {col: col_cont_na for col, col_cont_na in self.spop.cont_na.items() if self.spop.method[col] in NA_METHODS} - - def smoothing_validator(self, step=None): - if step == INIT_STEP: - # validate smoothing type is allowed - self.check_valid_type('smoothing') - - if step == PROCESSOR_STEP: - if self.spop.smoothing is False: - self.spop.smoothing = {col: False for col in self.spop.df_columns} - elif isinstance(self.spop.smoothing, str): - # if smoothing type is str - # validate smoothing is 'density' - assert self.spop.smoothing == DENSITY - self.spop.smoothing = {col: self.spop.df_dtypes[col] in NUM_COLS_DTYPES for col in self.spop.df_columns} - else: - # validate smoothing is 'denisty' for some/all numerical columns and False for all other columns - assert all((smoothing_method == DENSITY and self.spop.df_dtypes[col] in NUM_COLS_DTYPES) or smoothing_method is False - for col, smoothing_method in self.spop.smoothing.items()) - self.spop.smoothing = {col: (self. spop.smoothing.get(col, False) == DENSITY and self.spop.df_dtypes[col] in NUM_COLS_DTYPES) for col in self.spop.df_columns} - - if step == FIT_STEP: - for col in self.spop.processed_df_columns: - if col in self.spop.numtocat: - self.spop.smoothing[col] = False - elif col in self.spop.processor.processing_dict[NAN_KEY] and self.spop.df_dtypes[col] in NUM_COLS_DTYPES: - self.spop.smoothing[self.spop.processor.processing_dict[NAN_KEY][col]['col_nan_name']] = False - - def default_method_validator(self, step=None): - if step == INIT_STEP: - # validate default_method type is allowed - self.check_valid_type('default_method') - - # validate default_method is in allowed default methods - assert self.spop.default_method in DEFAULT_METHODS - - def numtocat_validator(self, step=None): - if step == INIT_STEP: - # validate numtocat type is allowed - self.check_valid_type('numtocat') - - if step == PROCESSOR_STEP: - if self.spop.numtocat is None: - self.spop.numtocat = [] - else: - # validate all columns in numtocat are valid columns - assert all(col in self.spop.df_columns for col in self.spop.numtocat) - # assert all(col in self.spop.visited_columns for col in self.spop.numtocat) - # validate all columns in numtocat are numerical columns - assert all(self.spop.df_dtypes[col] in NUM_COLS_DTYPES for col in self.spop.numtocat) - - def catgroups_validator(self, step=None): - if step == INIT_STEP: - # validate catgroups type is allowed - catgroups_type = self.check_valid_type('catgroups', return_type=True) - - if isinstance(catgroups_type, int): - # if catgroups type is int - # validate catgroups is more than 1 - assert self.spop.catgroups > 1 - - elif isinstance(catgroups_type, dict): - # if catgroups type is dict - # validate the keys in catgroups are the same as the columns in numtocat - assert set(self.spop.catgroups.keys()) == set(self.spop.numtocat) - # validate all values in catgroups are type int and more than 1 - assert all((isinstance(col_groups, int) and col_groups > 1) for col_groups in self.spop.catgroups.values()) - - if step == PROCESSOR_STEP: - if self.spop.catgroups is None: - self.spop.catgroups = {col: 5 for col in self.spop.numtocat} - elif 
isinstance(self.spop.catgroups, int): - self.spop.catgroups = {col: self.spop.catgroups for col in self.spop.numtocat} - - def seed_validator(self, step=None): - if step == INIT_STEP: - # validate seed type is allowed - self.check_valid_type('seed') - - def k_validator(self, step=None): - if step == GENERATE_STEP: - # validate k type is allowed - self.check_valid_type('k') - - if self.spop.k is None: - self.spop.k = self.spop.n_df_rows + def __init__(self) -> None: + pass \ No newline at end of file diff --git a/tests/test_synthpop.py b/tests/test_synthpop.py index 4604040..1fc12e5 100644 --- a/tests/test_synthpop.py +++ b/tests/test_synthpop.py @@ -1,103 +1,215 @@ -import pytest +# test_synthpop.py + +import unittest +import numpy as np import pandas as pd -from synthpop import Synthpop -from datasets.adult import df, dtypes +from synthpop.metrics import MetricsReport, EfficacyMetrics, DisclosureProtection +from synthpop.processor.data_processor import DataProcessor, InvalidDataError +from synthpop.processor.missing_data_handler import MissingDataHandler +from synthpop.method.GC import GaussianCopulaMethod -def test_synthpop_default_parameters(): - """Test Synthpop with default parameters and automatic type inference.""" - # Initialize Synthpop - spop = Synthpop() - - # Fit the model with automatic type inference - spop.fit(df) - - # Generate synthetic data - synth_df = spop.generate(len(df)) - - # Verify the synthetic dataframe has the same shape as original - assert synth_df.shape == df.shape - - # Verify the synthetic dataframe has the same columns as original - assert all(synth_df.columns == df.columns) - - # Verify inferred dtypes match expected types - assert spop.df_dtypes['age'] == 'int' - assert spop.df_dtypes['workclass'] == 'category' - assert spop.df_dtypes['education'] == 'category' - - # Verify the method attribute contains expected default values - assert isinstance(spop.method, pd.Series) - assert 'age' in spop.method.index - assert spop.method['age'] == 'sample' # age should use sample method - assert all(spop.method[spop.method != 'sample'] == 'cart') # rest should use cart - - # Verify visit sequence is properly set - assert isinstance(spop.visit_sequence, pd.Series) - assert len(spop.visit_sequence) == len(df.columns) - assert all(spop.visit_sequence.index == df.columns) - - # Verify predictor matrix is properly set - assert isinstance(spop.predictor_matrix, pd.DataFrame) - assert spop.predictor_matrix.shape == (len(df.columns), len(df.columns)) - assert all(spop.predictor_matrix.index == df.columns) - assert all(spop.predictor_matrix.columns == df.columns) +# ------------------------------- +# Tests for MetricsReport +# ------------------------------- +class TestMetricsReport(unittest.TestCase): + def setUp(self): + # Create sample real and synthetic data with various types. 
+ self.real_df = pd.DataFrame({ + "numeric_col": [1, 2, 3, 4, 5, np.nan], + "categorical_col": ["a", "b", "a", "c", "b", "b"], + "datetime_col": pd.date_range("2023-01-01", periods=6), + "boolean_col": [True, False, True, False, True, False] + }) + self.synthetic_df = pd.DataFrame({ + "numeric_col": [1.1, 2.1, 3.1, 4.0, 5.2, np.nan], + "categorical_col": ["a", "b", "b", "c", "d", "b"], + "datetime_col": pd.date_range("2023-01-01", periods=6), + "boolean_col": [True, True, True, False, True, False] + }) + self.metadata = { + "numeric_col": "numerical", + "categorical_col": "categorical", + "datetime_col": "datetime", + "boolean_col": "boolean" + } + + def test_generate_report(self): + report = MetricsReport(self.real_df, self.synthetic_df, self.metadata) + report_df = report.generate_report() + self.assertIsInstance(report_df, pd.DataFrame) + expected_cols = {"column", "type", "missing_value_similarity", "range_coverage", + "boundary_adherence", "ks_complement", "tv_complement", + "statistic_similarity", "category_coverage", "category_adherence"} + self.assertTrue(expected_cols.issubset(set(report_df.columns))) + # Check that non-applicable metrics are marked as "N/A" + num_report = report_df[report_df["type"]=="numerical"].iloc[0] + self.assertEqual(num_report["category_coverage"], "N/A") + cat_report = report_df[report_df["type"]=="categorical"].iloc[0] + self.assertEqual(cat_report["range_coverage"], "N/A") -def test_synthpop_with_manual_dtypes(): - """Test Synthpop with manually specified dtypes.""" - # Initialize Synthpop - spop = Synthpop() - - # Fit the model with explicit dtypes - spop.fit(df, dtypes) - - # Verify the dtypes were set correctly - for col, dtype in dtypes.items(): - assert spop.df_dtypes[col] == dtype - - # Generate synthetic data - synth_df = spop.generate(len(df)) - - # Verify the synthetic dataframe has the same shape and columns - assert synth_df.shape == df.shape - assert all(synth_df.columns == df.columns) +# ------------------------------- +# Tests for EfficacyMetrics +# ------------------------------- +class TestEfficacyMetrics(unittest.TestCase): + def test_regression(self): + np.random.seed(42) + real_reg = pd.DataFrame({ + "feat1": np.random.normal(0, 1, 100), + "feat2": np.random.normal(5, 2, 100), + "target": np.random.normal(10, 3, 100) + }) + synthetic_reg = pd.DataFrame({ + "feat1": np.random.normal(0, 1, 100), + "feat2": np.random.normal(5, 2, 100), + "target": np.random.normal(10, 3, 100) + }) + efficacy_reg = EfficacyMetrics(task='regression', target_column="target", random_state=42) + metrics = efficacy_reg.evaluate(real_reg, synthetic_reg) + self.assertIn("mse", metrics) + self.assertIn("mae", metrics) + self.assertIn("r2", metrics) + self.assertLessEqual(metrics["r2"], 1.0) -def test_synthpop_custom_visit_sequence(): - """Test Synthpop with custom visit sequence using Adult dataset.""" - # Define custom visit sequence - visit_sequence = [0, 1, 5, 3, 2] - - # Initialize Synthpop with custom visit sequence - spop = Synthpop(visit_sequence=visit_sequence) - - # Fit the model with automatic type inference - spop.fit(df) - - # Generate synthetic data - synth_df = spop.generate(len(df)) - - # Verify only specified columns were synthesized - expected_columns = ['age', 'workclass', 'marital.status', 'education', 'fnlwgt'] - assert len(synth_df.columns) == len(expected_columns) - assert all(col in synth_df.columns for col in expected_columns) - - # Verify visit sequence matches what was specified - assert len(spop.visit_sequence) == len(visit_sequence) - 
assert spop.visit_sequence['age'] == 0 - assert spop.visit_sequence['workclass'] == 1 - assert spop.visit_sequence['marital.status'] == 2 - assert spop.visit_sequence['education'] == 3 - assert spop.visit_sequence['fnlwgt'] == 4 - - # Verify predictor matrix has correct shape for subset of columns - assert spop.predictor_matrix.shape == (len(expected_columns), len(expected_columns)) - assert all(col in spop.predictor_matrix.columns for col in expected_columns) - assert all(col in spop.predictor_matrix.index for col in expected_columns) - - # Verify specific predictor relationships from example - pred_matrix = spop.predictor_matrix - assert pred_matrix.loc['age', 'age'] == 0 - assert pred_matrix.loc['workclass', 'age'] == 1 - assert pred_matrix.loc['workclass', 'workclass'] == 0 - assert pred_matrix.loc['fnlwgt', ['age', 'workclass', 'education', 'marital.status']].sum() == 4 - assert pred_matrix.loc['education', ['age', 'workclass', 'marital.status']].sum() == 3 - assert pred_matrix.loc['marital.status', ['age', 'workclass']].sum() == 2 + def test_classification(self): + np.random.seed(42) + real_clf = pd.DataFrame({ + "feat1": np.random.normal(0, 1, 100), + "feat2": np.random.normal(5, 2, 100), + "target": np.random.choice(["A", "B"], size=100) + }) + synthetic_clf = pd.DataFrame({ + "feat1": np.random.normal(0, 1, 100), + "feat2": np.random.normal(5, 2, 100), + "target": np.random.choice(["A", "B"], size=100) + }) + efficacy_clf = EfficacyMetrics(task='classification', target_column="target", random_state=42) + metrics = efficacy_clf.evaluate(real_clf, synthetic_clf) + self.assertIn("accuracy", metrics) + self.assertIn("f1_score", metrics) + +# ------------------------------- +# Tests for DisclosureProtection +# ------------------------------- +class TestDisclosureProtection(unittest.TestCase): + def test_score_and_report(self): + np.random.seed(42) + real_dp = pd.DataFrame({ + "f1": np.random.normal(0, 1, 100), + "f2": np.random.normal(5, 2, 100) + }) + # Create synthetic data by adding small noise + synthetic_dp = real_dp + np.random.normal(0, 0.5, real_dp.shape) + dp = DisclosureProtection(real_dp, synthetic_dp) + score = dp.score() + report = dp.report() + self.assertIsInstance(score, float) + self.assertIsInstance(report, dict) + self.assertIn("threshold", report) + self.assertIn("risk_rate", report) + self.assertIn("disclosure_protection_score", report) + +# ------------------------------- +# Tests for DataProcessor +# ------------------------------- +class TestDataProcessor(unittest.TestCase): + def setUp(self): + # Create a DataFrame with different types. + self.df = pd.DataFrame({ + "numeric": np.random.normal(10, 2, 50), + "categorical": np.random.choice(["Red", "Green", "Blue"], 50), + "boolean": np.random.choice([True, False], 50), + "datetime": pd.date_range("2023-01-01", periods=50), + "timedelta": pd.to_timedelta(np.random.randint(1, 100, 50), unit="D"), + "float": np.random.uniform(0, 1, 50) + }) + self.metadata = { + "numeric": "numerical", + "categorical": "categorical", + "boolean": "boolean", + "datetime": "datetime", + "timedelta": "timedelta", + "float": "numerical" + } + self.processor = DataProcessor(self.metadata) + + def test_preprocess_postprocess(self): + # Preprocess the data + processed = self.processor.preprocess(self.df) + self.assertIsInstance(processed, pd.DataFrame) + # Check that categorical columns are encoded (i.e. 
no string values remain) + for col, dtype in self.metadata.items(): + if dtype == "categorical": + self.assertTrue(np.issubdtype(processed[col].dtype, np.number) or col not in processed.columns) + # Simulate synthetic data as processed copy then postprocess back + synthetic_processed = processed.copy() + recovered = self.processor.postprocess(synthetic_processed) + self.assertIsInstance(recovered, pd.DataFrame) + # Check that the recovered DataFrame has the original columns order. + self.assertListEqual(list(recovered.columns), list(self.df.columns)) + + def test_validate_raises_on_missing_column(self): + # Remove one column so that validation should fail. + df_missing = self.df.drop(columns=["numeric"]) + with self.assertRaises(InvalidDataError): + self.processor.validate(df_missing) + +# ------------------------------- +# Tests for MissingDataHandler +# ------------------------------- +class TestMissingDataHandler(unittest.TestCase): + def setUp(self): + # Create a DataFrame with missing values in different types. + self.df = pd.DataFrame({ + "num": [1, 2, np.nan, 4, 5], + "cat": ["a", np.nan, "b", "a", "c"], + "bool": [True, False, np.nan, True, False], + "datetime": pd.to_datetime(["2023-01-01", np.nan, "2023-01-03", "2023-01-04", "2023-01-05"]), + "timedelta": pd.to_timedelta([1, 2, np.nan, 4, 5], unit="D") + }) + self.handler = MissingDataHandler() + + + def test_apply_imputation(self): + # First, detect missingness; we won't get perfect detection, but just test that imputation runs. + missingness = self.handler.detect_missingness(self.df) + imputed = self.handler.apply_imputation(self.df, missingness) + # Check that after imputation there are no missing values. + self.assertFalse(imputed.isna().any().any()) + +# ------------------------------- +# Tests for GaussianCopulaMethod +# ------------------------------- +class TestGaussianCopulaMethod(unittest.TestCase): + def setUp(self): + # Create a simple DataFrame with numerical and categorical columns. + self.df = pd.DataFrame({ + "numeric": np.random.normal(50, 10, 100), + "categorical": np.random.choice(["Red", "Green", "Blue"], 100) + }) + self.metadata = { + "numeric": "numerical", + "categorical": "categorical" + } + # For simplicity, we use the DataProcessor to convert data to numeric space. + self.processor = DataProcessor(self.metadata) + self.processed = self.processor.preprocess(self.df) + self.gc = GaussianCopulaMethod(self.metadata) + self.gc.fit(self.processed) + + def test_sample_shape(self): + # Use the sample method with a requested number of rows. + num_samples = 50 + synthetic = self.gc.sample(num_samples) + self.assertIsInstance(synthetic, pd.DataFrame) + self.assertEqual(len(synthetic), num_samples) + + def test_get_learned_distributions(self): + # After fitting, learned distributions should be available. + distributions = self.gc.get_learned_distributions() + self.assertIsInstance(distributions, dict) + # Check that keys correspond to columns in metadata. 
+ for col in self.metadata.keys(): + self.assertIn(col, distributions) + +if __name__ == "__main__": + unittest.main() diff --git a/tests_processing.py b/tests_processing.py deleted file mode 100644 index a01f5eb..0000000 --- a/tests_processing.py +++ /dev/null @@ -1,44 +0,0 @@ -import unittest -from synthpop import Synthpop -import pandas as pd -import numpy as np - -class TestProcessing(unittest.TestCase): - - def test_add_NaN_columns_for_numeric_columns(self): - df = pd.DataFrame({'a':[1,2,np.nan], 'b':[1,1,1], 'c':['x','y',None]}) - spop = Synthpop() - dtype_map = {'a':'float','b':'float', 'c':'categorical'} - res,dtype_res = spop.pre_preprocess(df,dtype_map,nan_fill=-8) - - self.assertTrue('a_NaN' in res,"Nan column not made") - self.assertFalse('b_NaN' in res,"Nan column should not be made if there are no NaNs") - self.assertFalse('c_NaN' in res,"Nan column should not be made for categorical columns") - self.assertTrue(res['a_NaN'][2]) - self.assertEqual(res['a'][2], -8) - self.assertEqual(dtype_res['a_NaN'],'category') - self.assertEqual(spop.map_column_to_NaN_column['a'],'a_NaN') - def test_make_visit_sequence_when_one_is_given(self): - - visit_seq = ['x','a','b'] - spop = Synthpop(visit_sequence=visit_seq) - spop.map_column_to_NaN_column = {'a':'a_NaN','c':'c_NaN'} - - spop.include_nan_columns() - - self.assertSequenceEqual(spop.visit_sequence,['x','a_NaN','a','b']) - - - def test_apply_and_remove_added_NaN_columns(self): - df = pd.DataFrame({'a':[1,2,-8],'a_NaN':[False,True,False], 'b':[1,1,1], 'c':['x','y',None]}) - - spop = Synthpop() - spop.map_column_to_NaN_column = {'a':'a_NaN'} - - res = spop.post_postprocessing(df) - self.assertTrue(np.isnan(res['a'][1]), "NaNs should be placed where indicated") - self.assertFalse('a_NaN' in res, "indicator columns should be removed") - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file
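
The tests above exercise the new preprocessing, imputation, modelling, and metrics pieces individually. As an illustration of how the components introduced in this diff could be composed end to end, here is a minimal usage sketch. It is not part of the patch: the toy DataFrame, column names, and seeds are invented for the example, and the GaussianCopulaMethod fit/sample interface is assumed to behave as exercised in tests/test_synthpop.py above; actual results depend on that method's implementation, which is not shown in this diff.

# Illustrative end-to-end sketch (assumed workflow, not part of the patch).
import numpy as np
import pandas as pd

from synthpop.processor.missing_data_handler import MissingDataHandler
from synthpop.processor.data_processor import DataProcessor
from synthpop.method.GC import GaussianCopulaMethod
from synthpop.metrics import MetricsReport

# 1. Toy data with a few missing values (invented for illustration).
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "age": rng.normal(40, 10, 200),
    "income": rng.normal(50_000, 10_000, 200),
    "colour": rng.choice(["red", "green", "blue"], 200),
})
df.loc[df.sample(frac=0.05, random_state=0).index, "income"] = np.nan

# 2. Infer column types, classify the missingness, and impute before modelling.
handler = MissingDataHandler()
metadata = handler.get_column_dtypes(df)        # e.g. {"age": "numerical", "colour": "categorical", ...}
missingness = handler.detect_missingness(df)    # e.g. {"income": "MCAR"}
df_imputed = handler.apply_imputation(df, missingness)

# 3. Transform to numerical space, fit the copula, sample, and map the
#    synthetic rows back to the original dtypes and column order.
processor = DataProcessor(metadata)
processed = processor.preprocess(df_imputed)
gc = GaussianCopulaMethod(metadata)
gc.fit(processed)
synthetic = processor.postprocess(gc.sample(len(df_imputed)))

# 4. Compare real and synthetic data column by column.
report = MetricsReport(df_imputed, synthetic, metadata).generate_report()
print(report)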