In [1]:
# Install missing libraries (safe to run repeatedly)
import importlib
import subprocess
import sys

def _ensure(pkg: str, import_name: str | None = None) -> None:
    """Install a package via pip only if its import is missing."""
    name = import_name or pkg
    try:
        importlib.import_module(name)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])

# Each key is the pip distribution name; each value is the module name that
# the import probe tries first.
for _pkg, _imp in {
    "pandas": "pandas",
    "numpy": "numpy",
    "sqlalchemy": "sqlalchemy",
    "bokeh": "bokeh",
    "pytest": "pytest",  # the tests below use unittest; pytest is installed for completeness
}.items():
    _ensure(_pkg, _imp)

print("Dependencies are ready.")


Dependencies are ready.


In [6]:
# Imports & global configuration
import math
import os
from dataclasses import dataclass
from typing import List, Tuple

import numpy as np
import pandas as pd
from sqlalchemy import Column, Float, Integer, create_engine, inspect
from sqlalchemy.orm import declarative_base, sessionmaker

from bokeh.io import output_notebook, show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Category10
from bokeh.plotting import figure

from IPython.display import display

# Directory holding the input CSVs and the output database. Defaults to the
# original hard-coded "/content"; set ASSIGNMENT_DATA_DIR to run the notebook
# against a different location without editing this cell.
DATA_DIR = os.environ.get("ASSIGNMENT_DATA_DIR", "/content")

# Input data paths
TRAIN_PATH = os.path.join(DATA_DIR, "train.csv")
IDEAL_PATH = os.path.join(DATA_DIR, "ideal.csv")
TEST_PATH = os.path.join(DATA_DIR, "test.csv")

# Output DB path
DB_PATH = os.path.join(DATA_DIR, "assignment.db")

# os.path.dirname() returns "" for a bare filename and os.makedirs("") raises,
# so only create the directory when there is one to create.
_db_dir = os.path.dirname(DB_PATH)
if _db_dir:
    os.makedirs(_db_dir, exist_ok=True)

output_notebook()


In [7]:
# OOP implementation (with inheritance) + custom exceptions
class DataValidationError(Exception):
    """Raised when dataset structure or values do not meet expectations.

    Used in this notebook for problems such as a missing or empty CSV file,
    too few y columns, non-numeric or missing values, duplicate x values, and
    training x values that have no counterpart in the ideal dataset.
    """


class MappingRuleError(Exception):
    """Raised when mapping rule prerequisites are violated.

    The test-point mapper raises it when the selection table does not contain
    exactly four rows.
    """


class DataHandler:
    """Base class for dataset loading/validation.

    Only defines the ``load`` interface; the implementation here always raises
    and is meant to be overridden by subclasses.
    """

    def load(self, path: str) -> pd.DataFrame:
        """Load the dataset stored at ``path`` and return it as a DataFrame.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


class CSVDataHandler(DataHandler):
    """CSV loader that standardizes schemas and validates alignment.

    ``load`` reads a raw CSV file. The ``standardize_*`` methods convert a raw
    frame into the canonical layout (``x`` plus ``y``/``y1..yN``), coerce every
    column to numeric, and return the rows sorted by ``x`` with a fresh index.
    """

    def load(self, path: str) -> pd.DataFrame:
        """Read ``path`` as CSV and strip whitespace from the column names.

        Raises:
            DataValidationError: If the file does not exist, has no parseable
                columns (for example a zero-byte file), or has no data rows.
        """
        if not os.path.exists(path):
            raise DataValidationError(f"File not found: {path}")
        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError as exc:
            # For a zero-byte file pandas raises before any frame exists, so
            # the ``df.empty`` check below would never be reached.
            raise DataValidationError(f"Empty dataset: {path}") from exc
        if df.empty:
            raise DataValidationError(f"Empty dataset: {path}")
        df.columns = [str(c).strip() for c in df.columns]
        return df

    @staticmethod
    def _detect_x_col(df: pd.DataFrame) -> str:
        """Pick the x column: exact "x" (any case), else first "x…" name, else first column."""
        cols = list(df.columns)
        lower_map = {c.lower(): c for c in cols}
        if "x" in lower_map:
            return lower_map["x"]
        x_like = [c for c in cols if c.lower().startswith("x")]
        return x_like[0] if x_like else cols[0]

    @staticmethod
    def _sorted_y_cols(cols: List[str]) -> List[str]:
        """Order column names by the number embedded in them; names without digits go last."""
        def key(c: str):
            cl = c.lower()
            digits = "".join(ch for ch in cl if ch.isdigit())
            # The leading 0/1 keeps numbered and un-numbered names in separate
            # groups, so an int is never compared against a str.
            return (0, int(digits)) if digits else (1, cl)
        return sorted(cols, key=key)

    def _rename_x(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
        """Return a copy with the detected x column renamed to "x", plus the remaining column names."""
        x_col = self._detect_x_col(df)
        out = df.rename(columns={x_col: "x"}).copy()
        return out, [c for c in out.columns if c != "x"]

    def _pick_y_cols(self, y_candidates: List[str], n_required: int) -> List[str]:
        """Sort the y-prefixed candidates if there are at least ``n_required``, else sort all candidates."""
        y_like = [c for c in y_candidates if c.lower().startswith("y")]
        return self._sorted_y_cols(y_like if len(y_like) >= n_required else y_candidates)

    @staticmethod
    def _coerce_and_validate(
        out: pd.DataFrame,
        y_cols: List[str],
        new_names: List[str],
        label: str,
        require_unique_x: bool = True,
    ) -> pd.DataFrame:
        """Select x + y columns, rename, coerce to numeric, validate, and sort by x."""
        result = out[["x"] + y_cols].rename(columns=dict(zip(y_cols, new_names)))
        for col in ["x"] + new_names:
            # errors="coerce" turns unparsable cells into NaN, which the check
            # below then reports as a validation failure.
            result[col] = pd.to_numeric(result[col], errors="coerce")

        if result.isna().any().any():
            raise DataValidationError(
                f"{label} dataset contains non-numeric or missing values after standardization."
            )
        if require_unique_x and result["x"].duplicated().any():
            raise DataValidationError(f"{label} dataset has duplicate x values.")
        return result.sort_values("x").reset_index(drop=True)

    def standardize_training(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return training DF with columns: x, y1..y4.

        Raises:
            DataValidationError: If there are fewer than four y columns, any
                value is missing or non-numeric, or x values repeat.
        """
        out, y_candidates = self._rename_x(df)
        if not y_candidates:
            raise DataValidationError("Training dataset has no y columns.")

        y_cols = self._pick_y_cols(y_candidates, 4)
        if len(y_cols) < 4:
            raise DataValidationError(f"Training dataset must have 4 y columns; found {len(y_cols)}.")

        new_names = [f"y{i}" for i in range(1, 5)]
        return self._coerce_and_validate(out, y_cols[:4], new_names, "Training")

    def standardize_ideal(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return ideal DF with columns: x, y1..y50.

        Raises:
            DataValidationError: If there are fewer than fifty y columns, any
                value is missing or non-numeric, or x values repeat.
        """
        out, y_candidates = self._rename_x(df)
        if len(y_candidates) < 50:
            raise DataValidationError(f"Ideal dataset must have 50 y columns; found {len(y_candidates)}.")

        y_cols = self._pick_y_cols(y_candidates, 50)[:50]
        new_names = [f"y{i}" for i in range(1, 51)]
        return self._coerce_and_validate(out, y_cols, new_names, "Ideal")

    def standardize_test(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return test DF with columns: x, y.

        The first non-x column is taken as ``y``. Repeated x values are allowed
        here, unlike in the training and ideal datasets.

        Raises:
            DataValidationError: If there is no y column or any value is
                missing or non-numeric.
        """
        out, y_candidates = self._rename_x(df)
        if not y_candidates:
            raise DataValidationError("Test dataset must contain a y column.")
        return self._coerce_and_validate(
            out, [y_candidates[0]], ["y"], "Test", require_unique_x=False
        )

    @staticmethod
    def validate_x_alignment(train_df: pd.DataFrame, ideal_df: pd.DataFrame) -> None:
        """Ensure every training x exists in ideal x.

        Raises:
            DataValidationError: If any training x value is absent from the
                ideal dataset; the message lists up to five examples.
        """
        missing = set(train_df["x"].tolist()) - set(ideal_df["x"].tolist())
        if missing:
            raise DataValidationError(
                f"Ideal dataset is missing {len(missing)} x values required by training data (example: {sorted(missing)[:5]})."
            )

def sse(a: np.ndarray, b: np.ndarray) -> float:
    """Return the sum of squared element-wise differences between ``a`` and ``b``.

    Raises:
        ValueError: If the two arrays do not have the same shape.
    """
    if a.shape == b.shape:
        residual = a - b
        return float(np.sum(np.square(residual)))
    raise ValueError("SSE inputs must have the same shape.")

class FunctionSelector:
    """Base class for selecting ideal functions.

    Only defines the ``select`` interface; the implementation here always
    raises and is meant to be overridden by subclasses.
    """

    def select(self, train_df: pd.DataFrame, ideal_df: pd.DataFrame) -> pd.DataFrame:
        """Choose ideal functions for the training data and return them as a DataFrame.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

@dataclass(frozen=True)
class SelectionResult:
    """Immutable record of the ideal function chosen for one training column."""
    training_col: str  # training column name, e.g. "y1"
    ideal_func_no: int  # 1-based number of the selected ideal function
    ideal_col: str  # ideal column name, i.e. "y" followed by ideal_func_no
    sse: float  # sum of squared errors between the training and ideal values
    max_train_dev: float  # largest absolute training-vs-ideal deviation

class LeastSquaresSelector(FunctionSelector):
    """Least-squares selector: for each training column, pick the ideal function with the lowest SSE."""

    def select(self, train_df: pd.DataFrame, ideal_df: pd.DataFrame) -> pd.DataFrame:
        """Return one row per training column (y1..y4) describing its best ideal function.

        The result has the columns ``training_col``, ``ideal_func_no``,
        ``ideal_col``, ``sse`` and ``max_train_dev``, sorted by ``training_col``.

        Raises:
            DataValidationError: If a training x has no ideal counterpart, or
                no ideal function could be selected for a training column.
        """
        # Prefix the ideal columns so they cannot collide with training y1..y4.
        candidate_cols = [f"ideal_y{k}" for k in range(1, 51)]
        prefixed = ideal_df.rename(columns={f"y{k}": f"ideal_y{k}" for k in range(1, 51)})
        joined = train_df.merge(prefixed, on="x", how="left")

        if joined[candidate_cols].isna().any().any():
            raise DataValidationError("Alignment produced missing ideal values at some training x positions.")

        picks: List[SelectionResult] = []
        for name in (f"y{k}" for k in range(1, 5)):
            observed = joined[name].to_numpy(dtype=float)
            lowest = math.inf
            winner = None  # (function number, max abs deviation) of the best candidate so far

            for func_no, cand in enumerate(candidate_cols, start=1):
                reference = joined[cand].to_numpy(dtype=float)
                err = sse(observed, reference)
                if not err < lowest:
                    # Ties keep the earlier function; a NaN error never wins.
                    continue
                lowest = err
                winner = (func_no, float(np.max(np.abs(observed - reference))))

            if winner is None:
                raise DataValidationError(f"Failed to select ideal function for {name}.")

            chosen_no, chosen_dev = winner
            picks.append(
                SelectionResult(
                    training_col=name,
                    ideal_func_no=int(chosen_no),
                    ideal_col=f"y{chosen_no}",
                    sse=float(lowest),
                    max_train_dev=float(chosen_dev),
                )
            )

        table = pd.DataFrame([vars(p) for p in picks])
        return table.sort_values("training_col").reset_index(drop=True)

class DatabaseManager:
    """Base class for database operations.

    Declares the persistence interface only; every method here raises
    NotImplementedError and is meant to be overridden by subclasses.
    """

    def create_tables(self) -> None:
        """Create the database tables."""
        raise NotImplementedError

    def insert_training(self, train_df: pd.DataFrame) -> None:
        """Persist the rows of the training DataFrame."""
        raise NotImplementedError

    def insert_ideal(self, ideal_df: pd.DataFrame) -> None:
        """Persist the rows of the ideal-functions DataFrame."""
        raise NotImplementedError

    def insert_mappings(self, mapped_df: pd.DataFrame) -> None:
        """Persist the rows of the test-point mapping DataFrame."""
        raise NotImplementedError

class SQLiteDatabaseManager(DatabaseManager):
    """SQLite database manager using SQLAlchemy ORM.

    Each instance creates its own declarative base and builds the three table
    models (``training_data``, ``ideal_functions``, ``test_mapping``) on it.
    """

    def __init__(self, db_path: str):
        """Create the engine, session factory and ORM models for ``db_path``."""
        self.db_path = db_path
        self.engine = create_engine(f"sqlite:///{db_path}", future=True)
        self.Session = sessionmaker(bind=self.engine, future=True)
        self.Base = declarative_base()

        # Dynamically build ORM models with exact requested column sets
        self.Training = self._make_training_model()
        self.Ideal = self._make_ideal_model()
        self.Mapping = self._make_mapping_model()

    def _make_training_model(self):
        """Build the ``training_data`` model: x (primary key) and y1..y4."""
        attrs = {
            "__tablename__": "training_data",
            "x": Column(Float, primary_key=True),
            "y1": Column(Float),
            "y2": Column(Float),
            "y3": Column(Float),
            "y4": Column(Float),
        }
        return type("TrainingData", (self.Base,), attrs)

    def _make_ideal_model(self):
        """Build the ``ideal_functions`` model: x (primary key) and y1..y50."""
        attrs = {"__tablename__": "ideal_functions", "x": Column(Float, primary_key=True)}
        for i in range(1, 51):
            attrs[f"y{i}"] = Column(Float)
        return type("IdealFunctions", (self.Base,), attrs)

    def _make_mapping_model(self):
        """Build the ``test_mapping`` model, keyed by the pair (x, ideal_func_no)."""
        attrs = {
            "__tablename__": "test_mapping",
            "x": Column(Float, primary_key=True),
            "y": Column(Float),
            "delta_y": Column(Float),
            "ideal_func_no": Column(Integer, primary_key=True),
        }
        return type("TestMapping", (self.Base,), attrs)

    def create_tables(self) -> None:
        """Delete any existing database file and create all tables in a fresh one."""
        # Release pooled connections before the file is removed and the engine
        # replaced; otherwise the old engine keeps handles to the deleted file
        # and they are never closed.
        self.engine.dispose()

        # Remove existing db for reproducibility
        if os.path.exists(self.db_path):
            os.remove(self.db_path)
        self.engine = create_engine(f"sqlite:///{self.db_path}", future=True)
        self.Session.configure(bind=self.engine)
        self.Base.metadata.create_all(self.engine)

    def _bulk_insert(self, model, df: pd.DataFrame) -> None:
        """Insert every row of ``df`` into ``model``'s table and commit."""
        recs = df.to_dict("records")
        with self.Session() as session:
            session.bulk_insert_mappings(model, recs)
            session.commit()

    def insert_training(self, train_df: pd.DataFrame) -> None:
        """Insert the training rows into ``training_data``."""
        self._bulk_insert(self.Training, train_df)

    def insert_ideal(self, ideal_df: pd.DataFrame) -> None:
        """Insert the ideal-function rows into ``ideal_functions``."""
        self._bulk_insert(self.Ideal, ideal_df)

    def insert_mappings(self, mapped_df: pd.DataFrame) -> None:
        """Insert the mapped test points into ``test_mapping``."""
        self._bulk_insert(self.Mapping, mapped_df)

    def sample_table(self, table_name: str, n: int = 5) -> pd.DataFrame:
        """Return the first ``n`` rows of ``table_name`` as a DataFrame.

        NOTE(review): ``table_name`` and ``n`` are interpolated into the SQL
        text unescaped, so only pass trusted, hard-coded values.
        """
        return pd.read_sql_query(f"SELECT * FROM {table_name} LIMIT {n};", self.engine)

class TestPointMapper:
    """Maps test points to selected ideal functions using the √2 rule.

    A test point qualifies for an ideal function when its absolute deviation
    from that function is at most sqrt(2) times the maximum training deviation
    recorded for it; among qualifying functions the closest one is assigned.
    """

    @staticmethod
    def qualifies(dev_test: float, max_train_dev: float) -> bool:
        """Return True iff |y_test - y_ideal| <= max_train_dev * sqrt(2)."""
        return dev_test <= max_train_dev * math.sqrt(2.0)

    def map_points(
        self,
        test_df: pd.DataFrame,
        ideal_df: pd.DataFrame,
        selection_df: pd.DataFrame,
        insert_only_mapped: bool = True,
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Return (mapped_df, full_results_df) with mapping decisions.

        Args:
            test_df: Test points with columns ``x`` and ``y``.
            ideal_df: Ideal functions with ``x`` and the selected ``y…`` columns.
            selection_df: Four rows with ``ideal_col``, ``ideal_func_no`` and
                ``max_train_dev``.
            insert_only_mapped: When True the first frame holds only mapped
                points; otherwise it holds every test point, with NaN in
                ``delta_y`` / ``ideal_func_no`` for unmapped ones.

        Returns:
            A pair of frames. The first has the columns x, y, delta_y and
            ideal_func_no; the second additionally carries the boolean
            ``mapped`` flag for every test point.

        Raises:
            MappingRuleError: If ``selection_df`` does not have exactly 4 rows.
        """
        if selection_df.shape[0] != 4:
            raise MappingRuleError("Selection must contain exactly 4 rows (for y1..y4).")

        selected_ideal_cols = selection_df["ideal_col"].tolist()
        selected_func_nos = np.asarray(selection_df["ideal_func_no"].tolist(), dtype=int)
        thresholds = (selection_df["max_train_dev"].to_numpy(dtype=float) * math.sqrt(2.0)).reshape(1, -1)

        # Two training functions may select the same ideal function. Merge each
        # ideal column only once, then rebuild the matrix column by column so it
        # always has one column per selection row, matching ``thresholds``.
        unique_cols = list(dict.fromkeys(selected_ideal_cols))
        merged = test_df.merge(ideal_df[["x"] + unique_cols], on="x", how="left")

        ideal_vals = np.column_stack(
            [merged[c].to_numpy(dtype=float) for c in selected_ideal_cols]
        )  # NaN for missing x
        y_test = merged["y"].to_numpy(dtype=float).reshape(-1, 1)
        diffs = np.abs(y_test - ideal_vals)
        qualifies_mask = diffs <= thresholds  # NaN comparisons -> False
        masked = np.where(qualifies_mask, diffs, np.inf)

        # Closest qualifying function per test point; an infinite minimum means
        # that no function qualified.
        best_idx = np.argmin(masked, axis=1)
        best_diff = masked[np.arange(masked.shape[0]), best_idx]
        is_mapped = np.isfinite(best_diff)
        assigned_no = selected_func_nos[best_idx]

        results = merged[["x", "y"]].copy()
        results["delta_y"] = np.where(is_mapped, best_diff, np.nan)
        results["ideal_func_no"] = np.where(is_mapped, assigned_no, np.nan)
        results["mapped"] = is_mapped

        mapped_df = results.loc[results["mapped"], ["x", "y", "delta_y", "ideal_func_no"]].copy()
        mapped_df["ideal_func_no"] = mapped_df["ideal_func_no"].astype(int)

        if insert_only_mapped:
            return mapped_df.reset_index(drop=True), results.reset_index(drop=True)

        # Alternative (not used by default): include unmapped with NULL ideal_func_no/delta_y
        return results[["x", "y", "delta_y", "ideal_func_no"]].reset_index(drop=True), results.reset_index(drop=True)


In [8]:
# Load datasets
handler = CSVDataHandler()

# Step 1: read the raw CSV files
raw_train = handler.load(TRAIN_PATH)
raw_ideal = handler.load(IDEAL_PATH)
raw_test  = handler.load(TEST_PATH)

# Step 2: bring each frame into its canonical column layout
train_df = handler.standardize_training(raw_train)
ideal_df = handler.standardize_ideal(raw_ideal)
test_df  = handler.standardize_test(raw_test)

# Step 3: every training x must also be present in the ideal dataset
handler.validate_x_alignment(train_df, ideal_df)

# Labels are padded so the printed shapes line up in one column
for _label, _frame in (
    ("Training shape:", train_df),
    ("Ideal shape:   ", ideal_df),
    ("Test shape:    ", test_df),
):
    print(_label, _frame.shape)

train_df.head()


Training shape: (400, 5)
Ideal shape:    (400, 51)
Test shape:     (100, 2)


Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,39.778572,-40.07859,-20.214268,-0.324914
1,-19.9,39.604813,-39.784,-20.07095,-0.05882
2,-19.8,40.09907,-40.018845,-19.906782,-0.45183
3,-19.7,40.1511,-39.518402,-19.389118,-0.612044
4,-19.6,39.795662,-39.360065,-19.81589,-0.306076


In [9]:
# Create DB + insert tables
db = SQLiteDatabaseManager(DB_PATH)
db.create_tables()

db.insert_training(train_df)
db.insert_ideal(ideal_df)

print("Database created:", DB_PATH)
print("Tables:", inspect(db.engine).get_table_names())

# Show the first rows of each populated table, read back from the database
for _header, _table in (
    ("\n--- Sample: training_data ---", "training_data"),
    ("\n--- Sample: ideal_functions ---", "ideal_functions"),
):
    print(_header)
    display(db.sample_table(_table, 5))


Database created: /content/assignment.db
Tables: ['ideal_functions', 'test_mapping', 'training_data']

--- Sample: training_data ---


Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,39.778572,-40.07859,-20.214268,-0.324914
1,-19.9,39.604813,-39.784,-20.07095,-0.05882
2,-19.8,40.09907,-40.018845,-19.906782,-0.45183
3,-19.7,40.1511,-39.518402,-19.389118,-0.612044
4,-19.6,39.795662,-39.360065,-19.81589,-0.306076



--- Sample: ideal_functions ---


Unnamed: 0,x,y1,y2,y3,y4,y5,y6,y7,y8,y9,...,y41,y42,y43,y44,y45,y46,y47,y48,y49,y50
0,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,0.816164,...,-40.456474,40.20404,2.995732,-0.008333,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685
1,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,0.994372,...,-40.23382,40.04859,2.99072,-0.00834,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954
2,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,1.162644,...,-40.006836,39.89066,2.985682,-0.008347,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129
3,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,1.319299,...,-39.775787,39.729824,2.980619,-0.008354,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284
4,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,1.462772,...,-39.54098,39.565693,2.97553,-0.008361,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902


In [10]:
# Select the best-fitting ideal function for each training column
selector = LeastSquaresSelector()
selection_summary = selector.select(train_df, ideal_df)

# Display copy: integer function numbers and an upper-case SSE column
summary_display = selection_summary.assign(
    ideal_func_no=lambda frame: frame["ideal_func_no"].astype(int),
    SSE=lambda frame: frame["sse"],
)[["training_col", "ideal_func_no", "SSE", "max_train_dev"]]

print("Least-squares selection summary:")
display(summary_display)

# Plain lists of the selected ideal columns and their function numbers
selected_func_nos = selection_summary["ideal_func_no"].tolist()
selected_ideal_cols = selection_summary["ideal_col"].tolist()


Least-squares selection summary:


Unnamed: 0,training_col,ideal_func_no,SSE,max_train_dev
0,y1,42,34.246594,0.495968
1,y2,41,35.601847,0.497703
2,y3,11,29.86183,0.498936
3,y4,48,31.963434,0.499742


In [12]:
# Map every test point to at most one of the four selected ideal functions.
# mapped_df holds only the mapped points; test_results_df holds all test points
# together with a boolean "mapped" flag.
mapper = TestPointMapper()
mapped_df, test_results_df = mapper.map_points(test_df, ideal_df, selection_summary, insert_only_mapped=True)


# The test_mapping table is keyed by (x, ideal_func_no), so rows that share both
# values cannot be stored side by side. Keep one row per key before inserting.
mapped_df_dedup = mapped_df.drop_duplicates(subset=['x', 'ideal_func_no']).copy()

# Persist mapped points
# NOTE(review): re-running this cell inserts the same keys again and will
# presumably fail on the primary key - re-run the DB creation cell first.
db.insert_mappings(mapped_df_dedup)

# Summary figures: counts before and after the deduplication above.
total_test = len(test_df)
total_mapped = len(mapped_df)
counts_by_ideal = mapped_df_dedup["ideal_func_no"].value_counts().sort_index()

print(f"Total test points:   {total_test}")
print(f"Total mapped points (before deduplication): {total_mapped}")
print(f"Total unique mapped points (after deduplication for DB): {len(mapped_df_dedup)}")
print("\nMapped points per ideal function (after deduplication):")
display(counts_by_ideal.rename("count").to_frame())

# Read the first rows back from the database to confirm what was stored.
print("\n--- Sample: test_mapping ---")
display(db.sample_table("test_mapping", 10))

Total test points:   100
Total mapped points (before deduplication): 48
Total unique mapped points (after deduplication for DB): 46

Mapped points per ideal function (after deduplication):


Unnamed: 0_level_0,count
ideal_func_no,Unnamed: 1_level_1
11,11
41,12
42,12
48,11



--- Sample: test_mapping ---


Unnamed: 0,x,y,delta_y,ideal_func_no
0,-19.8,-19.915014,0.115014,11
1,-19.3,-38.458572,0.359112,41
2,-19.1,-38.155376,0.168541,41
3,-15.0,-0.205363,0.452371,48
4,-12.5,25.675346,0.176446,42
5,-12.0,0.670648,0.444253,48
6,-11.8,24.606413,0.646196,42
7,-10.0,18.979424,0.601039,42
8,-9.5,-9.652251,0.152251,11
9,-8.8,16.571745,0.622709,42


In [14]:
# One palette shared by Plot 1 and Plot 3 so that glyphs are told apart by color.
palette = Category10[10]

# --- Plot 1: Training vs chosen ideal functions ---
p1 = figure(title="Training functions vs selected ideal functions",
            x_axis_label="x", y_axis_label="y", width=900, height=350,
            tools="pan,wheel_zoom,box_zoom,reset,save")

for pair_idx, row in enumerate(selection_summary.itertuples(index=False)):
    tcol = row.training_col
    func_no = int(row.ideal_func_no)
    icol = f"y{func_no}"
    # A training/ideal pair shares one color; the ideal curve is the dashed one.
    pair_color = palette[pair_idx % len(palette)]

    p1.line(train_df["x"], train_df[tcol], line_width=2, color=pair_color,
            legend_label=f"train {tcol}")
    p1.line(ideal_df["x"], ideal_df[icol], line_dash="dashed", line_width=2, color=pair_color,
            legend_label=f"ideal y{func_no} (for {tcol})")

p1.legend.click_policy = "hide"
p1.legend.location = "top_left"

# --- Plot 2: Test points mapped vs unmapped (hover includes deviation when mapped) ---
mapped = test_results_df[test_results_df["mapped"]].copy()
unmapped = test_results_df[~test_results_df["mapped"]].copy()

src_m = ColumnDataSource(mapped)
src_u = ColumnDataSource(unmapped)

p2 = figure(title="Test points — mapped vs unmapped",
            x_axis_label="x", y_axis_label="y", width=900, height=350,
            tools="pan,wheel_zoom,box_zoom,reset,save")

r_m = p2.scatter("x", "y", source=src_m, size=7, legend_label="mapped")
p2.scatter("x", "y", source=src_u, size=7, marker="x", legend_label="unmapped")

# The hover tool is limited to the mapped renderer: unmapped points have no
# ideal_func_no / delta_y to show.
hover2 = HoverTool(renderers=[r_m], tooltips=[
    ("x", "@x{0.000}"),
    ("y_test", "@y{0.000}"),
    ("ideal_func_no", "@ideal_func_no{0}"),
    ("delta_y (abs)", "@delta_y{0.00000}"),
])
p2.add_tools(hover2)
p2.legend.location = "top_left"

# --- Plot 3: Mapped points colored by ideal function; size encodes deviation ---
factors = [str(n) for n in sorted(mapped_df["ideal_func_no"].unique().tolist())]

plot3_df = mapped_df.copy()
plot3_df["ideal_str"] = plot3_df["ideal_func_no"].astype(str)

# size encoding for deviation: 6 px at zero deviation up to 18 px at the largest one
max_d = float(plot3_df["delta_y"].max()) if len(plot3_df) else 0.0
if max_d > 0:
    plot3_df["size"] = 6 + 12 * (plot3_df["delta_y"] / max_d)
else:
    plot3_df["size"] = 8

p3 = figure(title="Mapped test points grouped by assigned ideal function",
            x_axis_label="x", y_axis_label="y", width=900, height=350,
            tools="pan,wheel_zoom,box_zoom,reset,save")

renderers = []
for i, f in enumerate(factors):
    sub = plot3_df[plot3_df["ideal_str"] == f]
    src_sub = ColumnDataSource(sub)
    # One color per ideal function; without it every group is drawn alike.
    rend = p3.scatter("x", "y", size="size", source=src_sub,
                      color=palette[i % len(palette)], legend_label=f"ideal {f}")
    renderers.append(rend)

hover3 = HoverTool(tooltips=[
    ("x", "@x{0.000}"),
    ("y_test", "@y{0.000}"),
    ("ideal_func_no", "@ideal_func_no{0}"),
    ("delta_y (abs)", "@delta_y{0.00000}"),
])
p3.add_tools(hover3)
p3.legend.location = "top_left"
p3.legend.click_policy = "hide"

show(column(p1, p2, p3))


In [15]:
import unittest

class TestLeastSquaresIdealMapping(unittest.TestCase):
    def test_selection_count(self):
        self.assertEqual(len(selection_summary), 4)
        self.assertTrue(set(selection_summary["training_col"]) == {"y1", "y2", "y3", "y4"})
        self.assertTrue(selection_summary["ideal_func_no"].between(1, 50).all())

    def test_sse_synthetic(self):
        a = np.array([1.0, 2.0, 3.0])
        b = np.array([1.0, 2.0, 3.0])
        c = np.array([0.0, 0.0, 0.0])
        self.assertAlmostEqual(sse(a, b), 0.0, places=12)
        self.assertAlmostEqual(sse(a, c), 1.0**2 + 2.0**2 + 3.0**2, places=12)

    def test_mapping_threshold(self):
        max_train_dev = 1.0
        self.assertTrue(TestPointMapper.qualifies(1.0, max_train_dev))
        self.assertTrue(TestPointMapper.qualifies(1.4142135, max_train_dev))  # ~= sqrt(2)
        self.assertFalse(TestPointMapper.qualifies(1.5, max_train_dev))

    def test_db_schema(self):
        eng = create_engine(f"sqlite:///{DB_PATH}", future=True)
        insp = inspect(eng)
        tables = set(insp.get_table_names())
        self.assertTrue({"training_data", "ideal_functions", "test_mapping"}.issubset(tables))

        train_cols = [c["name"] for c in insp.get_columns("training_data")]
        self.assertEqual(train_cols, ["x", "y1", "y2", "y3", "y4"])

        ideal_cols = [c["name"] for c in insp.get_columns("ideal_functions")]
        self.assertEqual(ideal_cols[0], "x")
        self.assertEqual(len(ideal_cols), 51)
        self.assertEqual(ideal_cols[1], "y1")
        self.assertEqual(ideal_cols[-1], "y50")

        map_cols = {c["name"] for c in insp.get_columns("test_mapping")}
        self.assertEqual(map_cols, {"x", "y", "delta_y", "ideal_func_no"})

# Collect the test methods of the case above and run them inside the notebook;
# verbosity=2 prints one line per test.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestLeastSquaresIdealMapping)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)

# Raise so that a "Run All" stops at this cell when any test failed or errored.
if not result.wasSuccessful():
    raise RuntimeError("Unit tests failed.")


test_db_schema (__main__.TestLeastSquaresIdealMapping.test_db_schema) ... ok
test_mapping_threshold (__main__.TestLeastSquaresIdealMapping.test_mapping_threshold) ... ok
test_selection_count (__main__.TestLeastSquaresIdealMapping.test_selection_count) ... ok
test_sse_synthetic (__main__.TestLeastSquaresIdealMapping.test_sse_synthetic) ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.013s

OK


## Git commands

```bash
# Clone the repository and check out the develop branch
git clone <REPO_URL>
cd <REPO_DIRECTORY>
git checkout develop

# After adding a new function/file, commit and push changes to develop
git status
git add .
git commit -m "Add new function for ideal mapping"
git push origin develop
```
