# Imports

In [21]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from math import sqrt
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

#  Load CSV files

In [24]:
train_df = pd.read_csv("train.csv")
ideal_df = pd.read_csv("ideal.csv")
test_df = pd.read_csv("test.csv")

# Normalize column names

In [25]:
train_df.columns = train_df.columns.str.lower()
ideal_df.columns = ideal_df.columns.str.lower()
test_df.columns = test_df.columns.str.lower()

display(train_df.head(), ideal_df.head(), test_df.head())

Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,39.778572,-40.07859,-20.214268,-0.324914
1,-19.9,39.604813,-39.784,-20.07095,-0.05882
2,-19.8,40.09907,-40.018845,-19.906782,-0.45183
3,-19.7,40.1511,-39.518402,-19.389118,-0.612044
4,-19.6,39.795662,-39.360065,-19.81589,-0.306076


Unnamed: 0,x,y1,y2,y3,y4,y5,y6,y7,y8,y9,...,y41,y42,y43,y44,y45,y46,y47,y48,y49,y50
0,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,0.816164,...,-40.456474,40.20404,2.995732,-0.008333,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685
1,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,0.994372,...,-40.23382,40.04859,2.99072,-0.00834,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954
2,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,1.162644,...,-40.006836,39.89066,2.985682,-0.008347,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129
3,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,1.319299,...,-39.775787,39.729824,2.980619,-0.008354,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284
4,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,1.462772,...,-39.54098,39.565693,2.97553,-0.008361,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902


Unnamed: 0,x,y
0,17.5,34.16104
1,0.3,1.215102
2,-8.7,-16.843908
3,-19.2,-37.17087
4,-11.0,-20.263054


# Create SQLite Database

In [26]:
from sqlalchemy import create_engine

# Create SQLite DB
engine = create_engine("sqlite:///functions.db")

# Save tables
train_df.to_sql("train", engine, if_exists="replace", index=False)
ideal_df.to_sql("ideal", engine, if_exists="replace", index=False)
test_df.to_sql("test", engine, if_exists="replace", index=False)
print(" Data successfully saved into SQLite database.")

 Data successfully saved into SQLite database.


# Object-Oriented Structure

In [27]:
class FunctionMappingError(Exception):
    """Custom exception for failed mapping of test data."""
    pass


class FunctionMapper:
    """Handles loading, analysis, and mapping of functions."""

    def __init__(self, train_df, ideal_df):
        self.train_df = train_df
        self.ideal_df = ideal_df
        self.best_functions = {}
        self.max_dev = {}

    def find_best_ideals(self):
        """Find 4 best ideal functions minimizing least squares error."""
        for col in self.train_df.columns[1:]:
            min_dev = float("inf")
            best_col = None
            for ideal_col in self.ideal_df.columns[1:]:
                mse = np.mean((self.train_df[col] - self.ideal_df[ideal_col]) ** 2)
                if mse < min_dev:
                    min_dev = mse
                    best_col = ideal_col
            self.best_functions[col] = best_col
        return self.best_functions

    def compute_max_deviation(self):
        """Compute max |train - ideal| deviation for each selected pair."""
        deviations = {}
        for train_col, ideal_col in self.best_functions.items():
            merged = pd.merge(
                self.train_df[["x", train_col]],
                self.ideal_df[["x", ideal_col]],
                on="x"
            ).rename(columns={train_col: "y_train", ideal_col: "y_ideal"})
            deviations[ideal_col] = np.max(np.abs(merged["y_train"] - merged["y_ideal"]))
        self.max_dev = deviations
        return self.max_dev


# Initialize mapper
mapper = FunctionMapper(train_df, ideal_df)

# Find 4 best ideal functions
best_functions = mapper.find_best_ideals()
print("Best ideal functions:", best_functions)

# Calculate max deviations
max_dev = mapper.compute_max_deviation()
print("Max deviations:", max_dev)


Best ideal functions: {'y1': 'y42', 'y2': 'y41', 'y3': 'y11', 'y4': 'y48'}
Max deviations: {'y42': 0.4959680000000013, 'y41': 0.49770300000000134, 'y11': 0.4989360000000005, 'y48': 0.49974158999999996}


# Map Test Data

In [28]:
class TestDataMapper(FunctionMapper):
    """Subclass extending FunctionMapper to handle test data mapping."""

    def map_test_data(self, test_df):
        results, skipped = [], []
        for _, row in test_df.iterrows():
            x, y = row["x"], row["y"]
            min_diff = float("inf")
            best_func, best_delta = None, None

            for ideal_col in self.max_dev.keys():
                ideal_row = self.ideal_df.iloc[(self.ideal_df["x"] - x).abs().argsort()[:1]]
                ideal_y = ideal_row[ideal_col].values[0]
                diff = abs(y - ideal_y)

                if diff <= sqrt(2) * self.max_dev[ideal_col] and diff < min_diff:
                    min_diff = diff
                    best_func = ideal_col
                    best_delta = diff

            if best_func:
                results.append([x, y, best_delta, best_func])
            else:
                skipped.append((x, y))

        if skipped:
            print(f"{len(skipped)} test points could not be mapped.")

        if not results:
            raise FunctionMappingError("No test points were mapped successfully.")

        return pd.DataFrame(results, columns=["x", "y", "delta_y", "ideal_func"])


# Perform test data mapping
test_mapper = TestDataMapper(train_df, ideal_df)
test_mapper.best_functions = best_functions
test_mapper.max_dev = max_dev

mapped_df = test_mapper.map_test_data(test_df.copy())
display(mapped_df.head())


52 test points could not be mapped.


Unnamed: 0,x,y,delta_y,ideal_func
0,17.5,34.16104,0.351148,y41
1,0.3,1.215102,0.467342,y41
2,0.8,1.426456,0.532222,y41
3,14.0,-0.066506,0.134233,y48
4,-15.0,-0.205363,0.452371,y48


# Save Mapped Results to Database

In [29]:
mapped_df.to_sql("mapped_test", engine, if_exists="replace", index=False)
print(" Mapped test data saved to SQLite database (table: mapped_test).")


 Mapped test data saved to SQLite database (table: mapped_test).


# Visualization with Bokeh

In [30]:
p = figure(title="Train vs Ideal Functions",
           x_axis_label="X",
           y_axis_label="Y",
           width=800, height=500)

# Plot each train + corresponding ideal
for train_col, ideal_col in best_functions.items():
    t_col, i_col = train_col.lower(), ideal_col.lower()

    if t_col not in train_df.columns or i_col not in ideal_df.columns:
        continue

    p.line(train_df["x"], train_df[t_col],
           legend_label=f"Train: {train_col}", line_width=2)
    p.line(ideal_df["x"], ideal_df[i_col],
           legend_label=f"Ideal: {ideal_col}", line_dash="dashed")

show(p)

# Visualization of Mapped Test Points

In [31]:
p2 = figure(title="Mapped Test Data",
            x_axis_label="X",
            y_axis_label="Y",
            width=800, height=500)

# Plot test data grouped by ideal function
for func in mapped_df["ideal_func"].unique():
    subset = mapped_df[mapped_df["ideal_func"] == func]
    p2.circle(subset["x"], subset["y"], size=6, legend_label=f"Mapped to {func}")

show(p2)



# Simple Unit Tests

In [32]:
import unittest

class TestFunctionMapper(unittest.TestCase):
    def test_best_functions_count(self):
        self.assertEqual(len(best_functions), 4)
    def test_max_dev_not_empty(self):
        self.assertTrue(len(max_dev) > 0)
    def test_mapped_df_not_empty(self):
        self.assertTrue(len(mapped_df) > 0)

unittest.main(argv=[''], verbosity=2, exit=False)


test_best_functions_count (__main__.TestFunctionMapper.test_best_functions_count) ... ok
test_mapped_df_not_empty (__main__.TestFunctionMapper.test_mapped_df_not_empty) ... ok
test_max_dev_not_empty (__main__.TestFunctionMapper.test_max_dev_not_empty) ... ok
test_find_best_ideals (__main__.TestFunctionMapping.test_find_best_ideals) ... ok
test_mapped_results_not_empty (__main__.TestFunctionMapping.test_mapped_results_not_empty) ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.009s

OK


<unittest.main.TestProgram at 0x79749e6e5a90>