In [1]:
# importing the standard library helpers, NumPy, Pandas, Bokeh and SQLAlchemy
import math
from math import sqrt
from pathlib import Path

import numpy as np
import pandas as pd
from bokeh.layouts import column
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_notebook
from sqlalchemy import create_engine, Column, Integer, Float, String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import sessionmaker
# Route Bokeh output to the notebook so show() renders plots inline.
output_notebook()

# SQLAlchemy setup
# Declarative base shared by every ORM model in this notebook; the model
# classes register their tables on Base.metadata when they are defined.
Base = declarative_base()

In [2]:
class TrainingData(Base):
    """ORM model for the 'training_data' table: one x value with four y values.

    No visible notebook cell inserts rows into this table.
    """
    __tablename__ = 'training_data'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # x coordinate of the sample.
    x_value = Column(Float)
    # The four y columns (presumably the four training functions Y1..Y4 -- confirm).
    y1 = Column(Float)
    y2 = Column(Float)
    y3 = Column(Float)
    y4 = Column(Float)

class IdealFunction(Base):
    """ORM model for the 'ideal_functions' table: one x value with four y values.

    No visible notebook cell inserts rows into this table.
    """
    __tablename__ = 'ideal_functions'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # x coordinate of the sample.
    x_value = Column(Float)
    # Four y columns (presumably the four selected ideal functions -- confirm).
    y1 = Column(Float)
    y2 = Column(Float)
    y3 = Column(Float)
    y4 = Column(Float)

class TestResult(Base):
    """ORM model for the 'test_results' table.

    The attribute names match the keys of the result dicts built by
    map_test_data, which save_results unpacks into this class.
    """
    __tablename__ = 'test_results'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # x and y coordinates of the test point.
    x_value = Column(Float)
    y_value = Column(Float)
    # Absolute deviation from the assigned ideal function; None when unmatched.
    delta_y = Column(Float)
    # 1-based position of the assigned function in best_matches; None when unmatched.
    ideal_function_number = Column(Integer)


In [5]:
# Start from a clean slate: delete the database file if a previous run left one behind.
db_file = Path("function_matching.db")
if db_file.exists():
    db_file.unlink()
    print("Corrupted database deleted.")
else:
    print("Database file not found.")
# Create the function-matching database file and the tables for the ORM models.
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

# NOTE: `Base` is intentionally NOT re-created here. The model classes
# (TrainingData, IdealFunction, TestResult) are registered on the Base defined
# in the first cell; a fresh declarative_base() has empty metadata, so
# create_all() would create no tables and later inserts would fail.
engine = create_engine(f'sqlite:///{db_file}')  # same file that was just removed above
Base.metadata.create_all(engine)  # creates every table registered on Base
Session = sessionmaker(bind=engine)
session = Session()
print("✅ New database created successfully.")

Corrupted database deleted.
✅ New database created successfully.


In [4]:
# Open the function-matching database and make sure the model tables exist.
# (This cell repeats the engine/session setup of the cell before it.)
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

# NOTE: `Base` is intentionally NOT re-created here. The model classes are
# registered on the Base defined in the first cell; a fresh declarative_base()
# has empty metadata, so create_all() would create no tables.
engine = create_engine('sqlite:///function_matching.db')  # SQLite file in the working directory
Base.metadata.create_all(engine)  # creates any model table that does not exist yet
Session = sessionmaker(bind=engine)
session = Session()
print("✅ New database created successfully.")


✅ New database created successfully.


In [6]:
# Load the three input tables (training, ideal and test data) from the working directory.
train_df, ideal_df, test_df = map(
    pd.read_csv,
    ('train.csv', 'ideal.csv', 'test.csv'),
)


In [7]:
def least_square_deviation(y_true, y_pred):
    """Return the sum of squared differences between y_true and y_pred.

    The inputs are subtracted with ``-``, so they must support arithmetic
    (NumPy arrays, pandas Series or plain numbers).
    """
    residuals = y_true - y_pred
    return np.sum(np.square(residuals))
# pick, for every training column, the ideal column with the smallest squared deviation
def find_best_ideal_matches(train_df, ideal_df):
    """Map each training column to its closest ideal-function column.

    For 'Y1', 'Y2', 'Y3' and 'Y4' in train_df, every column of ideal_df except
    the first (skipped as the X column) is scored with least_square_deviation,
    and the lowest-scoring column name is kept. Ties keep the earlier column.

    Returns a dict {training column: ideal column name}. A value is None when
    no candidate scored below infinity.
    """
    candidate_cols = ideal_df.columns[1:]  # skip X
    chosen = {}
    for train_col in ['Y1', 'Y2', 'Y3', 'Y4']:
        winner, lowest = None, float('inf')
        for candidate in candidate_cols:
            score = least_square_deviation(train_df[train_col], ideal_df[candidate])
            if score < lowest:
                winner, lowest = candidate, score
        chosen[train_col] = winner  # store the best match for this training column
    return chosen


In [7]:
# map_test_data assigns every test point to the best-fitting selected ideal
# function (if any) and returns one result record per test point.
def map_test_data(test_df, ideal_df, best_matches, train_df=None):
    """Assign each test point to the closest ideal function within tolerance.

    Parameters
    ----------
    test_df : DataFrame with columns 'X' and 'Y'.
    ideal_df : DataFrame with an 'X' column and the ideal-function columns.
    best_matches : dict {training column: ideal column}. The 1-based position
        of an entry is reported as ``ideal_function_number``.
    train_df : DataFrame with the training columns named in ``best_matches``.
        Optional for backward compatibility: when omitted, the module-level
        ``train_df`` is used, as earlier versions of this function did.

    Returns
    -------
    list of dict, one per test row, with keys 'x_value', 'y_value', 'delta_y'
    and 'ideal_function_number'. The last two are None when the test X is not
    present in ``ideal_df`` or no function is within tolerance.

    A function qualifies when |y_test - y_ideal| <= max training deviation *
    sqrt(2). Among qualifying functions the one with the smallest deviation is
    chosen; ties keep the earlier entry of ``best_matches``.

    Raises
    ------
    NameError if ``train_df`` is neither passed nor defined at module level.
    """
    if train_df is None:
        # Fall back to the notebook-level frame so existing 3-argument calls keep working.
        train_df = globals().get('train_df')
        if train_df is None:
            raise NameError(
                "train_df must be passed to map_test_data or defined at module level"
            )

    # The tolerance depends only on the training/ideal columns, not on the test
    # row, so compute it once per selected function instead of once per row.
    candidates = []
    for idx, (train_col, ideal_col) in enumerate(best_matches.items(), 1):
        max_dev = np.max(np.abs(train_df[train_col] - ideal_df[ideal_col]))
        candidates.append((idx, ideal_col, max_dev * sqrt(2)))

    results = []
    for _, row in test_df.iterrows():
        x_val, y_val = row['X'], row['Y']
        # Exact equality lookup: the test X must appear verbatim in ideal_df['X'].
        ideal_row = ideal_df[ideal_df['X'] == x_val]

        best_idx, best_delta = None, None
        if not ideal_row.empty:
            for idx, ideal_col, tolerance in candidates:
                delta = abs(y_val - ideal_row[ideal_col].values[0])
                within_tolerance = delta <= tolerance
                if within_tolerance and (best_delta is None or delta < best_delta):
                    best_idx, best_delta = idx, delta

        # One record per test point; unmatched points keep None placeholders.
        results.append({
            'x_value': x_val,
            'y_value': y_val,
            'delta_y': best_delta,
            'ideal_function_number': best_idx
        })
    return results


In [10]:
# save all the results to the function_matching.db file
def save_results(results):
    """Insert the result records into the test_results table in one transaction.

    ``results`` is an iterable of dicts whose keys match the TestResult
    attributes (x_value, y_value, delta_y, ideal_function_number). Uses the
    notebook-level ``session``.

    If building a row or committing fails, the transaction is rolled back so
    the shared session stays usable, and the original exception is re-raised.
    """
    try:
        session.add_all([TestResult(**r) for r in results])
        session.commit()
    except Exception:
        # Without a rollback the session stays in a failed transaction and
        # rejects every later operation.
        session.rollback()
        raise
    
# plot the Y1 training data, its best ideal function and the test points mapped to function 1.
def plot_results(train_df, ideal_df, results, best_matches):
    """Show a single Bokeh figure for training column 'Y1'.

    Draws the 'Y1' training points, the ideal function named by
    ``best_matches['Y1']`` as a line, and the entries of ``results`` whose
    'ideal_function_number' equals 1 as red squares.

    ``results`` is a list of dicts with keys 'x_value', 'y_value', 'delta_y'
    and 'ideal_function_number'; an empty list is accepted.
    """
    p = figure(title="Function Matching", x_axis_label='X', y_axis_label='Y')
    p.scatter(train_df['X'], train_df['Y1'], color='blue', size=6, legend_label='Training Y1')

    # plot the ideal function chosen for Y1.
    ideal_col = best_matches['Y1']
    p.line(ideal_df['X'], ideal_df[ideal_col], color='green', line_width=2, legend_label=f'Ideal {ideal_col}')

    # Naming the columns keeps the filter below valid when `results` is empty.
    results_df = pd.DataFrame(
        results,
        columns=['x_value', 'y_value', 'delta_y', 'ideal_function_number'],
    )
    mapped = results_df[results_df['ideal_function_number'] == 1]  # test points assigned to function 1
    # scatter(marker='square') replaces the deprecated figure.square().
    p.scatter(mapped['x_value'], mapped['y_value'], marker='square', color='red', size=6, legend_label='Test Mapped')

    p.legend.location = 'top_left'
    show(p)


In [13]:
# Small hand-written sample inputs for the plotting demo below.
# NOTE: these assignments rebind train_df / ideal_df, replacing the frames
# loaded from the CSV files earlier in the notebook.
train_df = pd.DataFrame({
    'X': list(range(1, 6)),
    'Y1': list(range(2, 11, 2)),
    'Y2': list(range(1, 10, 2)),
    'Y3': [2] * 5,
    'Y4': list(range(5, 0, -1)),
})

ideal_df = pd.DataFrame({
    'X': list(range(1, 6)),
    'Y1': [2.1, 4.1, 6.1, 8.1, 10.1],
    'Y2': [0.9, 3.1, 5.2, 7.1, 9.1],
    'Y3': [2] * 5,
    'Y4': list(range(5, 0, -1)),
})

# (x, y, assigned function number); every sample point uses delta_y = 0.1
results = [
    {'x_value': x, 'y_value': y, 'delta_y': 0.1, 'ideal_function_number': fn}
    for x, y, fn in [(1, 2, 1), (2, 4, 1), (3, 5, 2)]
]

# Each training column is paired with the ideal column of the same name.
best_matches = {col: col for col in ('Y1', 'Y2', 'Y3', 'Y4')}


In [14]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
from bokeh.palettes import Category10

output_notebook()  # This makes sure plots display inline


def plot_all_results(train_df, ideal_df, results, best_matches):
    """Show one stacked Bokeh figure per training column 'Y1'..'Y4'.

    Each figure contains the training points of that column, the ideal
    function named by ``best_matches[column]`` as a black line, and the
    entries of ``results`` whose 'ideal_function_number' equals the column's
    1-based position (Y1 -> 1, ..., Y4 -> 4) as red squares.

    ``results`` is a list of dicts with keys 'x_value', 'y_value', 'delta_y'
    and 'ideal_function_number'; an empty list is accepted.
    """
    plots = []
    palette = Category10[4]  # one colour per training function

    # Naming the columns keeps the per-function filter valid when `results` is empty.
    results_df = pd.DataFrame(
        results,
        columns=['x_value', 'y_value', 'delta_y', 'ideal_function_number'],
    )

    for idx, train_col in enumerate(['Y1', 'Y2', 'Y3', 'Y4']):
        p = figure(
            title=f"Function Matching: {train_col}",
            x_axis_label='X',
            y_axis_label='Y',
            width=800,
            height=400
        )

        # Training data
        p.scatter(
            train_df['X'], train_df[train_col],
            color=palette[idx],
            size=6,
            legend_label=f"Training {train_col}"
        )

        # Best ideal match
        ideal_col = best_matches[train_col]
        p.line(
            ideal_df['X'], ideal_df[ideal_col],
            color='black',
            line_width=2,
            legend_label=f"Ideal {ideal_col}"
        )

        # Test points mapped to this function (function numbers are 1-based)
        mapped = results_df[results_df['ideal_function_number'] == idx + 1]
        # scatter(marker='square') replaces the deprecated figure.square().
        p.scatter(
            mapped['x_value'], mapped['y_value'],
            marker='square',
            color='red',
            size=6,
            legend_label="Test Mapped"
        )

        p.legend.location = "top_left"
        p.legend.click_policy = "hide"  # clicking a legend entry toggles its glyph

        plots.append(p)

    show(column(*plots))


In [16]:
# Render the stacked comparison plots using whatever train_df, ideal_df,
# results and best_matches are currently bound in the notebook. In file order
# these are the hand-written sample values, not the frames loaded from CSV.
plot_all_results(train_df, ideal_df, results, best_matches)

