# Importing necessary libraries

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine, Column, Integer, Float, String, MetaData, Table
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
import bokeh.plotting as bp
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
import unittest
import os

# Load training, ideal, and test datasets

In [2]:
# Read the three CSV inputs (training, ideal and test data) from the current
# working directory, in that order, into their respective DataFrames.
training_df, ideal_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'ideal.csv', 'test.csv')
)


# Create SQLite Database and Tables

In [3]:
# Declarative base shared by every ORM model in this notebook; the tables of
# its subclasses are the ones later dropped/created through Base.metadata.
Base = declarative_base()
class TrainingData(Base):
    """ORM model for the ``training_data`` table.

    One row holds an ``x`` value and four y columns (``y1`` .. ``y4``), all
    stored as non-nullable floats. ``load_data_to_db`` builds instances with
    the DataFrame column names as keyword arguments, so the attribute names
    here must match the columns of the DataFrame loaded into this table.
    """
    __tablename__ = 'training_data'
    id = Column(Integer, primary_key=True)  # integer primary key
    x = Column(Float, nullable=False)  # shared x value of the row
    y1 = Column(Float, nullable=False)
    y2 = Column(Float, nullable=False)
    y3 = Column(Float, nullable=False)
    y4 = Column(Float, nullable=False)

class IdealFunctions(Base):
    """ORM model for the ``ideal_functions`` table.

    One row holds an ``x`` value and fifty candidate function values
    (``y1`` .. ``y50``), all stored as non-nullable floats.
    ``load_data_to_db`` builds instances with the DataFrame column names as
    keyword arguments, so the attribute names here must match the columns of
    the DataFrame loaded into this table.
    """
    __tablename__ = 'ideal_functions'
    id = Column(Integer, primary_key=True)  # integer primary key
    x = Column(Float, nullable=False)  # shared x value of the row
    # y1 .. y50: one non-nullable float column per candidate ideal function.
    y1 = Column(Float, nullable=False)
    y2 = Column(Float, nullable=False)
    y3 = Column(Float, nullable=False)
    y4 = Column(Float, nullable=False)
    y5 = Column(Float, nullable=False)
    y6 = Column(Float, nullable=False)
    y7 = Column(Float, nullable=False)
    y8 = Column(Float, nullable=False)
    y9 = Column(Float, nullable=False)
    y10 = Column(Float, nullable=False)
    y11 = Column(Float, nullable=False)
    y12 = Column(Float, nullable=False)
    y13 = Column(Float, nullable=False)
    y14 = Column(Float, nullable=False)
    y15 = Column(Float, nullable=False)
    y16 = Column(Float, nullable=False)
    y17 = Column(Float, nullable=False)
    y18 = Column(Float, nullable=False)
    y19 = Column(Float, nullable=False)
    y20 = Column(Float, nullable=False)
    y21 = Column(Float, nullable=False)
    y22 = Column(Float, nullable=False)
    y23 = Column(Float, nullable=False)
    y24 = Column(Float, nullable=False)
    y25 = Column(Float, nullable=False)
    y26 = Column(Float, nullable=False)
    y27 = Column(Float, nullable=False)
    y28 = Column(Float, nullable=False)
    y29 = Column(Float, nullable=False)
    y30 = Column(Float, nullable=False)
    y31 = Column(Float, nullable=False)
    y32 = Column(Float, nullable=False)
    y33 = Column(Float, nullable=False)
    y34 = Column(Float, nullable=False)
    y35 = Column(Float, nullable=False)
    y36 = Column(Float, nullable=False)
    y37 = Column(Float, nullable=False)
    y38 = Column(Float, nullable=False)
    y39 = Column(Float, nullable=False)
    y40 = Column(Float, nullable=False)
    y41 = Column(Float, nullable=False)
    y42 = Column(Float, nullable=False)
    y43 = Column(Float, nullable=False)
    y44 = Column(Float, nullable=False)
    y45 = Column(Float, nullable=False)
    y46 = Column(Float, nullable=False)
    y47 = Column(Float, nullable=False)
    y48 = Column(Float, nullable=False)
    y49 = Column(Float, nullable=False)
    y50 = Column(Float, nullable=False)

class TestData(Base):
    """ORM model for the ``test_data`` table.

    One row holds a test point (``x``, ``y``) as non-nullable floats plus two
    nullable result columns: ``delta_y`` (float) and ``ideal_function``
    (string). ``load_data_to_db`` builds instances with the DataFrame column
    names as keyword arguments, so a DataFrame with only ``x`` and ``y``
    leaves the two result columns unset.
    """
    __tablename__ = 'test_data'
    id = Column(Integer, primary_key=True)  # integer primary key
    x = Column(Float, nullable=False)
    y = Column(Float, nullable=False)
    delta_y = Column(Float, nullable=True)  # nullable: not part of the raw test rows
    ideal_function = Column(String, nullable=True)  # nullable: not part of the raw test rows

# NOTE: do not rebind ``Base`` after the model classes. A fresh
# ``declarative_base()`` has empty metadata, so the later
# ``Base.metadata.drop_all(engine)`` / ``create_all(engine)`` calls would
# neither reset nor create any of the tables declared above.


# Load Data into Database

In [4]:
# Create an SQLite database and a session
# The engine targets the SQLite file 'data.db'. Tables are dropped before they
# are created so that re-running this cell starts from empty tables instead of
# appending to rows left over from an earlier run.
engine = create_engine('sqlite:///data.db')
Base.metadata.drop_all(engine)  # Drop existing tables
Base.metadata.create_all(engine)  # Create new tables
# Session factory bound to the engine, and the single session object that the
# loading calls below pass to load_data_to_db.
Session = sessionmaker(bind=engine)
session = Session()

def load_data_to_db(session, df, table_class):
    """
    Load data from a DataFrame into a specified database table.

    Every DataFrame row becomes one ``table_class`` instance, created with the
    row's column names as keyword arguments. All instances are added to the
    session and committed in a single transaction.

    Args:
        session: SQLAlchemy session object.
        df: DataFrame containing data to be loaded. Its column names must be
            valid keyword arguments for ``table_class``.
        table_class: SQLAlchemy ORM class representing the target table.

    Raises:
        Exception: Any error raised while building the instances or while
            committing is re-raised after the session has been rolled back.
    """
    # Column-preserving conversion: unlike iterrows(), this does not coerce
    # each row to one common dtype and avoids building a Series per row.
    records = df.to_dict(orient='records')
    try:
        session.add_all([table_class(**record) for record in records])
        session.commit()
    except Exception:
        # Leave the shared session usable for the next load instead of stuck
        # in a failed transaction.
        session.rollback()
        raise

# Load data into database: each DataFrame goes into its matching ORM table,
# in the order training -> ideal -> test.
for frame, model in (
    (training_df, TrainingData),
    (ideal_df, IdealFunctions),
    (test_df, TestData),
):
    load_data_to_db(session, frame, model)

# Select Ideal Functions

In [5]:
def select_ideal_functions(training_df, ideal_df):
    """
    Pick, for every training column, the closest ideal function column.

    Closeness is the sum of squared differences between the training column
    and an ideal column. The first column of each DataFrame is skipped (it is
    treated as the 'x' column). When several ideal columns tie, the one that
    appears first in ``ideal_df`` wins.

    Args:
        training_df: DataFrame containing the training data.
        ideal_df: DataFrame containing the ideal functions.

    Returns:
        A list of ``(training column name, ideal column name)`` tuples, one
        per training column, in training column order.
    """
    candidate_cols = list(ideal_df.columns[1:])  # everything but the leading 'x' column
    matches = []
    for train_col in training_df.columns[1:]:  # everything but the leading 'x' column
        observed = training_df[train_col]
        scored = [
            (candidate, np.sum((observed - ideal_df[candidate]) ** 2))
            for candidate in candidate_cols
        ]
        # Stable sort: equal scores keep their original column order.
        ranked = sorted(scored, key=lambda pair: pair[1])
        matches.append((train_col, ranked[0][0]))
    return matches

# Choose one ideal function per training column and print the resulting
# (training column, ideal column) pairs.
selected_ideal_functions = select_ideal_functions(training_df, ideal_df)
print("Selected ideal functions:", selected_ideal_functions)

Selected ideal functions: [('y1', 'y42'), ('y2', 'y41'), ('y3', 'y11'), ('y4', 'y48')]


# Map Test Data

In [6]:
def map_test_data(test_df, ideal_df, selected_ideal_functions):
    """
    Map test data to the ideal functions and calculate the deviations.

    For every test point, each selected ideal function is linearly
    interpolated at the point's ``x`` and compared with the point's ``y``.
    The function with the smallest absolute deviation is assigned to the
    point; on a tie the function listed first is kept.

    Args:
        test_df: DataFrame containing the test data (columns 'x' and 'y').
        ideal_df: DataFrame containing the ideal functions (column 'x' plus
            the ideal function columns). Rows may be in any x order.
        selected_ideal_functions: List of tuples containing the selected ideal functions for each training column.
            Only the second element (the ideal column name) is used.

    Returns:
        A DataFrame with columns 'x', 'y', 'deviation' and 'ideal_function',
        one row per test point. If no ideal functions are supplied, the
        deviation is ``inf`` and the ideal function is ``None``.
    """
    # np.interp requires increasing x coordinates and silently returns wrong
    # values otherwise, so sort the ideal curves by x once up front.
    raw_x = ideal_df['x'].values
    order = np.argsort(raw_x, kind='stable')
    x_ideal_values = raw_x[order]
    # Loop-invariant: extract each selected curve once, not per test row.
    ideal_curves = [
        (y_ideal_col, ideal_df[y_ideal_col].values[order])
        for _, y_ideal_col in selected_ideal_functions
    ]

    results = []
    for _, test_row in test_df.iterrows():
        x_test = test_row['x']
        y_test = test_row['y']
        best_deviation = float('inf')
        best_ideal_function = None
        for y_ideal_col, ideal_y_values in ideal_curves:
            interpolated_y_ideal = np.interp(x_test, x_ideal_values, ideal_y_values)
            deviation = abs(y_test - interpolated_y_ideal)
            # Strict '<' keeps the earlier function when deviations tie.
            if deviation < best_deviation:
                best_deviation = deviation
                best_ideal_function = y_ideal_col
        results.append((x_test, y_test, best_deviation, best_ideal_function))
    result_df = pd.DataFrame(results, columns=['x', 'y', 'deviation', 'ideal_function'])
    return result_df


In [7]:
# Map the test data to the selected ideal functions
# Each test point is assigned the selected ideal function with the smallest
# absolute deviation; only the first rows of the result are printed.
mapping_results = map_test_data(test_df, ideal_df, selected_ideal_functions)
print("Mapping results:\n", mapping_results.head())

Mapping results:
       x          y  deviation ideal_function
0  17.5  34.161040   0.351148            y41
1   0.3   1.215102   0.467342            y41
2  -8.7 -16.843908   0.887577            y41
3 -19.2 -37.170870   1.400790            y41
4 -11.0 -20.263054   1.236952            y41


# Function to visualize data

In [8]:
# Function to visualize data
def visualize_data(training_df, test_df, ideal_df, selected_ideal_functions, mapping_results):
    """
    Visualize the training data, ideal functions, and test data with deviations.

    Two figures are shown side by side: the training curves together with the
    selected ideal functions, and a scatter of each test point's deviation
    against its x value.

    Args:
        training_df: DataFrame containing the training data.
        test_df: DataFrame containing the test data (not used for drawing;
            the test points are taken from ``mapping_results``).
        ideal_df: DataFrame containing the ideal functions.
        selected_ideal_functions: List of tuples containing the selected ideal functions for each training column.
            Only these ideal columns are drawn. If it is empty or None, all
            ideal columns are drawn.
        mapping_results: DataFrame containing the test data, deviations, and corresponding ideal function.
    """
    # Draw only the ideal functions that were actually selected. Plotting all
    # ideal columns buries the training curves under identical red lines and
    # produces one legend entry per column. dict.fromkeys removes duplicates
    # while keeping first-seen order.
    chosen_ideal_cols = list(
        dict.fromkeys(ideal_col for _, ideal_col in (selected_ideal_functions or []))
    )
    if not chosen_ideal_cols:
        chosen_ideal_cols = list(ideal_df.columns[1:])  # fall back to every ideal column

    p1 = bp.figure(title="Training Data vs Ideal Functions", x_axis_label='x', y_axis_label='y', width=400, height=400)
    for col in training_df.columns[1:]:  # skip the leading 'x' column
        p1.line(training_df['x'], training_df[col], legend_label=col, line_width=2)
    for ideal_col in chosen_ideal_cols:
        p1.line(ideal_df['x'], ideal_df[ideal_col], legend_label=f'Ideal {ideal_col}', line_width=1, line_color='red')

    source = ColumnDataSource(mapping_results)
    p2 = bp.figure(title="Test Data with Deviations", x_axis_label='x', y_axis_label='deviation', width=400, height=400)
    p2.scatter('x', 'deviation', source=source, legend_label='Test Data', color='green')

    layout = gridplot([[p1, p2]])
    bp.show(layout)

In [9]:
# Call visualization function: shows the training/ideal curves next to the
# per-point deviations from mapping_results.
visualize_data(training_df, test_df, ideal_df, selected_ideal_functions, mapping_results)

# Unit Tests

In [10]:
# Unit Tests
class TestFunctions(unittest.TestCase):
    """Unit tests for select_ideal_functions and map_test_data functions."""

    def test_select_ideal_functions(self):
        """Test the select_ideal_functions function."""
        training_df = pd.DataFrame({'x': [1, 2, 3], 'y1': [1, 2, 3], 'y2': [2, 3, 4]})
        ideal_df = pd.DataFrame({'x': [1, 2, 3], 'y1': [1, 2, 3], 'y2': [2, 3, 5]})
        selected_ideal_functions = select_ideal_functions(training_df, ideal_df)
        self.assertEqual(selected_ideal_functions, [('y1', 'y1'), ('y2', 'y2')])

    def test_map_test_data(self):
        """Test the map_test_data function."""
        test_df = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3]})
        ideal_df = pd.DataFrame({'x': [1, 2, 3], 'y1': [1, 2, 3], 'y2': [2, 3, 4]})
        selected_ideal_functions = [('y1', 'y1'), ('y2', 'y2')]
        result_df = map_test_data(test_df, ideal_df, selected_ideal_functions)
        expected_result_df = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3], 'deviation': [0.0, 0.0, 0.0], 'ideal_function': ['y1', 'y1', 'y1']})
        pd.testing.assert_frame_equal(result_df, expected_result_df)

# Run the tests
# The suite is built and run explicitly through a loader and a text runner.
# verbosity=2 prints one line per test, including the first line of its
# docstring.
loader = unittest.TestLoader()
tests = loader.loadTestsFromTestCase(TestFunctions)
testRunner = unittest.TextTestRunner(verbosity=2)
# Last expression of the cell: the returned result object is displayed as the
# cell output.
testRunner.run(tests)

test_map_test_data (__main__.TestFunctions)
Test the map_test_data function. ... ok
test_select_ideal_functions (__main__.TestFunctions)
Test the select_ideal_functions function. ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.006s

OK


<unittest.runner.TextTestResult run=2 errors=0 failures=0>