<a href="https://colab.research.google.com/github/SaddamRafiq/Programming-With-Python/blob/main/ProgrammingWithPython_SaddamRafiq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook

# Configure Bokeh to render figures inline in the notebook, so the show(p)
# calls in later cells display their plots in the cell output.
output_notebook()

# --- Data Loading Function ---
def load_data(training_data_path, ideal_functions_path, test_data_path):
    """Read the training, ideal-function and test CSV files with pandas.

    Args:
        training_data_path: Path of the training-data CSV.
        ideal_functions_path: Path of the ideal-functions CSV.
        test_data_path: Path of the test-data CSV.

    Returns:
        A ``(train_df, ideal_df, test_df)`` tuple of DataFrames, read in
        that order.

    Raises:
        Any exception raised by ``pd.read_csv``; it is reported with
        ``print`` first and then re-raised unchanged.
    """
    csv_paths = (training_data_path, ideal_functions_path, test_data_path)
    try:
        # The generator is consumed left to right, so the files are read in
        # the same order as the parameters.
        frames = tuple(pd.read_csv(csv_path) for csv_path in csv_paths)
    except (FileNotFoundError, pd.errors.EmptyDataError) as err:
        print(f"Error: {err}")
        raise
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
        raise
    return frames

# Load data
# The paths are relative, so they resolve against the notebook's working directory.
training_data_path = 'data/train.csv'
ideal_functions_path = 'data/ideal.csv'
test_data_path = 'data/test.csv'

try:
    train_df, ideal_df, test_df = load_data(training_data_path, ideal_functions_path, test_data_path)
except Exception as e:
    print(f"Failed to load data: {e}")
    # Re-raise so the run stops here. Swallowing the error would leave
    # train_df / ideal_df / test_df undefined (or holding values from an
    # earlier run), and later cells would fail or silently use stale data.
    raise
else:
    print("Data loaded successfully.")

Data loaded successfully.


In [5]:
# Preview the first rows of the training data (head() returns five rows by default).
train_df.head()

Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,-45.29234,-15999.796,99.52958,899.8275
1,-19.9,-44.36496,-15761.017,99.89567,893.4274
2,-19.8,-44.565968,-15524.681,98.85578,887.16046
3,-19.7,-44.76245,-15290.5,98.1261,881.4487
4,-19.6,-44.188698,-15058.586,97.511475,875.37726


In [7]:
# Preview the first rows of the test data (head() returns five rows by default).
test_df.head()

Unnamed: 0,x,y
0,-13.1,-4494.98
1,3.4,78.95702
2,11.1,2.493696
3,1.9,-7730.0913
4,-14.0,13840.699


In [8]:
# Preview the first rows of the ideal-function table (head() returns five rows by default).
ideal_df.head()

Unnamed: 0,x,y1,y2,y3,y4,y5,y6,y7,y8,y9,...,y41,y42,y43,y44,y45,y46,y47,y48,y49,y50
0,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,0.816164,...,-40.456474,40.20404,2.995732,-0.008333,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685
1,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,0.994372,...,-40.23382,40.04859,2.99072,-0.00834,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954
2,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,1.162644,...,-40.006836,39.89066,2.985682,-0.008347,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129
3,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,1.319299,...,-39.775787,39.729824,2.980619,-0.008354,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284
4,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,1.462772,...,-39.54098,39.565693,2.97553,-0.008361,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902




In [9]:
# --- Function to Calculate Deviation ---
def calculate_deviation(series1, series2):
    """Return the sum of squared element-wise differences of two series.

    Args:
        series1: First operand of the subtraction.
        series2: Second operand of the subtraction.

    Returns:
        ``np.sum((series1 - series2) ** 2)``.

    Raises:
        Any exception raised by the arithmetic; it is reported with
        ``print`` first and then re-raised unchanged.
    """
    try:
        residuals = series1 - series2
        squared_residuals = residuals ** 2
        return np.sum(squared_residuals)
    except Exception as err:
        print(f"Error during deviation calculation: {err}")
        raise

# --- Function to Select Best-Fit Functions ---
def select_best_fit_functions(train_df, ideal_df):
    """Pick, for each training function, the ideal function with the smallest deviation.

    Every column of ``train_df`` whose name starts with ``'y'`` is compared
    against every ``ideal_df`` column named ``'y<number>'`` using
    ``calculate_deviation``. Candidates are discovered from ``ideal_df``
    itself rather than assumed to be exactly ``y1``..``y50``, and are
    visited in ascending numeric order. The ``'x'`` columns are not
    consulted.

    Args:
        train_df: DataFrame holding the training functions.
        ideal_df: DataFrame holding the candidate ideal functions.

    Returns:
        dict mapping each training column name to the name of the ideal
        column with the lowest deviation. On a tie the lowest-numbered
        candidate wins. The value is ``None`` when no candidate produced a
        deviation below infinity (for example when ``ideal_df`` has no
        candidate columns).

    Raises:
        KeyError or any other exception raised during the comparison; it is
        reported with ``print`` first and then re-raised unchanged.
    """
    best_fit_functions = {}
    try:
        # Determine the list of y columns in train_df dynamically
        train_columns = [
            col for col in train_df.columns
            if isinstance(col, str) and col.startswith('y')
        ]

        # Discover candidate columns instead of hard-coding y1..y50. Sorting
        # by the numeric suffix reproduces the y1, y2, ... visiting order, so
        # tie-breaking does not depend on the column order of the file.
        ideal_columns = sorted(
            (
                col for col in ideal_df.columns
                if isinstance(col, str) and col.startswith('y') and col[1:].isdecimal()
            ),
            key=lambda col: int(col[1:]),
        )

        for train_col in train_columns:
            min_deviation = float('inf')
            best_function = None
            for ideal_col in ideal_columns:
                deviation = calculate_deviation(train_df[train_col], ideal_df[ideal_col])
                # Strict '<' keeps the first candidate on a tie.
                if deviation < min_deviation:
                    min_deviation = deviation
                    best_function = ideal_col
            best_fit_functions[train_col] = best_function
        print("Best-fit functions selected successfully.")
    except KeyError as e:
        print(f"Error: Missing expected column in the data. {e}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred while selecting best fit functions: {e}")
        raise
    return best_fit_functions

# Select best-fitting ideal functions
try:
    best_fit_functions = select_best_fit_functions(train_df, ideal_df)
except Exception as e:
    print(f"Failed to select best-fit functions: {e}")
    # Re-raise so the run stops here. Swallowing the error would leave
    # best_fit_functions undefined (or holding a value from an earlier run)
    # for the cells that depend on it.
    raise
else:
    print(f"Best-fit functions: {best_fit_functions}")


Best-fit functions selected successfully.
Best-fit functions: {'y1': 'y13', 'y2': 'y24', 'y3': 'y36', 'y4': 'y40'}


In [10]:
# --- Function to Calculate Deviation ---
# NOTE: this is a second definition of calculate_deviation in this notebook;
# once this cell runs it replaces the definition from the earlier cell.
def calculate_deviation(series1, series2):
    """Return the sum of squared element-wise differences of two series.

    Args:
        series1: First operand of the subtraction.
        series2: Second operand of the subtraction.

    Returns:
        ``np.sum((series1 - series2) ** 2)``.

    Raises:
        Any exception raised by the arithmetic; it is reported with
        ``print`` first and then re-raised unchanged.
    """
    try:
        residuals = series1 - series2
        squared_residuals = residuals ** 2
        return np.sum(squared_residuals)
    except Exception as err:
        print(f"Error during deviation calculation: {err}")
        raise

# --- Function to Select Best-Fit Functions ---
# NOTE: this is a second definition of select_best_fit_functions in this
# notebook; once this cell runs it replaces the definition from the earlier cell.
def select_best_fit_functions(train_df, ideal_df):
    """Pick, for each training function, the ideal function with the smallest deviation.

    Every column of ``train_df`` whose name starts with ``'y'`` is compared
    against every ``ideal_df`` column named ``'y<number>'`` using
    ``calculate_deviation``. Candidates are discovered from ``ideal_df``
    itself rather than assumed to be exactly ``y1``..``y50``, and are
    visited in ascending numeric order. The ``'x'`` columns are not
    consulted.

    Args:
        train_df: DataFrame holding the training functions.
        ideal_df: DataFrame holding the candidate ideal functions.

    Returns:
        dict mapping each training column name to the name of the ideal
        column with the lowest deviation. On a tie the lowest-numbered
        candidate wins. The value is ``None`` when no candidate produced a
        deviation below infinity (for example when ``ideal_df`` has no
        candidate columns).

    Raises:
        KeyError or any other exception raised during the comparison; it is
        reported with ``print`` first and then re-raised unchanged.
    """
    best_fit_functions = {}
    try:
        # Determine the list of y columns in train_df dynamically
        train_columns = [
            col for col in train_df.columns
            if isinstance(col, str) and col.startswith('y')
        ]

        # Discover candidate columns instead of hard-coding y1..y50. Sorting
        # by the numeric suffix reproduces the y1, y2, ... visiting order, so
        # tie-breaking does not depend on the column order of the file.
        ideal_columns = sorted(
            (
                col for col in ideal_df.columns
                if isinstance(col, str) and col.startswith('y') and col[1:].isdecimal()
            ),
            key=lambda col: int(col[1:]),
        )

        for train_col in train_columns:
            min_deviation = float('inf')
            best_function = None
            for ideal_col in ideal_columns:
                deviation = calculate_deviation(train_df[train_col], ideal_df[ideal_col])
                # Strict '<' keeps the first candidate on a tie.
                if deviation < min_deviation:
                    min_deviation = deviation
                    best_function = ideal_col
            best_fit_functions[train_col] = best_function
        print("Best-fit functions selected successfully.")
    except KeyError as e:
        print(f"Error: Missing expected column in the data. {e}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred while selecting best fit functions: {e}")
        raise
    return best_fit_functions

# Select best-fitting ideal functions and visualize
# NOTE: the single handler below covers both the selection and the plotting
# step, so its message is also printed when only the plotting fails.
try:
    best_fit_functions = select_best_fit_functions(train_df, ideal_df)
    print(f"Best-fit functions: {best_fit_functions}")

    # Visualizing the training data with selected ideal functions
    p = figure(title="Training Data and Best-Fit Ideal Functions", x_axis_label='X', y_axis_label='Y', width=800, height=600)
    colors = ['blue', 'green', 'red', 'purple']
    for i, (train_func, ideal_func) in enumerate(best_fit_functions.items()):
        # Cycle through the palette so more than len(colors) function pairs
        # do not raise IndexError; each pair shares one color.
        pair_color = colors[i % len(colors)]
        p.line(train_df['x'], train_df[train_func], legend_label=f'Training {train_func}', line_width=2, color=pair_color)
        p.line(ideal_df['x'], ideal_df[ideal_func], legend_label=f'Ideal {ideal_func}', line_dash="dotted", line_width=2, color=pair_color)
    p.legend.location = 'top_left'
    p.legend.click_policy = 'hide'
    show(p)
except Exception as e:
    print(f"Failed to select best-fit functions: {e}")
    # Re-raise so the run stops here instead of continuing with an undefined
    # or stale best_fit_functions.
    raise


Best-fit functions selected successfully.
Best-fit functions: {'y1': 'y13', 'y2': 'y24', 'y3': 'y36', 'y4': 'y40'}


In [11]:
# --- Function to Map Test Data to Ideal Functions ---
def map_test_data_to_ideal(test_df, train_df, ideal_df, best_fit_functions):
    """Assign each test point to the closest admissible best-fit ideal function.

    For every row of ``test_df`` the ideal value at the same ``x`` is looked
    up for each selected ideal function. A function is admissible when the
    absolute difference ``delta_y`` between the test ``y`` and the ideal
    value does not exceed ``sqrt(2)`` times the largest absolute deviation
    between that ideal function and its training function. Among admissible
    functions the one with the smallest ``delta_y`` is chosen. Test rows
    whose ``x`` does not occur in ``ideal_df`` and rows without any
    admissible function are left out of the result.

    Args:
        test_df: DataFrame with ``'x'`` and ``'y'`` columns.
        train_df: DataFrame holding the training-function columns named by
            the keys of ``best_fit_functions``.
        ideal_df: DataFrame with an ``'x'`` column and the ideal-function
            columns named by the values of ``best_fit_functions``.
        best_fit_functions: dict mapping training column name to ideal
            column name.

    Returns:
        DataFrame with the columns ``x``, ``y``, ``delta_y`` and
        ``ideal_function``, one row per mapped test point. The columns are
        present even when no test point could be mapped.

    Raises:
        KeyError or any other exception raised during the mapping; it is
        reported with ``print`` first and then re-raised unchanged.
    """
    result_columns = ['x', 'y', 'delta_y', 'ideal_function']
    mapping_results = []
    sqrt_2 = np.sqrt(2)
    try:
        # The admissible deviation depends only on the (training, ideal)
        # pair, not on the test row, so compute it once per pair up front.
        candidates = [
            (ideal_func, np.max(abs(train_df[train_func] - ideal_df[ideal_func])) * sqrt_2)
            for train_func, ideal_func in best_fit_functions.items()
        ]

        for _, test_row in test_df.iterrows():
            x_val = test_row['x']
            y_test = test_row['y']

            # Look up the ideal rows for this x once; skip test points whose
            # x is absent from the ideal table.
            ideal_rows = ideal_df[ideal_df['x'] == x_val]
            if ideal_rows.empty:
                continue

            best_match = None
            min_delta_y = float('inf')
            for ideal_func, max_allowed_delta in candidates:
                # The first matching row is used if x occurs more than once.
                y_ideal = ideal_rows[ideal_func].values[0]
                delta_y = abs(y_test - y_ideal)
                if delta_y <= max_allowed_delta and delta_y < min_delta_y:
                    min_delta_y = delta_y
                    best_match = ideal_func

            if best_match is not None:
                mapping_results.append({
                    'x': x_val,
                    'y': y_test,
                    'delta_y': min_delta_y,
                    'ideal_function': best_match
                })
        print("Test data mapped successfully.")
    except KeyError as e:
        print(f"Error: Missing expected column in the data. {e}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred while mapping test data: {e}")
        raise
    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(mapping_results, columns=result_columns)

# Map test data to ideal functions and visualize
# NOTE: the single handler below covers both the mapping and the plotting
# step, so its message is also printed when only the plotting fails.
try:
    mapping_df = map_test_data_to_ideal(test_df, train_df, ideal_df, best_fit_functions)
    print("Mapping DataFrame:")
    print(mapping_df.head())

    # Visualizing the mapping of test data to the selected ideal functions
    p = figure(title="Mapping Test Data to Ideal Functions", x_axis_label='X', y_axis_label='Y', width=800, height=600)
    colors = ['blue', 'green', 'red', 'purple']
    for i, ideal_func in enumerate(best_fit_functions.values()):
        # Cycle through the palette so more than len(colors) functions do
        # not raise IndexError.
        p.line(ideal_df['x'], ideal_df[ideal_func], legend_label=f'Ideal {ideal_func}', line_dash="dotted", line_width=2, color=colors[i % len(colors)])
    # An empty mapping result may lack the 'x'/'y' columns; skip the scatter
    # layer in that case instead of failing on the column lookup.
    if not mapping_df.empty:
        p.scatter(mapping_df['x'], mapping_df['y'], legend_label="Mapped Test Data", color="orange", size=8)
    p.legend.location = 'top_left'
    p.legend.click_policy = 'hide'
    show(p)
except Exception as e:
    print(f"Failed to map test data: {e}")
    # Re-raise so the run stops here instead of continuing with an undefined
    # or stale mapping_df.
    raise


Test data mapped successfully.
Mapping DataFrame:
      x            y   delta_y ideal_function
0   3.4    78.957020  0.349020            y24
1  17.6    57.985920  0.541664            y40
2  -0.7    -1.351244  0.665244            y24
3 -15.7 -7740.142600  0.356600            y24
4  -0.4   106.640510  0.598430            y40


In [12]:
# --- Visualization Function ---
def visualize_data(train_df, ideal_df, mapping_df, best_fit_functions):
    """Plot training functions, their best-fit ideal functions and the mapped test points.

    Each (training, ideal) pair is drawn in one color: a solid line for the
    training function and a dotted line for the ideal function. The mapped
    test points are overlaid as orange markers. The figure is displayed with
    Bokeh's ``show``.

    Args:
        train_df: DataFrame with an ``'x'`` column and the training columns
            named by the keys of ``best_fit_functions``.
        ideal_df: DataFrame with an ``'x'`` column and the ideal columns
            named by the values of ``best_fit_functions``.
        mapping_df: DataFrame with ``'x'`` and ``'y'`` columns of the mapped
            test points; may be empty.
        best_fit_functions: dict mapping training column name to ideal
            column name.

    Raises:
        Any exception raised while building or showing the figure; it is
        reported with ``print`` first and then re-raised unchanged.
    """
    try:
        p = figure(title="Final Visualization: Test Data Mapping to Ideal Functions", x_axis_label='X', y_axis_label='Y', width=800, height=600)
        colors = ['blue', 'green', 'red', 'purple']
        for i, (train_func, ideal_func) in enumerate(best_fit_functions.items()):
            # Cycle through the palette so more than len(colors) function
            # pairs do not raise IndexError.
            pair_color = colors[i % len(colors)]
            p.line(train_df['x'], train_df[train_func], legend_label=f'Training {train_func}', line_width=2, color=pair_color)
            p.line(ideal_df['x'], ideal_df[ideal_func], legend_label=f'Ideal {ideal_func}', line_dash="dotted", line_width=2, color=pair_color)
        # An empty mapping frame may lack the 'x'/'y' columns; skip the
        # scatter layer in that case instead of failing on the column lookup.
        if not mapping_df.empty:
            p.scatter(mapping_df['x'], mapping_df['y'], legend_label="Mapped Test Data", color="orange", size=8)
        p.legend.location = 'top_left'
        p.legend.click_policy = 'hide'
        show(p)
        print("Visualization completed successfully.")
    except Exception as e:
        print(f"An error occurred during visualization: {e}")
        raise

# Render the combined plot: training data, ideal functions and mapped test points.
# A failure is reported and the cell carries on, so the unit tests below
# still run.
try:
    visualize_data(train_df, ideal_df, mapping_df, best_fit_functions)
except Exception as viz_error:
    print(f"Failed to visualize data: {viz_error}")

# --- Unit Tests ---
import unittest

class TestPythonProject(unittest.TestCase):
    """Unit tests for the deviation, selection and mapping helpers of this notebook."""

    def test_calculate_deviation(self):
        """Identical series give 0; differing series give the sum of squared differences."""
        series1 = np.array([1, 2, 3])
        series2 = np.array([1, 2, 3])
        result = calculate_deviation(series1, series2)
        self.assertEqual(result, 0, "Deviation should be 0 for identical series")

        # (1-0)^2 + (2-0)^2 + (3-0)^2 = 14
        shifted = calculate_deviation(np.array([1, 2, 3]), np.array([0, 0, 0]))
        self.assertEqual(shifted, 14, "Deviation should be the sum of squared differences")

    def test_best_fit_functions(self):
        """Each training column is matched to the identical ideal column."""
        train_df = pd.DataFrame({'x': [-20, -19, -18], 'y1': [1, 2, 3], 'y2': [4, 5, 6], 'y3': [7, 8, 9], 'y4': [10, 11, 12]})
        ideal_df = pd.DataFrame({'x': [-20, -19, -18], 'y1': [1, 2, 3], 'y2': [4, 5, 6], 'y3': [7, 8, 9], 'y4': [10, 11, 12]})
        expected_best_fit = {'y1': 'y1', 'y2': 'y2', 'y3': 'y3', 'y4': 'y4'}
        best_fit_functions = select_best_fit_functions(train_df, ideal_df)
        self.assertEqual(best_fit_functions, expected_best_fit, "Best fit functions should match the expected output")

    def test_mapping(self):
        """map_test_data_to_ideal keeps points within the sqrt(2) tolerance and drops the rest."""
        # Largest training/ideal deviation is 0.1, so the tolerance is 0.1 * sqrt(2).
        train_df = pd.DataFrame({'x': [0.0, 1.0, 2.0], 'y1': [0.1, 1.0, 2.0]})
        ideal_df = pd.DataFrame({'x': [0.0, 1.0, 2.0], 'y5': [0.0, 1.0, 2.0]})
        # First point is 0.05 away from the ideal value (kept); second is 7.0 away (dropped).
        test_df = pd.DataFrame({'x': [1.0, 2.0], 'y': [1.05, 9.0]})

        mapping_df = map_test_data_to_ideal(test_df, train_df, ideal_df, {'y1': 'y5'})

        expected_columns = ['x', 'y', 'delta_y', 'ideal_function']
        self.assertTrue(all(column in mapping_df.columns for column in expected_columns), "All expected columns should be present")
        self.assertEqual(len(mapping_df), 1, "Only the point within tolerance should be mapped")
        self.assertEqual(mapping_df['x'].tolist(), [1.0])
        self.assertEqual(mapping_df['ideal_function'].tolist(), ['y5'])
        self.assertAlmostEqual(mapping_df['delta_y'].iloc[0], 0.05)

# Run the test case in-process.
# argv=[''] supplies a stand-in argument list so unittest does not parse
# sys.argv; exit=False keeps unittest.main from calling sys.exit() after
# the run.
if __name__ == '__main__':
    unittest.main(argv=[''], exit=False)


...
----------------------------------------------------------------------
Ran 3 tests in 0.026s

OK


Visualization completed successfully.
Best-fit functions selected successfully.
