# Task 1: Approximating Functions

## Imports and dataset loading

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import lstsq
import pandas as pd
from scipy.spatial.distance import cdist

#Parse both text files with np.loadtxt: dataset A (linear) first, dataset B (nonlinear) second
data_A, data_B = (
    np.loadtxt(file_name)
    for file_name in ('linear_function_data.txt', 'nonlinear_function_data.txt')
)

## Main Functions

### For linear case

In [None]:
#Least-squares fit of a straight line to (x, f(x)) samples
def approximate_linear_function(data):
    """
    Fits f(x) ~ intercept + slope * x to the given samples by least squares.

    Args:
    - data: 2-D array whose first column holds x and whose second column holds f(x)

    Returns:
    - Fitted coefficients ordered as [intercept, slope]
    """
    inputs, outputs = data[:, 0], data[:, 1]

    #Design matrix: a column of ones (intercept term) next to the x values
    design = np.column_stack([np.ones_like(inputs), inputs])

    #lstsq also reports residues, rank and singular values; only the solution is kept
    fit = lstsq(design, outputs, cond=None)
    return fit[0]

#Scatter a dataset and overlay its fitted straight line
def plot_linear_approximation(data, coefficients, dataset_label):
    """
    Draws the samples of a dataset as a scatter plot and the fitted line on top of them.

    Args:
    - data: 2-D array whose first column holds x and whose second column holds f(x)
    - coefficients: Line coefficients ordered as [intercept, slope]
    - dataset_label: Name of the dataset, used in the legend and in the title
    """
    xs = data[:, 0]
    ys = data[:, 1]

    #Evaluate the line at every sample: each row [1, x] times [intercept, slope]
    design = np.column_stack((np.ones_like(xs), xs))
    fitted = design.dot(coefficients)

    plt.figure(figsize=(8, 5))
    plt.scatter(xs, ys, label=dataset_label)
    plt.plot(xs, fitted, color='red', label='Linear Approximation')
    plt.title(f'Linear Function Approximation ({dataset_label})')
    plt.legend()
    plt.show()

### For the nonlinear case (radial basis functions)

In [None]:
#Function to load data from a file
def load_data(file_path):
    """
    Loads whitespace-separated numeric data without a header row and splits it into points and targets.

    Args:
    - file_path: Path to the text file (anything pd.read_csv accepts, e.g. a file-like object, also works)

    Returns:
    - points: Numpy array of shape (n_samples, 1) holding the first column (inputs)
    - targets: Numpy array of shape (n_samples,) holding the second column (target values)
    """
    #Split on any run of whitespace: a literal single-space separator breaks on
    #tab-separated or space-aligned files, which np.loadtxt would read without complaint
    data = pd.read_csv(file_path, sep=r"\s+", header=None, dtype=np.float64)

    #Keep the inputs as a column vector because cdist expects 2-D arrays
    points = data.iloc[:, 0].to_numpy().reshape(-1, 1)
    targets = data.iloc[:, 1].to_numpy()
    return points, targets

#Gaussian radial basis function evaluated for every (point, center) pair
def radial_basis_function(x, x_l, eps):
    """
    Evaluates exp(-||x - x_l||^2 / eps^2) for all pairs of points and centers.
    Note that the bandwidth enters squared.

    Args:
    - x: Data points, one per row
    - x_l: Centers of the basis functions, one per row
    - eps: Epsilon parameter(bandwidth)

    Returns:
    - Matrix with one row per data point and one column per center
    """
    #Euclidean distance between every point and every center
    pairwise_dist = cdist(x, x_l)
    scaled_sq_dist = pairwise_dist ** 2 / eps ** 2
    return np.exp(-scaled_sq_dist)

#Evaluate the radial basis functions on a set of points, drawing centers if none are given
def compute_bases(points, eps, n_bases, centers=None):
    """
    Builds the matrix of radial basis function values for the given points.

    Args:
    - points: Data points, one per row
    - eps: Epsilon parameter(bandwidth)
    - n_bases: Number of basis functions(parameter L); only used when centers have to be drawn
    - centers: Centers of the basis functions, or None to draw them at random from 'points'

    Returns:
    - phi: Matrix of basis function values (rows: points, columns: centers)
    - centers: The centers that were used (the given ones, or the freshly drawn ones)
    """
    if centers is None:
        #Draw n_bases distinct row indices and use the corresponding points as centers
        n_points = points.shape[0]
        chosen_rows = np.random.choice(range(n_points), size=n_bases, replace=False)
        centers = points[chosen_rows]

    return radial_basis_function(points, centers, eps), centers

#Fit a weighted sum of radial basis functions to a dataset by least squares
def approximate_nonlinear_function(data, n_bases=8, eps=0.2, centers=None):
    """
    Approximates the function behind a dataset with radial basis functions.

    Args:
    - data: Location of the dataset; handed to load_data unchanged
    - n_bases: Number of basis functions(parameter L)
    - eps: Epsilon parameter(bandwidth)
    - centers: Centers of the basis functions, or None to let compute_bases draw them

    Returns:
    - solution: Solution of the least squares problem
    - residuals: Residuals reported by lstsq
    - rank: Rank reported by lstsq
    - singular_values: Singular values reported by lstsq
    - centers: Centers of the basis functions that were used
    - eps: Epsilon parameter(bandwidth), passed through unchanged
    - list_of_bases: Matrix of basis function values on the data points
    """
    points, targets = load_data(data)

    #Basis matrix on the data points; centers are drawn here if none were supplied
    list_of_bases, centers = compute_bases(points=points, eps=eps, n_bases=n_bases, centers=centers)

    #lstsq yields (solution, residuals, rank, singular_values); cond=1e-5 is passed through as before
    fit = lstsq(a=list_of_bases, b=targets, cond=1e-5)

    return (*fit, centers, eps, list_of_bases)

#Function to plot the approximated function over the actual data
def plot_nonlinear_approximation(solution, data, centers=None, eps=None, MSE=None, n_bases=None, dataset_label="Dataset B", **kwargs):
    """
    Plots the approximated function over the actual data.
    
    Args:
    - solution: Solution of the least squares problem (one coefficient per basis function)
    - data: Location of the dataset; handed to load_data unchanged
    - centers: Centers of the basis functions that 'solution' was fitted with (required)
    - eps: Epsilon parameter(bandwidth) that 'solution' was fitted with (required)
    - MSE: Mean squared error, only shown in the title
    - n_bases: Number of basis functions(parameter L), only shown in the title; defaults to len(centers)
    - dataset_label: Legend label of the scattered data, also shown in the title
    - **kwargs: Additional keyword arguments forwarded to plt.plot for the approximated curve
      (they override the default color and label)

    Raises:
    - ValueError: If 'centers' or 'eps' is None
    """
    #Fail early with a clear message: the coefficients are meaningless without the
    #centers and bandwidth they were fitted with, and None would otherwise only
    #surface later as an opaque TypeError
    if centers is None or eps is None:
        raise ValueError("plot_nonlinear_approximation needs the 'centers' and 'eps' used to fit 'solution'")
    if n_bases is None:
        n_bases = len(centers)

    #Load data points and target values from the given dataset
    points, targets = load_data(data)
    #Generate 100 evenly spaced x values from -5 to 5 for plotting purposes as stated in the task description
    x_values = np.linspace(start=-5, stop=5, num=100)

    #Evaluate the basis functions on the plotting grid (as a column vector) with the fitted centers
    list_of_bases, centers = compute_bases(points=np.expand_dims(x_values, 1), centers=centers, eps=eps, n_bases=len(centers))
    #Weighted sum of the basis functions gives the approximated function values
    y_values = np.sum(solution * list_of_bases, axis=1)

    #Defaults for the curve; caller-supplied keyword arguments take precedence
    line_style = {"color": "r", "label": "Approximated function"}
    line_style.update(kwargs)

    #Set some parameters for the graph then plot the graph 
    plt.figure(figsize=(5, 5))
    plt.scatter(points, targets, label=dataset_label)
    plt.plot(x_values, y_values, **line_style)
    plt.legend()
    plt.title(f"Nonlinear Function Approximation for {dataset_label} with L={n_bases}, eps={eps} and MSE={MSE}")
    plt.tight_layout()
    plt.show()

## Part 1: Approximate function in dataset A with a linear function

In [None]:
#Fit a straight line to dataset A; the result is ordered [intercept, slope]
coefficients_A_linear = approximate_linear_function(data=data_A)

#Show dataset A together with its fitted line
plot_linear_approximation(
    data=data_A,
    coefficients=coefficients_A_linear,
    dataset_label='Dataset A',
)

## Part 2: Approximate function in dataset B with a linear function

In [None]:
#Fit a straight line to dataset B; the result is ordered [intercept, slope]
coefficients_B_linear = approximate_linear_function(data=data_B)

#Show dataset B together with its fitted line
plot_linear_approximation(
    data=data_B,
    coefficients=coefficients_B_linear,
    dataset_label='Dataset B',
)

## Part 3: Approximate function in dataset B with radial functions

In [None]:
#Take the datapath of the nonlinear dataset which is the dataset B
nonlinear_data_path = "nonlinear_function_data.txt"

#Load the entire dataset B into points and targets (the targets are needed to score each fit)
points, targets = load_data(nonlinear_data_path)

#Parameters for iterations: number of basis functions (L) and bandwidth (eps)
n_bases_list = (6, 9, 12, 15, 18)
eps_list = (0.2, 0.5, 0.8, 1.1, 1.4, 1.7, 2)

#Iterate through different parameters for approximation and plot the results
for n_bases in n_bases_list:
    #Reset the centers so a fresh random set is drawn for this L and then reused for every eps
    centers = None
    for eps in eps_list:
        #The returned list_of_bases is the basis matrix on the data points for exactly these
        #centers and this eps, so it is reused for prediction instead of being recomputed
        C, _, _, _, centers, _, list_of_bases = approximate_nonlinear_function(nonlinear_data_path, n_bases=n_bases, eps=eps, centers=centers)

        #Predict the targets using the obtained coefficients and basis functions
        pred_targets = np.sum(C * list_of_bases, axis=1)

        #Calculate MSE; the predictions already have one entry per target, so no resampling is needed
        mse = np.mean((pred_targets - targets) ** 2)

        #Plot the approximated function over the actual data
        plot_nonlinear_approximation(solution=C, data=nonlinear_data_path, centers=centers, eps=eps, MSE=round(mse, 3), n_bases=n_bases, dataset_label="Dataset B")


## Why is it not a good idea to use radial basis functions for dataset (A)?

In [None]:
#Take the datapath of the linear dataset which is the dataset A
linear_data_path = "linear_function_data.txt"

#Load the entire dataset A into points and targets (the targets are needed to score each fit)
points, targets = load_data(linear_data_path)

#Parameters for iterations: number of basis functions (L) and bandwidth (eps)
n_bases_list = (6, 9, 12, 15, 18)
#eps=100 is included as well: the function behind dataset A is very smooth, and for a very
#smooth function it is beneficial to choose a larger eps for the approximation
eps_list = (0.2, 0.5, 0.8, 1.1, 1.4, 1.7, 2, 100)

#Iterate through different parameters for approximation and plot the results
for n_bases in n_bases_list:
    #Reset the centers so a fresh random set is drawn for this L and then reused for every eps
    centers = None
    for eps in eps_list:
        #The returned list_of_bases is the basis matrix on the data points for exactly these
        #centers and this eps, so it is reused for prediction instead of being recomputed
        C, _, _, _, centers, _, list_of_bases = approximate_nonlinear_function(linear_data_path, n_bases=n_bases, eps=eps, centers=centers)

        #Predict the targets using the obtained coefficients and basis functions
        pred_targets = np.sum(C * list_of_bases, axis=1)

        #Calculate MSE; the predictions already have one entry per target, so no resampling is needed
        mse = np.mean((pred_targets - targets) ** 2)

        #Plot the approximated function over the actual data
        plot_nonlinear_approximation(solution=C, data=linear_data_path, centers=centers, eps=eps, MSE=round(mse, 3), n_bases=n_bases, dataset_label="Dataset A")
