In [None]:
import joblib, os, sys
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Names of the benchmark functions; each one selects a sub-directory and
# file prefix under function-calc-test/ when the data is loaded.
functions = [
    'linear',
    'quadratic',
    'sine',
]

In [None]:
# Maps each function name to its un-scaled samples, stored as the
# column-wise stack [X, y] produced at the end of every iteration.
func_dict = {}
for f in functions:
    print(f)

    # All artefacts of one function share the prefix function-calc-test/<name>/<name>
    path = f'function-calc-test/{f}/{f}'
    train = joblib.load(f'{path}_train.bin')
    test = joblib.load(f'{path}_test.bin')
    validation = joblib.load(f'{path}_validate.bin')
    scaler = joblib.load(f'{path}_scaler.bin')

    # Merge the three splits back into a single data set.
    X = np.vstack([split['X'] for split in (train, test, validation)])
    y = np.vstack([split['y'] for split in (train, test, validation)])
    plt.scatter(X, y)

    # Summary statistics of the data as stored (still in scaled units).
    print("Scaled var", X.var(), y.var())
    print("Scaled std", X.std(), y.std())
    print("Scaled Min/Max X", X.min(), X.max())
    print("Scaled Min/Max y", y.min(), y.max())
    print()

    # Undo the scaling before storing the samples.
    # NOTE(review): the same scaler object is applied to both X and y --
    # confirm that it was fitted in a way that is valid for both columns.
    y = scaler.inverse_transform(y.reshape(-1, 1))
    X = scaler.inverse_transform(X.reshape(-1, 1))
    func_dict[f] = np.hstack([X, y])

    # One figure per function, showing the scaled samples plotted above.
    plt.show()

In [None]:
# Parameters for the noisy linear function
n_points = 100  # Number of points
sigma_epsilon = 0.58  # Standard deviation of the noise
x = np.linspace(0, 50, n_points)  # x values

# Draw the Gaussian noise from a seeded generator so that re-running the
# notebook reproduces the same synthetic data set (the global, unseeded
# np.random state gave different values on every run).
rng = np.random.default_rng(0)
epsilon = rng.normal(0, sigma_epsilon, n_points)

# Define the noisy linear function: identity line plus noise
f_x = x + epsilon

# Scale x and f_x independently, x first and f_x second.
# NOTE(review): `scaler` is not created in this cell; it is whatever object
# an earlier cell left bound to that name, and each fit_transform call
# presumably refits it in place -- confirm the scaler type and that reusing
# it here is intended.
x = scaler.fit_transform(x.reshape(-1, 1))
f_x = scaler.fit_transform(f_x.reshape(-1, 1))

In [None]:
from sklearn.linear_model import LinearRegression

# Reload the stored (scaled) 'linear' data set and its scaler.
f = 'linear'
path = f'function-calc-test/{f}/{f}'
train = joblib.load(f'{path}_train.bin')
test = joblib.load(f'{path}_test.bin')
validation = joblib.load(f'{path}_validate.bin')
scaler = joblib.load(f'{path}_scaler.bin')
X = np.vstack([train['X'], test['X'], validation['X']])
y = np.vstack([train['y'], test['y'], validation['y']])

# Given data: first column is X, second column is y
data = np.hstack([X, y])

# Regression inputs. The feature column gets its own name (x_fit) so that the
# synthetic `x` grid created in the earlier cell is not overwritten; that grid
# is still needed, paired with f_x, by the comparison plot further down.
x_fit = data[:, 0].reshape(-1, 1)
y = data[:, 1]

# Fit linear regression model
model = LinearRegression()
model.fit(x_fit, y)

# Retrieve slope and intercept
slope = model.coef_[0]
intercept = model.intercept_

print(f"Slope: {slope}")
print(f"Intercept: {intercept}")


def linear_function(x, m, b):
    """Evaluate the straight line ``m * x + b``.

    Parameters
    ----------
    x : scalar or array-like supporting arithmetic
        Position(s) at which the line is evaluated.
    m : scalar
        Slope.
    b : scalar
        Intercept.

    Returns
    -------
    Same kind as ``x``: the value(s) ``m * x + b``.
    """
    return m * x + b


# Plot the data, the fitted line and the identity line. The two lines get
# distinct labels and styles; previously both were red and both were labelled
# "Best-fit line", which made the legend misleading.
plt.scatter(x_fit, y, label="Data points")
plt.plot(x_fit, linear_function(x_fit, slope, intercept), color="red", label="Best-fit line")
plt.plot(x_fit, linear_function(x_fit, 1, 0), color="green", linestyle="--", label="y = x (reference)")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.title("Linear Fit")
plt.show()


In [None]:
# Overlay the synthetic noisy-line values f_x ('new') on the data set that was
# loaded from disk ('old').
# NOTE(review): scatter needs x and f_x to have the same length -- confirm
# that x is still the grid f_x was generated from when this cell runs.
plt.scatter(x=x, y=f_x, label='new')
plt.scatter(x=X, y=y, label='old')
plt.legend()
plt.show()

In [None]:
# Standard deviation of the synthetic values f_x next to that of y, shown as the cell output.
np.std(f_x),np.std(y)

In [None]:
# Variance of the synthetic values f_x next to that of y, shown as the cell output.
np.var(f_x),np.var(y)

In [None]:
# Columns of the un-scaled quadratic samples: x first, y second.
x_data = func_dict['quadratic'][:, 0]
y_data = func_dict['quadratic'][:, 1]

# Ordinary least-squares fit of a degree-2 polynomial; np.polyfit returns the
# coefficients highest power first, i.e. [a, b, c] for a*x**2 + b*x + c.
coefficients = np.polyfit(x_data, y_data, 2)

a, b, c = coefficients


In [None]:
# Scatter of the un-scaled quadratic samples (column 0 against column 1).
plt.scatter(func_dict['quadratic'][:, 0], func_dict['quadratic'][:, 1])

In [None]:
import numpy as np
from scipy.optimize import minimize

# Observations used by the total-least-squares fit in this cell: the same
# arrays as x_data / y_data, under the names the fit code expects.
x_obs, y_obs = x_data, y_data

# Model being fitted: a general second-degree polynomial
def quadratic_model(x, a, b, c):
    """Return ``a * x**2 + b * x + c`` for a scalar or array ``x``."""
    squared_term = a * x**2
    linear_term = b * x
    return squared_term + linear_term + c

# Define the error function for TLS
def tls_error(params, x_obs, y_obs):
    """Total squared point-to-curve distance of a quadratic to the data.

    For every observation ``(x_obs[i], y_obs[i])`` an inner ``minimize`` call,
    started at ``x_obs[i]``, searches for the x position on the curve
    ``quadratic_model(x, a, b, c)`` with the smallest squared Euclidean
    distance to that observation. The distances found are summed.

    Parameters
    ----------
    params : sequence of three floats
        Polynomial coefficients ``(a, b, c)``.
    x_obs, y_obs : array-like of equal length
        Observed coordinates.

    Returns
    -------
    float
        Sum over all points of the squared distance in x plus the squared
        distance in y between the observation and its adjusted curve point.
    """
    a, b, c = params
    # Work on float arrays: the adjusted positions are written back element by
    # element, and an integer-typed copy would silently truncate them.
    x_obs = np.asarray(x_obs, dtype=float)
    y_obs = np.asarray(y_obs, dtype=float)
    x_adj = x_obs.copy()  # Adjusted x values
    # Refine each x_adj[i] independently by minimising the distance to the curve
    for i in range(len(x_obs)):
        def point_distance(x_adj_i):
            return (y_obs[i] - quadratic_model(x_adj_i, a, b, c))**2 + (x_obs[i] - x_adj_i)**2
        res = minimize(point_distance, x_obs[i])
        # res.x is a length-1 array; take its element explicitly, because
        # assigning a size-1 array to a scalar slot is deprecated in NumPy.
        x_adj[i] = res.x[0]
    # Total perpendicular error
    return np.sum((y_obs - quadratic_model(x_adj, a, b, c))**2 + (x_obs - x_adj)**2)

# Initial guess for a, b, c
initial_params = [1, 1, 1]

# Optimize to find best a, b, c (outer minimisation over the coefficients;
# tls_error runs one inner minimisation per data point on every evaluation)
result = minimize(tls_error, initial_params, args=(x_obs, y_obs))
a, b, c = result.x

print(f"Fitted coefficients: a = {a}, b = {b}, c = {c}")

# Evaluate the fitted curve over the x values in ascending order so that it is
# drawn as one continuous line. A separate name is used for the sorted grid so
# the coefficient `a` keeps its fitted value after this cell.
x_sorted = np.sort(x_data)
plt.plot(x_sorted, quadratic_model(x_sorted, a, b, c))
plt.scatter(x_data, y_data)

In [None]:
from scipy.optimize import curve_fit


# Un-scaled sine samples, reordered by ascending x (column 0).
data = func_dict['sine']
data = data[data[:, 0].argsort()]
x_obs, y_obs = data[:, 0], data[:, 1]



In [None]:
# Sinusoid with free amplitude, angular frequency and phase
def sine_model(x, a, b, c):
    """Return ``a * sin(b * x + c)`` for a scalar or array ``x``."""
    angle = b * x + c
    return a * np.sin(angle)

# Initial guess for parameters: amplitude, frequency, phase
# (amplitude 1, angular frequency 2*pi i.e. one cycle per unit x, phase 0)
initial_guess = [1, 2 * np.pi, 0]

# Fit the sine model to the data, starting the optimiser from initial_guess.
# The guess used to be defined but never handed to curve_fit, which then
# started from its default of all parameters equal to 1; sine fits are
# sensitive to the starting frequency, so the start point is now explicit.
params, _ = curve_fit(
    sine_model,
    x_obs,
    y_obs,
    p0=initial_guess,
)

# Extract parameters (same order as the sine_model arguments a, b, c)
amplitude, frequency, phase = params

# Print the results
print(f"Amplitude (a): {amplitude:.4f}")
print(f"Frequency (b): {frequency:.4f}")
print(f"Phase (c): {phase:.4f}")

# Evaluate the fitted model at the observed x positions for plotting
y_fitted = sine_model(x_obs, amplitude, frequency, phase)

# Noise settings derived from the sine observations: a grid spanning the range
# of y_obs and Gaussian noise whose spread equals the spread of y_obs.
# NOTE(review): none of n_points, x, sigma_epsilon or epsilon is read by the
# plotting code that follows in this cell -- confirm whether they are still needed.
n_points = 100  # Number of points
sigma_epsilon = np.std(y_obs)  # Standard deviation of the noise
x = np.linspace(y_obs.min(), y_obs.max(), n_points)  # x values
epsilon = np.random.normal(loc=0, scale=sigma_epsilon, size=n_points)

# Show the fitted values and the observations at the same x positions
plt.figure(figsize=(10, 6))
plt.scatter(x_obs, y_fitted, color='blue', label='Fitted Sine Function')
plt.scatter(x_obs, y_obs, color='red', alpha=0.6, label='Observed Data')
plt.title('Sine Function Fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

In [None]:
from sympy import nsimplify, pi

# Fitted angular frequency expressed in degrees instead of radians
degrees = frequency * 180 / np.pi

# Fitted frequency as a multiple of pi, handed to SymPy to look for a simple
# exact form of that multiple
pi_multiple = frequency / np.pi
fraction_x = nsimplify(pi_multiple)

# Cell output: both representations side by side
(degrees, fraction_x)

In [None]:
# Display the raw fitted value of the sine model's b parameter as the cell output.
frequency 