In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Confirmation message emitted after the imports in this cell have run.
print("Notebook is ready")

# Curve Fitting using Least Squares
## Data Set A

In [None]:
# Experimental points of Data Set A: x_A holds the abscissas and y_A the
# matching ordinates (ten pairs, stored as float arrays).
x_A = np.array(
    [2.5, 3.5, 5.0, 6.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0],
    dtype=float,
)
y_A = np.array(
    [5.0, 3.4, 2.0, 1.6, 1.2, 0.8, 0.6, 0.4, 0.3, 0.3],
    dtype=float,
)

print("Data Set A loaded")

In [None]:
# Scatter plot of the raw Data Set A points, before any model is fitted.
plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(xlabel="x", ylabel="y", title="Data Set A")
plt.legend()
plt.grid(True)
plt.show()

## Linear Least Squares Model
Model: $y = a + b x$

In [None]:
# Straight-line least-squares fit of y_A against x_A.
coeff_linear = np.polyfit(x_A, y_A, deg=1)

# The coefficient array is ordered slope first, intercept second.
b, a = coeff_linear

print(f"Linear model: y = {a:.4f} + {b:.4f}x")

In [None]:
# Values of the fitted line at the measured x positions.
y_hat_linear = b * x_A + a

In [None]:
# Goodness of fit for the linear model.

# Difference between each measurement and the line's prediction.
residuals = y_A - y_hat_linear

# SSE: sum of the squared residuals.
SSE_linear = (residuals**2).sum()

# R^2 = 1 - SSE / SST, where SST is the squared deviation of y_A from its mean.
y_mean = y_A.mean()
R2_linear = 1 - SSE_linear / ((y_A - y_mean)**2).sum()

print(f"SSE (Linear) = {SSE_linear:.6f}")
print(f"R^2 (Linear) = {R2_linear:.6f}")

In [None]:
# Measured points together with the fitted straight line.
plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
plt.plot(x_A, y_hat_linear, color="red", label="Linear fit")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(
    xlabel="x",
    ylabel="y",
    title="Linear Least Squares Fit (Data Set A)",
)
plt.legend()
plt.grid(True)
plt.show()

## Parabolic Least Squares Model
Model: $y = a x^2 + b x + c$

In [None]:
# Second-degree polynomial least-squares fit of y_A against x_A.
coeff_parabolic = np.polyfit(x_A, y_A, deg=2)

# Coefficients are ordered quadratic, linear, constant.
a_p, b_p, c_p = coeff_parabolic

print(f"Parabolic model: y = {a_p:.6f}x^2 + {b_p:.6f}x + {c_p:.6f}")

In [None]:
# Values of the fitted parabola at the measured x positions.
y_hat_parabolic = (
    x_A**2 * a_p
    + x_A * b_p
    + c_p
)

In [None]:
# Goodness of fit for the parabolic model.

# Difference between each measurement and the parabola's prediction.
residuals_p = y_A - y_hat_parabolic

# SSE: sum of the squared residuals.
SSE_parabolic = (residuals_p**2).sum()

# R^2 = 1 - SSE / SST, with SST taken about the mean of y_A.
R2_parabolic = 1 - SSE_parabolic / ((y_A - y_A.mean())**2).sum()

print(f"SSE (Parabolic) = {SSE_parabolic:.6f}")
print(f"R^2 (Parabolic) = {R2_parabolic:.6f}")

In [None]:
# Evaluate the parabola on 200 evenly spaced points spanning the data range
# so the plotted curve looks smooth.
x_smooth = np.linspace(x_A.min(), x_A.max(), num=200)
y_smooth_parabolic = (
    x_smooth**2 * a_p
    + x_smooth * b_p
    + c_p
)

plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
plt.plot(x_smooth, y_smooth_parabolic, color="blue", label="Parabolic fit")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(
    xlabel="x",
    ylabel="y",
    title="Parabolic Least Squares Fit (Data Set A)",
)
plt.legend()
plt.grid(True)
plt.show()

## Power Model (Least Squares)
Model: $y = a x^{b}$

Linearized: $\ln y = \ln a + b \ln x$

In [None]:
# Power model y = a * x^b, fitted as a straight line in log-log space:
#     ln(y) = ln(a) + b * ln(x)
# The least-squares criterion is therefore applied to ln(y), not to y itself.

# np.log does not raise for zero or negative inputs; it returns -inf / nan and
# only emits a RuntimeWarning. Fail early with a clear message instead of
# letting those values flow into the fit.
if np.any(x_A <= 0) or np.any(y_A <= 0):
    raise ValueError(
        "Power model requires strictly positive x and y values "
        "(the fit is done on ln(x) and ln(y))."
    )

ln_x = np.log(x_A)
ln_y = np.log(y_A)

# First-degree fit of ln(y) against ln(x).
coeff_power = np.polyfit(ln_x, ln_y, 1)

# Index 0 is the slope (the exponent b); index 1 is the intercept, ln(a).
b_pow = coeff_power[0]
ln_a_pow = coeff_power[1]
a_pow = np.exp(ln_a_pow)  # undo the log transform of the intercept

print(f"Power model: y = {a_pow:.6f} * x^{b_pow:.6f}")

In [None]:
# Power-model predictions at the measured x positions, in the original
# (untransformed) y scale.
y_hat_power = (x_A ** b_pow) * a_pow

# SSE and R^2 are evaluated against y_A itself, not against ln(y_A).
SSE_power = ((y_A - y_hat_power)**2).sum()
R2_power = 1 - SSE_power / ((y_A - y_A.mean())**2).sum()

print(f"SSE (Power) = {SSE_power:.6f}")
print(f"R^2 (Power) = {R2_power:.6f}")

In [None]:
# Evaluate the power model on 200 evenly spaced points spanning the data
# range so the plotted curve looks smooth.
x_smooth = np.linspace(x_A.min(), x_A.max(), num=200)
y_smooth_power = (x_smooth ** b_pow) * a_pow

plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
plt.plot(x_smooth, y_smooth_power, color="green", label="Power fit")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(
    xlabel="x",
    ylabel="y",
    title="Power Model Fit (Data Set A)",
)
plt.legend()
plt.grid(True)
plt.show()

## Exponential Model (Least Squares)
Model: $y = a e^{b x}$

Linearized: $\ln y = \ln a + b x$

In [None]:
# Exponential model y = a * e^(b x), fitted as a straight line in semi-log space:
#     ln(y) = ln(a) + b * x
# The least-squares criterion is therefore applied to ln(y), not to y itself.

# np.log does not raise for zero or negative inputs; it returns -inf / nan and
# only emits a RuntimeWarning. Fail early with a clear message instead of
# letting those values flow into the fit.
if np.any(y_A <= 0):
    raise ValueError(
        "Exponential model requires strictly positive y values "
        "(the fit is done on ln(y))."
    )

ln_y = np.log(y_A)

# First-degree fit of ln(y) against x.
coeff_exp = np.polyfit(x_A, ln_y, 1)

# Index 0 is the slope (the rate b); index 1 is the intercept, ln(a).
b_exp = coeff_exp[0]
ln_a_exp = coeff_exp[1]
a_exp = np.exp(ln_a_exp)  # undo the log transform of the intercept

print(f"Exponential model: y = {a_exp:.6f} * e^({b_exp:.6f} x)")

In [None]:
# Exponential-model predictions at the measured x positions, in the original
# (untransformed) y scale.
y_hat_exp = np.exp(x_A * b_exp) * a_exp

# SSE and R^2 are evaluated against y_A itself, not against ln(y_A).
SSE_exp = ((y_A - y_hat_exp)**2).sum()
R2_exp = 1 - SSE_exp / ((y_A - y_A.mean())**2).sum()

print(f"SSE (Exponential) = {SSE_exp:.6f}")
print(f"R^2 (Exponential) = {R2_exp:.6f}")

In [None]:
# Evaluate the exponential model on 200 evenly spaced points spanning the
# data range so the plotted curve looks smooth.
x_smooth = np.linspace(x_A.min(), x_A.max(), num=200)
y_smooth_exp = np.exp(x_smooth * b_exp) * a_exp

plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
plt.plot(x_smooth, y_smooth_exp, color="purple", label="Exponential fit")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(
    xlabel="x",
    ylabel="y",
    title="Exponential Model Fit (Data Set A)",
)
plt.legend()
plt.grid(True)
plt.show()

## Logarithmic Model (Least Squares)
Model: $y = a + b \ln x$

In [None]:
# Logarithmic model y = a + b * ln(x): a straight-line fit of y against ln(x).
# y is not transformed, so the least-squares criterion applies to y directly.

# np.log does not raise for zero or negative inputs; it returns -inf / nan and
# only emits a RuntimeWarning. Fail early with a clear message instead of
# letting those values flow into the fit.
if np.any(x_A <= 0):
    raise ValueError(
        "Logarithmic model requires strictly positive x values "
        "(the fit is done on ln(x))."
    )

ln_x = np.log(x_A)

# First-degree fit of y against ln(x).
coeff_log = np.polyfit(ln_x, y_A, 1)

# Index 0 is the slope b (multiplies ln(x)); index 1 is the intercept a.
b_log = coeff_log[0]
a_log = coeff_log[1]

print(f"Logarithmic model: y = {a_log:.6f} + {b_log:.6f} ln(x)")

In [None]:
# Logarithmic-model predictions at the measured x positions; ln_x is the
# natural log of x_A computed in the fitting cell.
y_hat_log = ln_x * b_log + a_log

# SSE is the sum of squared residuals; R^2 = 1 - SSE / SST about the mean of y_A.
SSE_log = ((y_A - y_hat_log)**2).sum()
R2_log = 1 - SSE_log / ((y_A - y_A.mean())**2).sum()

print(f"SSE (Logarithmic) = {SSE_log:.6f}")
print(f"R^2 (Logarithmic) = {R2_log:.6f}")

In [None]:
# Evaluate the logarithmic model on 200 evenly spaced points spanning the
# data range so the plotted curve looks smooth.
x_smooth = np.linspace(x_A.min(), x_A.max(), num=200)
y_smooth_log = np.log(x_smooth) * b_log + a_log

plt.figure(figsize=(6, 4))
plt.scatter(x_A, y_A, color="black", label="Experimental data")
plt.plot(x_smooth, y_smooth_log, color="orange", label="Logarithmic fit")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(
    xlabel="x",
    ylabel="y",
    title="Logarithmic Model Fit (Data Set A)",
)
plt.legend()
plt.grid(True)
plt.show()

## Comparison of Models for Data Set A


In [None]:
# Side-by-side comparison of the five Data Set A models.
# pandas (pd) is imported with the other libraries in the notebook's first
# cell, so this cell no longer carries its own import.
# Each row pairs a model name with the SSE and R^2 computed in that model's
# metrics cell; rows follow the order in which the models were fitted.
results_A = pd.DataFrame({
    "Model": [
        "Linear",
        "Parabolic",
        "Power",
        "Exponential",
        "Logarithmic",
    ],
    "SSE": [
        SSE_linear,
        SSE_parabolic,
        SSE_power,
        SSE_exp,
        SSE_log,
    ],
    "R^2": [
        R2_linear,
        R2_parabolic,
        R2_power,
        R2_exp,
        R2_log,
    ],
})

# Bare last expression so the notebook renders the table.
results_A

### Conclusion (Data Set A)

Based on the comparison of all five models, the quality of approximation was evaluated using the Sum of Squared Errors (SSE) and the coefficient of determination (R²).

The linear model shows the largest error and the lowest R² value, indicating that a straight line is not sufficient to describe the nonlinear behavior of the data.

The parabolic, power, exponential, and logarithmic models provide a significantly better fit, with higher R² values and lower SSE. Among them, the model with the highest R² and the smallest SSE can be considered the most suitable for Data Set A.

Thus, nonlinear regression models are more appropriate for describing the given experimental data.


# Data Set B

In [None]:
# Experimental points of Data Set B: x_B holds the abscissas and y_B the
# matching ordinates (six pairs, stored as float arrays).
x_B = np.array(
    [10.0, 15.0, 22.5, 33.7, 50.6, 75.9],
    dtype=float,
)
y_B = np.array(
    [0.3, 0.675, 1.519, 3.417, 7.639, 11.3],
    dtype=float,
)

print("Data Set B loaded")

In [None]:
# Scatter plot of the raw Data Set B points, before any model is fitted.
plt.figure(figsize=(6, 4))
plt.scatter(x_B, y_B, color="black", label="Experimental data")
# Axis labels and title are applied in one call on the current axes.
plt.gca().set(xlabel="x", ylabel="y", title="Data Set B")
plt.legend()
plt.grid(True)
plt.show()