In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from utils.ZScoreScaler import ZScoreScaler
from utils.LinearRegressionModel import LinearRegressionModel

In [None]:
# Read the dataset from the relative path below into `df`, then echo the
# frame as this cell's output.
# NOTE(review): the scaling cell further down overwrites the 'km' and 'price'
# columns of this same frame in place, so what is displayed here is the data
# as loaded, not what later cells see.
df = pd.read_csv('./data/data.csv')
df

In [None]:
# Build the regression model with epoch=1000.
# NOTE(review): the next cell rebinds `model` with the same arguments before
# calling fit, and nothing reads `model` in between, so this instance is
# never used.
model = LinearRegressionModel(epoch=1000)

In [None]:
# Column names used for the feature and the target throughout the notebook.
x_label, y_label = 'km', 'price'

# NOTE(review): both columns of `df` are overwritten with their transformed
# values below. Re-running this cell without reloading `df` would therefore
# fit the scalers on already-transformed data.

# Feature column: fit its scaler, then replace the column with scaled values.
scaler_x = ZScoreScaler()
scaler_x.fit(df[x_label])
df[x_label] = scaler_x.transform(df[x_label])

# Target column: same treatment with its own, separate scaler.
scaler_y = ZScoreScaler()
scaler_y.fit(df[y_label])
df[y_label] = scaler_y.transform(df[y_label])

# Train a fresh model on the transformed columns.
model = LinearRegressionModel(epoch=1000)
model.fit(df[x_label], df[y_label])

In [None]:
# Raw feature value to price, expressed in the same units as the CSV column.
input_value = 240000

# The model was trained on transformed columns, so the query is transformed
# with the feature scaler first and the prediction is mapped back with the
# target scaler afterwards.
scaled_input = scaler_x.transform(input_value)
predicted_value = model.predict(scaled_input)
inverse_transform_output = scaler_y.inverse_transform(predicted_value)

print(f'Predicted: {predicted_value} transform backed to {inverse_transform_output}')

In [None]:
# Print the fitted line's slope and y-intercept as reported by the model.
# The model was fit on the scaler-transformed columns, so these coefficients
# are expressed in transformed units, not in the CSV's km/price units.
print(f'f(x) = {model.get_slope()}x + {model.get_y_intercept()}')

In [None]:
# Display the training history recorded by the model. The plotting helpers
# defined below read metric columns plus 'slope' and 'y_intercept' from it.
model.get_history()

In [None]:
def plot_linear_regression_results(df, model, scaler_x, scaler_y, x_label, y_label, title="Linear Regression Results", figsize=(10, 6)):
    """
    Scatter the data and overlay the fitted line, both in original units.

    Parameters
    ----------
    df : DataFrame
        Its ``x_label`` / ``y_label`` columns must hold the *scaled* values;
        they are passed through the scalers' ``inverse_transform`` before
        being drawn.
    model : object
        Must provide ``predict(x_scaled)`` and ``get_history()``.
    scaler_x, scaler_y : object
        Must provide ``inverse_transform`` for the feature and the target.
    x_label, y_label : str
        Column names; also used (title-cased) as axis labels.
    title : str
        Axes title.
    figsize : tuple
        Forwarded to ``plt.subplots``.

    Returns
    -------
    (fig, ax)
        The created figure and its single axes.
    """
    fig, ax = plt.subplots(figsize=figsize)

    x_scaled = df[x_label].values
    y_scaled = df[y_label].values

    # Data points, mapped back to original units.
    ax.scatter(
        scaler_x.inverse_transform(x_scaled),
        scaler_y.inverse_transform(y_scaled),
        alpha=0.6, color='blue', label='Data Points',
    )

    # Evaluate the model over the observed (scaled) x range, then map both
    # axes back to original units for drawing.
    x_range_scaled = np.linspace(x_scaled.min(), x_scaled.max(), 100)
    x_range_original = scaler_x.inverse_transform(x_range_scaled)
    y_pred_original = scaler_y.inverse_transform(model.predict(x_range_scaled))

    # The model's own slope/intercept are expressed in scaled space, and
    # pushing them through inverse_transform (which maps *values*, not
    # coefficients) does not give the line's equation in original units.
    # Instead, read the coefficients off the endpoints of the line that is
    # actually drawn; this is exact whenever that line is straight.
    x_line = np.asarray(x_range_original, dtype=float)
    y_line = np.asarray(y_pred_original, dtype=float)
    x_span = x_line[-1] - x_line[0]
    if x_span != 0:
        slope_original = (y_line[-1] - y_line[0]) / x_span
        intercept_original = y_line[0] - slope_original * x_line[0]
        line_label = f'Fitted Line (y = {slope_original:.2f}x + {intercept_original:.2f})'
    else:
        # All x values coincide: a slope cannot be derived from the line.
        line_label = 'Fitted Line'

    ax.plot(x_range_original, y_pred_original, color='red', linewidth=2,
            label=line_label)

    ax.set_xlabel(x_label.title())
    ax.set_ylabel(y_label.title())
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Final-epoch metrics box. Every metric is optional (including MSE), and
    # an empty history simply produces no box.
    history = model.get_history()
    metric_templates = (
        ('MSE', 'Final MSE: {:.6f}'),
        ('RMSE', 'Final RMSE: {:.6f}'),
        ('MAE', 'Final MAE: {:.6f}'),
        ('R^2', 'R^2: {:.4f}'),
    )
    metric_lines = []
    if len(history) > 0:
        metric_lines = [
            template.format(history[name].iloc[-1])
            for name, template in metric_templates
            if name in history.columns
        ]

    if metric_lines:
        ax.text(0.02, 0.98, '\n'.join(metric_lines),
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8)
        )

    plt.tight_layout()

    return fig, ax


def plot_training_history(model, figsize=(15, 10)):
    """
    Draw one panel per recorded training metric, followed by the slope and
    y-intercept trajectories, on a single grid of subplots.

    Parameters
    ----------
    model : object
        Must provide ``get_history()`` returning a table with one row per
        epoch. Metric columns are optional; the ``slope`` and
        ``y_intercept`` columns are read unconditionally.
    figsize : tuple
        Only ``figsize[0]`` (the width) is used. The height is picked from
        the grid layout: 5, 8 or 12.

    Returns
    -------
    (fig, axes)
        ``axes`` is a flat sequence of all grid cells; cells without a panel
        are hidden.
    """
    history = model.get_history()
    epochs = range(1, len(history) + 1)

    metric_colors = {
        'MSE': 'green',
        'RMSE': 'orange',
        'MAE': 'purple',
        'MAPE': 'brown',
        'R^2': 'pink',
        'Huber_Loss': 'cyan'
    }

    # Annotation templates: MAPE is shown as a percentage, R^2 with four
    # decimals, everything else with six.
    default_template = 'Final: {:.6f}'
    special_templates = {'MAPE': 'Final: {:.2f}%', 'R^2': 'Final: {:.4f}'}

    # One tuple per panel: (column, line colour, title, y-label, template).
    panels = [
        (name, colour, f'Training {name}', name,
         special_templates.get(name, default_template))
        for name, colour in metric_colors.items()
        if name in history.columns
    ]
    panels.append(('slope', 'blue', 'Slope Evolution', 'Slope (θ₁)', default_template))
    panels.append(('y_intercept', 'red', 'Y-Intercept Evolution', 'Y-Intercept (θ₀)', default_template))

    # Grid shape grows with the number of panels (always at least two).
    panel_count = len(panels)
    if panel_count <= 3:
        rows, cols, fig_height = 1, panel_count, 5
    elif panel_count <= 6:
        rows, cols, fig_height = 2, 3, 8
    else:
        rows, cols, fig_height = 3, 3, 12

    fig, axes = plt.subplots(rows, cols, figsize=(figsize[0], fig_height))

    # Normalise to a flat sequence regardless of grid shape.
    if rows == 1:
        axes = axes if hasattr(axes, '__len__') else [axes]
    else:
        axes = axes.flatten()

    for ax, (column, colour, heading, y_caption, template) in zip(axes, panels):
        series = history[column]
        ax.plot(epochs, series, color=colour, linewidth=2)
        ax.set_title(heading)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_caption)
        ax.grid(True, alpha=0.3)

        # Last-epoch value in the top-left corner of the panel.
        ax.text(0.02, 0.95, template.format(series.iloc[-1]),
                transform=ax.transAxes, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.7, pad=0.3))

    # Hide grid cells that received no panel.
    for spare in axes[len(panels):]:
        spare.set_visible(False)

    plt.tight_layout()
    return fig, axes


def plot_error_comparison(model, figsize=(12, 8)):
    """
    Overlay the recorded error metrics on one log-scaled panel and show a
    companion metric (R^2, else MAPE) on a second panel.

    Parameters
    ----------
    model : object
        Must provide ``get_history()`` returning a table with one row per
        epoch.
    figsize : tuple
        Forwarded to ``plt.subplots``.

    Returns
    -------
    (fig, (ax1, ax2)), or (None, None) when fewer than two of MSE, RMSE,
    MAE and Huber_Loss are present in the history. In that case a message
    is printed and no figure is created.
    """
    history = model.get_history()
    epochs = range(1, len(history) + 1)
    recorded = history.columns

    # R^2 is left out of the overlay because it lives on a different scale.
    palette = ['green', 'orange', 'purple', 'cyan']
    overlay = [name for name in ('MSE', 'RMSE', 'MAE', 'Huber_Loss') if name in recorded]

    if len(overlay) < 2:
        print("Not enough error metrics available for comparison plot")
        return None, None

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)

    # Left panel: every available error metric. There are at most as many
    # metrics as palette entries, so pairing them positionally is enough.
    for colour, name in zip(palette, overlay):
        ax1.plot(epochs, history[name], color=colour,
                 linewidth=2, label=name)

    ax1.set_title('Error Metrics Comparison')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Error Value')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    ax1.set_yscale('log')  # log axis keeps metrics of different magnitude readable

    # Right panel: R^2 when recorded, MAPE as a fallback, else a placeholder.
    if 'R^2' in recorded:
        ax2.plot(epochs, history['R^2'], color='pink', linewidth=2)
        ax2.set_title('R^2 Score Evolution')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('R^2 Score')
        ax2.grid(True, alpha=0.3)
        # Dashed reference lines at 0 and 1.
        for level in (0, 1):
            ax2.axhline(y=level, color='k', linestyle='--', alpha=0.5)
    elif 'MAPE' in recorded:
        ax2.plot(epochs, history['MAPE'], color='brown', linewidth=2)
        ax2.set_title('MAPE Evolution')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('MAPE (%)')
        ax2.grid(True, alpha=0.3)
    else:
        ax2.text(0.5, 0.5, 'No additional metrics\navailable for plotting',
                 ha='center', va='center', transform=ax2.transAxes)
        ax2.set_title('Additional Metrics')

    plt.tight_layout()
    return fig, (ax1, ax2)



In [None]:
# `df` holds the scaler-transformed columns at this point; the helper maps
# them back to original units through the two scalers before drawing.
plot_linear_regression_results(
    df=df,
    model=model,
    scaler_x=scaler_x,
    scaler_y=scaler_y,
    x_label=x_label,
    y_label=y_label,
    title="Car Price vs Kilometers - Linear Regression",
)

In [None]:
# Draw one panel per recorded metric plus the slope and y-intercept
# trajectories, using the helper defined above with its default figsize.
plot_training_history(model)

In [None]:
# Overlay the recorded error metrics on a log-scaled panel next to R^2 (or
# MAPE). The helper returns (None, None) and prints a message when fewer than
# two error metrics are present in the history.
plot_error_comparison(model)