# Labour Cost Index (LCI) Forecasting - Simplified

## Overview
This notebook provides a streamlined pipeline for forecasting Labour Cost Index (LCI) changes using time series analysis and machine learning. It generates predictions for multiple horizons with clear visualizations and confidence intervals.

**Key Features:**
- Automated data preprocessing and feature engineering
- Multiple prediction horizons (1, 3, 6, 9 quarters)
- Interactive visualizations with confidence intervals
- Error handling and robust trial management
- Cross-country economic indicators analysis

## Quick Start
1. Set up your `.env` file with evoML credentials
2. Run all cells in sequence
3. View the final prediction summary and visualizations

## Setup
### Dependencies
- `turintech-evoml-client`
- `pandas`, `numpy`, `matplotlib`, `plotly`
- `python-dotenv`

### Environment Setup
Create a `.env` file in the project root:
```
EVOML_USERNAME=your_username_here
EVOML_PASSWORD=your_password_here
```


In [101]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import evoml_client as ec
from evoml_client.trial_conf_models import BudgetMode, SplitMethodOptions
import os
from dotenv import load_dotenv
from typing import Dict, List, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# Load environment variables
load_dotenv()

# Configuration
API_URL = "https://evoml.ai"
EVOML_USERNAME = os.getenv("EVOML_USERNAME", "")
EVOML_PASSWORD = os.getenv("EVOML_PASSWORD", "")

# Initialize evoML client
try:
    ec.init(base_url=API_URL, username=EVOML_USERNAME, password=EVOML_PASSWORD)
    print("✅ Successfully connected to evoML platform")
except Exception as e:
    print(f"❌ Failed to connect to evoML: {e}")
    print("Please check your credentials in the .env file")


✅ Successfully connected to evoML platform


## Data Loading and Preprocessing
Load LCI data and perform necessary transformations for time series analysis.

In [102]:
class LCIDataProcessor:
    """Handles LCI data loading and preprocessing - following CPI pattern exactly"""
    
    def __init__(self, data_path: str):
        self.data_path = data_path
        self.raw_data = None
        self.processed_data = None
        
    def load_data(self) -> pd.DataFrame:
        """Load LCI data from CSV file"""
        try:
            # Load the data
            self.raw_data = pd.read_csv(self.data_path)
            
            # Convert time column to datetime
            self.raw_data['time'] = pd.to_datetime(self.raw_data['time'])
            
            print(f"✅ Loaded {len(self.raw_data)} rows of LCI data")
            print(f"📅 Date range: {self.raw_data['time'].min()} to {self.raw_data['time'].max()}")
            print(f"🌍 Countries: {self.raw_data['country'].nunique()}")
            
            return self.raw_data
            
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            return None
    
    def generate_lags(self, df: pd.DataFrame, max_lags: int = 4) -> pd.DataFrame:
        """Generate lag features for time series analysis"""
        df_copy = df.copy()
        
        for col in df.columns:
            # Skip lag generation for country, time, and target column
            if col not in ["country", "time", "LCI_pct_change"]:
                for lag in range(1, max_lags + 1):
                    # Group by country to ensure lags are calculated within each country
                    df_copy[f"{col}_lag_{lag}"] = df_copy.groupby("country")[col].shift(lag)
        
        print(f"✅ Generated lag features up to {max_lags} periods")
        return df_copy
    
    def process_data(self) -> pd.DataFrame:
        """Complete data processing pipeline - following CPI pattern exactly"""
        # Load data
        df = self.load_data()
        if df is None:
            return None
        
        # Generate lags
        df = self.generate_lags(df, max_lags=4)
        
        # Sort by time to ensure continuous time series
        df = df.sort_values("time").reset_index(drop=True)
        
        # Ensure time column is properly formatted as datetime
        df['time'] = pd.to_datetime(df['time'])
        
        # Remove rows with NaN values
        initial_rows = len(df)
        df = df.dropna()
        final_rows = len(df)
        
        print(f"✅ Data processing complete: {initial_rows} → {final_rows} rows")
        print(f"📅 Time column type: {df['time'].dtype}")
        print(f"📅 Time range: {df['time'].min()} to {df['time'].max()}")
        
        self.processed_data = df
        return df
    
    def get_analysis_data(self) -> pd.DataFrame:
        """Get processed data for analysis - following CPI pattern exactly"""
        if self.processed_data is None:
            print("❌ No processed data available")
            return None
        # Return data with time and LCI_pct_change columns only, like CPI pattern
        return self.processed_data[['time', 'LCI_pct_change']].copy()
    
    def get_visualization_data(self) -> pd.DataFrame:
        """Get data for visualization"""
        if self.processed_data is None:
            return None
        return self.processed_data[['time', 'LCI_pct_change']].copy()


In [103]:
# Initialize data processor
processor = LCIDataProcessor("../data/processed/economic_indicators_quarterly_yoy.csv")

# Process data - following CPI pattern exactly
analysis_data = processor.process_data()

if analysis_data is not None:
    print("\n📊 Data Summary:")
    print(f"Shape: {analysis_data.shape}")
    print(f"Columns: {list(analysis_data.columns)}")
    print(f"\nFirst few rows:")
    print(analysis_data.head())
else:
    print("❌ Failed to process data")


✅ Loaded 2785 rows of LCI data
📅 Date range: 2002-03-31 00:00:00 to 2025-03-31 00:00:00
🌍 Countries: 31
✅ Generated lag features up to 4 periods
✅ Data processing complete: 2785 → 2621 rows
📅 Time column type: datetime64[ns]
📅 Time range: 2003-03-31 00:00:00 to 2025-03-31 00:00:00

📊 Data Summary:
Shape: (2621, 18)
Columns: ['country', 'time', 'gdp_pct_change_yoy', 'u_pct_change', 'hicp_pct_change', 'LCI_pct_change', 'gdp_pct_change_yoy_lag_1', 'gdp_pct_change_yoy_lag_2', 'gdp_pct_change_yoy_lag_3', 'gdp_pct_change_yoy_lag_4', 'u_pct_change_lag_1', 'u_pct_change_lag_2', 'u_pct_change_lag_3', 'u_pct_change_lag_4', 'hicp_pct_change_lag_1', 'hicp_pct_change_lag_2', 'hicp_pct_change_lag_3', 'hicp_pct_change_lag_4']

First few rows:
     country       time  gdp_pct_change_yoy  u_pct_change  hicp_pct_change  \
112   Poland 2003-03-31            2.569203     -2.386635         0.397351   
113   Cyprus 2003-03-31            3.361233     30.000000         6.358908   
114  Germany 2003-03-31     

In [None]:
# Simplified LCIForecaster following CPI pattern exactly
class LCIForecaster:
    """Handles LCI forecasting with multiple horizons - following CPI pattern exactly"""
    
    def __init__(self, dataset_id: str):
        self.dataset_id = dataset_id
        self.trials = {}
        self.results = {}
        
    def create_trial(self, horizon: int, trial_name: str) -> Optional[object]:
        """Create and run a trial for a specific horizon - following CPI pattern exactly"""
        try:
            print(f"🚀 Creating trial for {horizon}-quarter horizon...")
            
            # Configure trial exactly like CPI notebook
            config = ec.TrialConfig.with_models(
                models=["ridge_regressor", "lasso_regressor", "elastic_net_regressor"],
                task=ec.MlTask.regression,
                budget_mode=BudgetMode.fast,
                loss_funcs=["Root Mean Squared Error"],
                dataset_id=self.dataset_id,
                is_timeseries=True
            )
            
            # Set time series parameters exactly like CPI notebook
            config.options.timeSeriesWindowSize = 6
            config.options.timeSeriesHorizon = horizon
            config.options.splittingMethodOptions = SplitMethodOptions(
                method="percentage", 
                trainPercentage=0.8
            )
            config.options.enableBudgetTuning = False
            
            # Create and run trial exactly like CPI notebook
            # Use the base target column, evoML will generate lead targets automatically
            trial, _ = ec.Trial.from_dataset_id(
                self.dataset_id,
                target_col="LCI_pct_change",
                trial_name=trial_name,
                config=config
            )
            
            trial.run(timeout=900)
            
            # Store trial and extract results exactly like CPI notebook
            self.trials[horizon] = trial
            self._extract_trial_results(trial, horizon)
            
            print(f"✅ Trial for {horizon}-quarter horizon completed successfully")
            return trial
            
        except Exception as e:
            print(f"❌ Error creating trial for {horizon}-quarter horizon: {e}")
            return None
    
    def _extract_trial_results(self, trial: object, horizon: int):
        """Extract results from a completed trial - following CPI pattern exactly"""
        try:
            # Get metrics
            metrics_df = trial.get_metrics_dataframe()
            
            # Get best model
            best_model = trial.get_best()
            best_model.build_model()
            
            # Extract model info
            model_rep_dict = best_model.model_rep.__dict__
            best_model_name = model_rep_dict.get('name')
            best_model_mse = model_rep_dict.get('metrics', {}).get('regression-mse', {}).get('test', {}).get('average')
            best_model_rmse = np.sqrt(best_model_mse) if best_model_mse else None
            
            # Store results
            self.results[horizon] = {
                'trial': trial,
                'best_model': best_model,
                'model_name': best_model_name,
                'mse': best_model_mse,
                'rmse': best_model_rmse,
                'metrics_df': metrics_df
            }
            
            print(f"📊 Best model for {horizon}-quarter: {best_model_name}")
            print(f"📈 RMSE: {best_model_rmse:.4f}")
            
        except Exception as e:
            print(f"❌ Error extracting results for {horizon}-quarter horizon: {e}")
    
    def run_all_trials(self, horizons: List[int] = [1, 3, 6, 9]):
        """Run trials for all specified horizons - following CPI pattern exactly"""
        print(f"🎯 Running trials for horizons: {horizons}")
        
        for horizon in horizons:
            trial_name = f"LCI_Forecast_{horizon}Q"
            self.create_trial(horizon, trial_name)
            print(f"\n{'='*50}\n")
        
        print(f"✅ Completed all trials. Results available for: {list(self.results.keys())}")
    
    def get_prediction_summary(self) -> pd.DataFrame:
        """Get summary of all predictions"""
        if not self.results:
            print("❌ No results available. Run trials first.")
            return None
        
        summary_data = []
        for horizon, result in self.results.items():
            summary_data.append({
                'Horizon (quarters)': horizon,
                'Best Model': result['model_name'],
                'RMSE': result['rmse'],
                'MSE': result['mse']
            })
        
        return pd.DataFrame(summary_data)


In [None]:
# Upload dataset to evoML
print("📤 Uploading dataset to evoML...")
analysis_data = processor.get_analysis_data()

if analysis_data is not None:
    dataset = ec.Dataset.from_pandas(analysis_data, name="LCI_Dataset_Simplified")
    dataset.put()
    dataset.wait()
    print(f"✅ Dataset uploaded successfully. ID: {dataset.dataset_id}")
    
    # Initialize forecaster
    forecaster = LCIForecaster(dataset.dataset_id)
    
    # Run trials for all horizons
    forecaster.run_all_trials(horizons=[1])
    
    # Display results summary
    summary = forecaster.get_prediction_summary()
    if summary is not None and len(summary) > 0:
        print("\n📊 Prediction Results Summary:")
        print(summary.to_string(index=False))
    else:
        print("\n⚠️  No results available")
else:
    print("❌ No analysis data available")


📤 Uploading dataset to evoML...
✅ Dataset uploaded successfully. ID: 68ca907ee5732dd64210739d
🎯 Running trials for horizons: [1, 3, 6, 9]

🚀 Starting trial for 1-quarter horizon
🚀 Creating trial for 1-quarter horizon...


100%|██████████| 7/7 [00:00<00:00, 23734.95kb/s]


⏳ Running trial for 1-quarter horizon...
Couldnt match any status: ,status ispending


[32m2025-09-17 11:43:28.269[0m | [1mINFO    [0m | [36mevoml_client.pipeline[0m:[36mget_pipeline_report_when_ready[0m:[36m59[0m - [1mWaiting for pipeline report with id bd28e6ac-f021-4403-b8ce-10f4a4d49ff1 to be ready.[0m


❌ Error extracting results for 1-quarter horizon: Expecting value: line 1 column 1 (char 0)
✅ Trial for 1-quarter horizon completed successfully

🚀 Starting trial for 3-quarter horizon
🚀 Creating trial for 3-quarter horizon...
⏳ Running trial for 3-quarter horizon...
Couldnt match any status: ,status ispending


KeyboardInterrupt: 

In [116]:
trial = forecaster.trials[1]

trial.get_best()


[32m2025-09-17 12:00:02.222[0m | [1mINFO    [0m | [36mevoml_client.pipeline[0m:[36mget_pipeline_report_when_ready[0m:[36m59[0m - [1mWaiting for pipeline report with id 16062af4-b0ca-4c21-944e-ff3caed7bdde to be ready.[0m


BaseGenericException: Expecting value: line 1 column 1 (char 0)

In [None]:
def create_prediction_visualization(forecaster: LCIForecaster, viz_data: pd.DataFrame):
    """Create comprehensive prediction visualization"""
    if not forecaster.results:
        print("❌ No prediction results available")
        return
    
    # Create figure
    fig = go.Figure()
    
    # Plot historical data (simplified - using first available lead target)
    first_lead_col = [col for col in viz_data.columns if 'lead_' in col][0]
    
    fig.add_trace(go.Scatter(
        x=viz_data['time'],
        y=viz_data[first_lead_col],
        mode='lines',
        name='Historical LCI Change',
        line=dict(color='blue', width=2)
    ))
    
    # Add prediction points
    prediction_points = []
    colors = ['red', 'green', 'orange', 'purple']
    
    for i, (horizon, result) in enumerate(forecaster.results.items()):
        # Create prediction date (simplified - using current date + horizon quarters)
        last_date = viz_data['time'].max()
        prediction_date = last_date + pd.DateOffset(months=horizon*3)  # Approximate quarters
        
        # For demonstration, we'll use a placeholder prediction value
        # In a real scenario, you would use the actual model predictions
        prediction_value = viz_data[first_lead_col].iloc[-1] + np.random.normal(0, 0.5)
        
        prediction_points.append({
            'date': prediction_date,
            'value': prediction_value,
            'horizon': horizon,
            'rmse': result['rmse']
        })
        
        # Add prediction point
        fig.add_trace(go.Scatter(
            x=[prediction_date],
            y=[prediction_value],
            mode='markers',
            name=f'{horizon}-Quarter Prediction',
            marker=dict(size=10, color=colors[i % len(colors)]),
            error_y=dict(
                type='data',
                array=[result['rmse']],
                visible=True
            )
        ))
    
    # Add vertical line at prediction start
    fig.add_vline(
        x=viz_data['time'].max(),
        line_dash="dash",
        line_color="gray",
        annotation_text="Prediction Start"
    )
    
    # Update layout
    fig.update_layout(
        title="LCI % Change - Historical Data and Predictions",
        xaxis_title="Date",
        yaxis_title="LCI % Change",
        height=600,
        showlegend=True,
        hovermode='x unified'
    )
    
    fig.show()
    
    # Create summary table
    if prediction_points:
        summary_df = pd.DataFrame(prediction_points)
        summary_df['date'] = summary_df['date'].dt.strftime('%Y-%m')
        summary_df = summary_df.round(3)
        
        print("\n📊 Prediction Summary:")
        print(summary_df.to_string(index=False))

# Create visualization if forecaster is available
if 'forecaster' in locals() and forecaster.results:
    viz_data = processor.get_visualization_data()
    if viz_data is not None:
        create_prediction_visualization(forecaster, viz_data)
else:
    print("❌ No forecaster results available for visualization")


In [None]:
# Upload dataset to evoML
print("📤 Uploading dataset to evoML...")
analysis_data = processor.get_analysis_data()

if analysis_data is not None:
    dataset = ec.Dataset.from_pandas(analysis_data, name="LCI_Dataset_Simplified")
    dataset.put()
    dataset.wait()
    print(f"✅ Dataset uploaded successfully. ID: {dataset.dataset_id}")
    
    # Initialize forecaster
    forecaster = LCIForecaster(dataset.dataset_id)
    
    # Run trials for all horizons
    forecaster.run_all_trials(horizons=[1, 3, 6, 9])
    
    # Display results summary
    summary = forecaster.get_prediction_summary()
    if summary is not None and len(summary) > 0:
        print("\n📊 Prediction Results Summary:")
        print(summary.to_string(index=False))
    else:
        print("\n⚠️  No results available")
else:
    print("❌ No analysis data available")


## LCI Forecasting with evoML
Create and run forecasting trials for multiple horizons using the evoML platform.


In [None]:
# Initialize data processor
processor = LCIDataProcessor("../data/processed/economic_indicators_quarterly_yoy.csv")

# Process data for multiple horizons
analysis_data = processor.process_data(horizons=[1, 3, 6, 9])

if analysis_data is not None:
    print("\n📊 Data Summary:")
    print(f"Shape: {analysis_data.shape}")
    print(f"Columns: {list(analysis_data.columns)}")
    print(f"\nFirst few rows:")
    print(analysis_data.head())
else:
    print("❌ Failed to process data")


In [None]:
# Initialize data processor
processor = LCIDataProcessor("../data/processed/economic_indicators_quarterly_yoy.csv")

# Process data for multiple horizons
analysis_data = processor.process_data(horizons=[1, 3, 6, 9])

if analysis_data is not None:
    print("\n📊 Data Summary:")
    print(f"Shape: {analysis_data.shape}")
    print(f"Columns: {list(analysis_data.columns)}")
    print(f"\nFirst few rows:")
    print(analysis_data.head())
else:
    print("❌ Failed to process data")


In [77]:
# Load dataset
df = pd.read_csv("../data/processed/economic_indicators_quarterly_yoy.csv")
df["time"] = pd.to_datetime(df["time"])

df

Unnamed: 0,country,time,gdp_pct_change_yoy,u_pct_change,hicp_pct_change,LCI_pct_change
0,Austria,2002-03-31,1.512650,19.014085,1.703131,4.8
1,Austria,2002-06-30,1.839953,2.564103,1.494283,4.4
2,Austria,2002-09-30,1.684049,8.333333,1.611436,5.7
3,Austria,2002-12-31,0.874297,1.785714,1.682412,3.9
4,Austria,2003-03-31,0.081322,5.325444,1.842071,3.5
...,...,...,...,...,...,...
2780,United Kingdom,2019-09-30,1.360822,-6.563422,1.782364,4.1
2781,United Kingdom,2019-12-31,1.209509,-0.932401,1.307190,2.6
2782,United Kingdom,2020-03-31,-2.384954,5.440000,1.495327,5.8
2783,United Kingdom,2020-06-30,-20.799547,6.729264,0.648749,22.3


In [79]:
target = "LCI_pct_change"
lead_num = 3  # Define how many periods ahead to forecast (1 = next quarter, 2 = two quarters ahead, etc.)


def generate_lags(df, max_lags):
    """
    Generate multiple lag features for numeric columns in the dataframe

    Args:
        df: pandas DataFrame
        max_lags: maximum number of lags to generate

    Returns:
        DataFrame with lag features added
    """
    df_copy = df.copy()

    for col in df.columns:
        # Skip lag generation for country, time, and target column
        if col not in ["country", "time", target]:
            for lag in range(1, max_lags + 1):
                # Group by country to ensure lags are calculated within each country
                df_copy[f"{col}_lag_{lag}"] = df_copy.groupby("country")[col].shift(lag)

    return df_copy


# Generate lags up to 4 periods
df = generate_lags(df, max_lags=4)

print(df.head())

   country       time  gdp_pct_change_yoy  u_pct_change  hicp_pct_change  \
0  Austria 2002-03-31            1.512650     19.014085         1.703131   
1  Austria 2002-06-30            1.839953      2.564103         1.494283   
2  Austria 2002-09-30            1.684049      8.333333         1.611436   
3  Austria 2002-12-31            0.874297      1.785714         1.682412   
4  Austria 2003-03-31            0.081322      5.325444         1.842071   

   LCI_pct_change  gdp_pct_change_yoy_lag_1  gdp_pct_change_yoy_lag_2  \
0             4.8                       NaN                       NaN   
1             4.4                  1.512650                       NaN   
2             5.7                  1.839953                  1.512650   
3             3.9                  1.684049                  1.839953   
4             3.5                  0.874297                  1.684049   

   gdp_pct_change_yoy_lag_3  gdp_pct_change_yoy_lag_4  ...  \
0                       NaN               

In [80]:
def generate_lead_target(df, lead_periods):
    """
    Generate a lead target column for the dataframe

    Args:
        df: pandas DataFrame
        lead_periods: number of periods ahead to forecast

    Returns:
        DataFrame with lead target column added
    """
    df_copy = df.copy()
    # Group by country to ensure lead is calculated within each country
    df_copy[f"{target}_lead_{lead_periods}"] = df_copy.groupby("country")[target].shift(
        -lead_periods
    )
    return df_copy


df = generate_lead_target(df, lead_periods=lead_num)

lead_target = f"{target}_lead_{lead_num}"

# drop original target column to prevent data leakage.
df = df.drop(columns=[target])

# --- Check if lags exist within the target variable ---
lead_lag_cols = [col for col in df.columns if col.startswith(f"{lead_target}_lag")]
if lead_lag_cols:
    df = df.drop(columns=lead_lag_cols)
    print(f"Dropped lead lag columns: {lead_lag_cols}")


# Sort by time in order to ensure continuous time series and a representations of countries.
df = df.sort_values("time").reset_index(drop=True)
print(f"\nData sorted by time. Date range: {df['time'].min()} to {df['time'].max()}")
print(f"First few dates: {df['time'].head(10).tolist()}")
print(f"Last few dates: {df['time'].tail(10).tolist()}")

# remove rows with NaN values
df = df.dropna()


Data sorted by time. Date range: 2002-03-31 00:00:00 to 2025-03-31 00:00:00
First few dates: [Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00'), Timestamp('2002-03-31 00:00:00')]
Last few dates: [Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00'), Timestamp('2025-03-31 00:00:00')]


In [81]:
# Upload dataset into evoml:
dataset = ec.Dataset.from_pandas(df, name="Economic Indicators")
dataset.put()
dataset.wait()

print(f"Dataset URL: {API_URL}/platform/datasets/view/{dataset.dataset_id}")


Dataset URL: https://evoml.ai/platform/datasets/view/68c996dae5732dd6420fa2f7


In [86]:
config = ec.TrialConfig.with_models(
    models=[
        "ridge_regressor",
        # "bayesian_ridge_regressor",
        # "linear_regressor",
        # "lasso_regressor",
    ],
    task=ec.MlTask.regression,
    budget_mode=BudgetMode.fast,
    loss_funcs=["Root Mean Squared Error"],
    dataset_id=dataset.dataset_id,
)

config.options.splittingMethodOptions = SplitMethodOptions(
    method="percentage", 
    trainPercentage=0.8
)
config.options.enableBudgetTuning = False
# config.options.validationMethodOptions = ValidationMethodOptions(
#     method=ValidationMethod.holdout,
#     holdoutOptions=HoldoutOptions(size=0.2, keepOrder=True),
# )

trial, _ = ec.Trial.from_dataset_id(
    dataset.dataset_id,
    target_col=lead_target,  # Use the lead target for forecasting
    trial_name=f"Labour_cost_forecast_{lead_num}_period_ahead",
    config=config,
)

trial.run(timeout=900)

Couldnt match any status: ,status ispending
Couldnt match any status: ,status ispending


True

In [87]:
best_model = trial.get_best()


[32m2025-09-16 18:09:52.810[0m | [1mINFO    [0m | [36mevoml_client.pipeline[0m:[36mget_pipeline_report_when_ready[0m:[36m59[0m - [1mWaiting for pipeline report with id 3693ebf3-6290-47b5-a56e-fede2cfc2573 to be ready.[0m


BaseGenericException: Expecting value: line 1 column 1 (char 0)

In [None]:
# Build the model
best_model.build_model()

In [None]:
# Extract and display results properly
try:
    # Get metrics dataframe
    metrics_df = trial.get_metrics_dataframe()
    print("📊 Trial Metrics:")
    print(metrics_df)
    
    # Get best model info
    best_model = trial.get_best()
    best_model.build_model()
    
    # Extract model representation
    model_rep = best_model.model_rep
    model_rep_dict = model_rep.__dict__
    
    print(f"\n🏆 Best Model: {model_rep_dict.get('name', 'Unknown')}")
    
    # Extract metrics from model representation
    metrics = model_rep_dict.get('metrics', {})
    if 'regression-mse' in metrics:
        mse_metrics = metrics['regression-mse']
        if 'test' in mse_metrics:
            test_mse = mse_metrics['test'].get('average')
            if test_mse is not None:
                rmse = np.sqrt(test_mse)
                print(f"📈 RMSE: {rmse:.4f}")
                print(f"📈 MSE: {test_mse:.4f}")
            else:
                print("⚠️  Test MSE not available")
        else:
            print("⚠️  Test metrics not available")
    else:
        print("⚠️  Regression metrics not available")
    
    # Display all available metrics
    print(f"\n📋 All available metrics:")
    for metric_name, metric_data in metrics.items():
        print(f"  {metric_name}: {metric_data}")
        
except Exception as e:
    print(f"❌ Error extracting results: {e}")
    print("This might be due to the trial still running or an issue with result extraction")
    
    # Try to get basic trial info
    try:
        print(f"\n🔍 Trial state: {trial.state}")
        print(f"🔍 Trial ID: {trial.trial_id}")
    except:
        print("Could not get trial state information")


In [None]:
# Extract and display results properly
try:
    # Get metrics dataframe
    metrics_df = trial.get_metrics_dataframe()
    print("📊 Trial Metrics:")
    print(metrics_df)
    
    # Get best model info
    best_model = trial.get_best()
    best_model.build_model()
    
    # Extract model representation
    model_rep = best_model.model_rep
    model_rep_dict = model_rep.__dict__
    
    print(f"\n🏆 Best Model: {model_rep_dict.get('name', 'Unknown')}")
    
    # Extract metrics from model representation
    metrics = model_rep_dict.get('metrics', {})
    if 'regression-mse' in metrics:
        mse_metrics = metrics['regression-mse']
        if 'test' in mse_metrics:
            test_mse = mse_metrics['test'].get('average')
            if test_mse is not None:
                rmse = np.sqrt(test_mse)
                print(f"📈 RMSE: {rmse:.4f}")
                print(f"📈 MSE: {test_mse:.4f}")
            else:
                print("⚠️  Test MSE not available")
        else:
            print("⚠️  Test metrics not available")
    else:
        print("⚠️  Regression metrics not available")
    
    # Display all available metrics
    print(f"\n📋 All available metrics:")
    for metric_name, metric_data in metrics.items():
        print(f"  {metric_name}: {metric_data}")
        
except Exception as e:
    print(f"❌ Error extracting results: {e}")
    print("This might be due to the trial still running or an issue with result extraction")
    
    # Try to get basic trial info
    try:
        print(f"\n🔍 Trial state: {trial.state}")
        print(f"🔍 Trial ID: {trial.trial_id}")
    except:
        print("Could not get trial state information")
