In [1]:
import numpy as np
import pandas as pd
import time
import psutil
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from joblib import dump
import os
from sklearn.multioutput import MultiOutputRegressor

# Load datasets
actual_data = pd.read_csv('original_CMKL1.csv')
synthetic_data_ml = pd.read_csv('synthetic_data_RandomSeaerch_Ensemble.csv')
synthetic_data_gan = pd.read_csv('GANs_synthetic_data.csv')

# Combine synthetic datasets for the ML+GAN case
synthetic_data_combined = pd.concat([synthetic_data_ml, synthetic_data_gan])

# Create directory for saving models
if not os.path.exists('NEW_senior_saved_model'):
    os.makedirs('NEW_senior_saved_model')

# Define models and hyperparameters for grid search
classification_models = {
    'RandomForestClassifier': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression(random_state=42, max_iter=1000),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100]
        }
    },
    'DecisionTreeClassifier': {
        'model': DecisionTreeClassifier(random_state=42),
        'params': {
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingClassifier': {
        'model': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
    },
    'KNeighborsClassifier': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

regression_models = {
    'RandomForestRegressor': {
        'model': MultiOutputRegressor(RandomForestRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'LinearRegression': {
        'model': MultiOutputRegressor(LinearRegression()),
        'params': {
            'estimator__fit_intercept': [True, False]
        }
    },
    'DecisionTreeRegressor': {
        'model': MultiOutputRegressor(DecisionTreeRegressor(random_state=42)),
        'params': {
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingRegressor': {
        'model': MultiOutputRegressor(GradientBoostingRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__learning_rate': [0.01, 0.1, 0.2],
            'estimator__max_depth': [3, 5, 7]
        }
    },
    'KNeighborsRegressor': {
        'model': KNeighborsRegressor(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

# Function to create combined dataset with given ratio for each floor
def create_combined_dataset(actual_data, synthetic_data, ratio):
    combined_data = pd.DataFrame()
    floors = actual_data['z'].unique()
    for floor in floors:
        actual_floor_data = actual_data[actual_data['z'] == floor]
        synthetic_floor_data = synthetic_data[synthetic_data['z'] == floor]
        
        n_actual = int(len(actual_floor_data) * ratio)
        n_synthetic = len(actual_floor_data) - n_actual
        
        actual_sample = actual_floor_data.sample(n_actual, random_state=42)
        synthetic_sample = synthetic_floor_data.sample(n_synthetic, random_state=42)
        
        combined_floor_data = pd.concat([actual_sample, synthetic_sample])
        combined_data = pd.concat([combined_data, combined_floor_data], ignore_index=True)
        
    return combined_data

# Function to perform grid search and train the model
def train_model_with_grid_search(X_train, y_train, model_config):
    grid_search = GridSearchCV(model_config['model'], model_config['params'], cv=10, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_, -grid_search.best_score_

# Function to calculate Mean Distance Error
def mean_distance_error(y_true, y_pred):
    return np.mean(np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1)))

# Train and evaluate models
ratios = [0.9, 0.8, 0.7, 0.6, 0.5]
datasets = {
    'actual_ml': synthetic_data_ml,
    'actual_gan': synthetic_data_gan,
    'actual_ml_gan': synthetic_data_combined
}
results = {}

for dataset_name, synthetic_data in datasets.items():
    for ratio in ratios:
        combined_data = create_combined_dataset(actual_data, synthetic_data, ratio)
        
        # Floor classification training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data['z'], test_size=0.3, random_state=42)
        for model_name, model_config in classification_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            training_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            prediction_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            mae = mean_absolute_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_floor'] = {
                'Mean Absolute Error': mae,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_floor.joblib')
        
        # Coordinate regression training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data[['x', 'y']], test_size=0.3, random_state=42)
        for model_name, model_config in regression_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            training_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            prediction_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            mde = mean_distance_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_coord'] = {
                'Mean Distance Error (meters)': mde,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_coord.joblib')

# Print results
for key, metrics in results.items():
    print(f'Results for {key}:')
    for metric_name, value in metrics.items():
        print(f'  {metric_name}: {value}')


Results for actual_ml_0.9_RandomForestClassifier_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 3.494168519973755
  Prediction Time (s): 0.0050525665283203125
  Memory Usage During Training (MB): 1.61328125
  Memory Usage During Prediction (MB): 0.02734375
Results for actual_ml_0.9_LogisticRegression_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 0.07499814033508301
  Prediction Time (s): 0.0
  Memory Usage During Training (MB): 0.3515625
  Memory Usage During Prediction (MB): 0.00390625
Results for actual_ml_0.9_DecisionTreeClassifier_floor:
  Mean Absolute Error: 0.003401360544217687
  Training Time (s): 0.06589460372924805
  Prediction Time (s): 0.00099945068359375
  Memory Usage During Training (MB): 0.0078125
  Memory Usage During Prediction (MB): 0.0
Results for actual_ml_0.9_GradientBoostingClassifier_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 5.032672166824341
  Prediction Time (s): 0.002953052520751953
  Memory Usage During Training (MB): 0.4375
  M

In [1]:
import numpy as np
import pandas as pd
import time
import psutil
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from joblib import dump, load
import os
from sklearn.multioutput import MultiOutputRegressor

# Load datasets
actual_data = pd.read_csv('original_CMKL1.csv')
synthetic_data_ml = pd.read_csv('synthetic_data_RandomSeaerch_Ensemble.csv')
synthetic_data_gan = pd.read_csv('GANs_synthetic_data.csv')

# Combine synthetic datasets for the ML+GAN case
synthetic_data_combined = pd.concat([synthetic_data_ml, synthetic_data_gan])

# Create directory for saving models
if not os.path.exists('NEW_senior_saved_model'):
    os.makedirs('NEW_senior_saved_model')

# Define models and hyperparameters for grid search
classification_models = {
    'RandomForestClassifier': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression(random_state=42, max_iter=1000),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100]
        }
    },
    'DecisionTreeClassifier': {
        'model': DecisionTreeClassifier(random_state=42),
        'params': {
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingClassifier': {
        'model': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
    },
    'KNeighborsClassifier': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

regression_models = {
    'RandomForestRegressor': {
        'model': MultiOutputRegressor(RandomForestRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'LinearRegression': {
        'model': MultiOutputRegressor(LinearRegression()),
        'params': {
            'estimator__fit_intercept': [True, False]
        }
    },
    'DecisionTreeRegressor': {
        'model': MultiOutputRegressor(DecisionTreeRegressor(random_state=42)),
        'params': {
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingRegressor': {
        'model': MultiOutputRegressor(GradientBoostingRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__learning_rate': [0.01, 0.1, 0.2],
            'estimator__max_depth': [3, 5, 7]
        }
    },
    'KNeighborsRegressor': {
        'model': KNeighborsRegressor(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

# Function to create combined dataset with given ratio for each floor
def create_combined_dataset(actual_data, synthetic_data, ratio):
    combined_data = pd.DataFrame()
    floors = actual_data['z'].unique()
    for floor in floors:
        actual_floor_data = actual_data[actual_data['z'] == floor]
        synthetic_floor_data = synthetic_data[synthetic_data['z'] == floor]
        
        n_actual = int(len(actual_floor_data) * ratio)
        n_synthetic = len(actual_floor_data) - n_actual
        
        actual_sample = actual_floor_data.sample(n_actual, random_state=42)
        if len(synthetic_floor_data) < n_synthetic:
            synthetic_sample = synthetic_floor_data.sample(n_synthetic, replace=True, random_state=42)
        else:
            synthetic_sample = synthetic_floor_data.sample(n_synthetic, random_state=42)
        
        combined_floor_data = pd.concat([actual_sample, synthetic_sample])
        combined_data = pd.concat([combined_data, combined_floor_data], ignore_index=True)
        
    return combined_data

# Function to perform grid search and train the model
def train_model_with_grid_search(X_train, y_train, model_config):
    grid_search = GridSearchCV(model_config['model'], model_config['params'], cv=10, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_, -grid_search.best_score_

# Function to calculate Mean Distance Error
def mean_distance_error(y_true, y_pred):
    return np.mean(np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1)))

# Train and evaluate models
ratios = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
datasets = {
    'actual_ml': synthetic_data_ml,
    'actual_gan': synthetic_data_gan,
    'actual_ml_gan': synthetic_data_combined
}
results = {}

for dataset_name, synthetic_data in datasets.items():
    for ratio in ratios:
        combined_data = create_combined_dataset(actual_data, synthetic_data, ratio)
        
        # Floor classification training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data['z'], test_size=0.3, random_state=42)
        for model_name, model_config in classification_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            training_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            prediction_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            mae = mean_absolute_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_floor'] = {
                'Mean Absolute Error': mae,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_floor.joblib')
        
        # Coordinate regression training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data[['x', 'y']], test_size=0.3, random_state=42)
        for model_name, model_config in regression_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            training_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            prediction_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory
            
            mde = mean_distance_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_coord'] = {
                'Mean Distance Error (meters)': mde,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_coord.joblib')

# Print results
for key, metrics in results.items():
    print(f'Results for {key}:')
    for metric_name, value in metrics.items():
        print(f'  {metric_name}: {value}')

# Load and test models using the whole actual data
def load_and_predict_with_model(model_path, test_data, is_coordinate_model=False):
    model = load(model_path)
    X_test = test_data[[f'RSSI{i+1}' for i in range(18)]]
    
    process = psutil.Process()
    start_time = time.time()
    initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB

    predictions = model.predict(X_test)
    
    prediction_time = time.time() - start_time
    prediction_memory = (process.memory_info().rss / (1024 ** 2)) - initial_memory

    if is_coordinate_model:
        y_test = test_data[['x', 'y']]
        mean_dist_error = mean_distance_error(y_test, predictions)
        return {
            'Predictions': predictions,
            'Mean Distance Error (meters)': mean_dist_error,
            'Prediction Time (s)': prediction_time,
            'Memory Usage During Prediction (MB)': prediction_memory
        }
    else:
        y_test = test_data['z']
        mae = mean_absolute_error(y_test, predictions)
        return {
            'Predictions': predictions,
            'Mean Absolute Error': mae,
            'Prediction Time (s)': prediction_time,
            'Memory Usage During Prediction (MB)': prediction_memory
        }

# Example usage of the load_and_predict_with_model function
prediction_results = {}
model_directory = 'NEW_senior_saved_model'
classification_model_names = list(classification_models.keys())
regression_model_names = list(regression_models.keys())
dataset_names = list(datasets.keys())
ratios = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

# Split the actual data to get a test set
test_data = actual_data  # Using the whole actual data as test set

# Load and predict using each stored model
for model_name in classification_model_names:
    for dataset_name in dataset_names:
        for ratio in ratios:
            model_path = f'{model_directory}/{dataset_name}_{ratio}_{model_name}_floor.joblib'
            key = f'{dataset_name}_{ratio}_{model_name}_floor'
            prediction_results[key] = load_and_predict_with_model(model_path, test_data)
            print(f"Results for {key}:")
            for metric_name, value in prediction_results[key].items():
                print(f"  {metric_name}: {value}")

for model_name in regression_model_names:
    for dataset_name in dataset_names:
        for ratio in ratios:
            model_path = f'{model_directory}/{dataset_name}_{ratio}_{model_name}_coord.joblib'
            key = f'{dataset_name}_{ratio}_{model_name}_coord'
            prediction_results[key] = load_and_predict_with_model(model_path, test_data, is_coordinate_model=True)
            print(f"Results for {key}:")
            for metric_name, value in prediction_results[key].items():
                print(f"  {metric_name}: {value}")


Results for actual_ml_0.9_RandomForestClassifier_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 3.8678674697875977
  Prediction Time (s): 0.004002094268798828
  Memory Usage During Training (MB): 1.1796875
  Memory Usage During Prediction (MB): 0.02734375
Results for actual_ml_0.9_LogisticRegression_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 0.09585976600646973
  Prediction Time (s): 0.0020020008087158203
  Memory Usage During Training (MB): 0.3515625
  Memory Usage During Prediction (MB): 0.0078125
Results for actual_ml_0.9_DecisionTreeClassifier_floor:
  Mean Absolute Error: 0.003401360544217687
  Training Time (s): 0.07765507698059082
  Prediction Time (s): 0.000949859619140625
  Memory Usage During Training (MB): 0.00390625
  Memory Usage During Prediction (MB): 0.0
Results for actual_ml_0.9_GradientBoostingClassifier_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 5.255338191986084
  Prediction Time (s): 0.0020055770874023438
  Memory Usage During Traini

In [1]:
import numpy as np
import pandas as pd
import time
import psutil
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from joblib import dump, load
import os
from sklearn.multioutput import MultiOutputRegressor

# Load datasets
actual_data = pd.read_csv('original_CMKL1.csv')
synthetic_data_ml = pd.read_csv('synthetic_data_RandomSeaerch_Ensemble.csv')
synthetic_data_gan = pd.read_csv('GANs_synthetic_data.csv')

# Combine synthetic datasets for the ML+GAN case
synthetic_data_combined = pd.concat([synthetic_data_ml, synthetic_data_gan])

# Create directory for saving models
if not os.path.exists('NEW_senior_saved_model'):
    os.makedirs('NEW_senior_saved_model')

# Define models and hyperparameters for grid search
classification_models = {
    'RandomForestClassifier': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression(random_state=42, max_iter=1000),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100]
        }
    },
    'DecisionTreeClassifier': {
        'model': DecisionTreeClassifier(random_state=42),
        'params': {
            'max_depth': [10, 15, 20],
            'min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingClassifier': {
        'model': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [100, 150, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
    },
    'KNeighborsClassifier': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

regression_models = {
    'RandomForestRegressor': {
        'model': MultiOutputRegressor(RandomForestRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'LinearRegression': {
        'model': MultiOutputRegressor(LinearRegression()),
        'params': {
            'estimator__fit_intercept': [True, False]
        }
    },
    'DecisionTreeRegressor': {
        'model': MultiOutputRegressor(DecisionTreeRegressor(random_state=42)),
        'params': {
            'estimator__max_depth': [10, 15, 20],
            'estimator__min_samples_split': [2, 5, 10]
        }
    },
    'GradientBoostingRegressor': {
        'model': MultiOutputRegressor(GradientBoostingRegressor(random_state=42)),
        'params': {
            'estimator__n_estimators': [100, 150, 200],
            'estimator__learning_rate': [0.01, 0.1, 0.2],
            'estimator__max_depth': [3, 5, 7]
        }
    },
    'KNeighborsRegressor': {
        'model': KNeighborsRegressor(),
        'params': {
            'n_neighbors': [3, 5, 7, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

# Function to create combined dataset with given ratio for each floor
def create_combined_dataset(actual_data, synthetic_data, ratio):
    combined_data = pd.DataFrame()
    floors = actual_data['z'].unique()
    for floor in floors:
        actual_floor_data = actual_data[actual_data['z'] == floor]
        synthetic_floor_data = synthetic_data[synthetic_data['z'] == floor]
        
        n_actual = int(len(actual_floor_data) * ratio)
        n_synthetic = len(actual_floor_data) - n_actual
        
        actual_sample = actual_floor_data.sample(n_actual, random_state=42)
        if len(synthetic_floor_data) < n_synthetic:
            synthetic_sample = synthetic_floor_data.sample(n_synthetic, replace=True, random_state=42)
        else:
            synthetic_sample = synthetic_floor_data.sample(n_synthetic, random_state=42)
        
        combined_floor_data = pd.concat([actual_sample, synthetic_sample])
        combined_data = pd.concat([combined_data, combined_floor_data], ignore_index=True)
        
    return combined_data

# Function to perform grid search and train the model
def train_model_with_grid_search(X_train, y_train, model_config):
    grid_search = GridSearchCV(model_config['model'], model_config['params'], cv=10, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_, -grid_search.best_score_, grid_search.best_params_

# Function to calculate Mean Distance Error
def mean_distance_error(y_true, y_pred):
    return np.mean(np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1)))

# Train and evaluate models
ratios = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
datasets = {
    'actual_ml': synthetic_data_ml,
    'actual_gan': synthetic_data_gan,
    'actual_ml_gan': synthetic_data_combined
}
results = {}

for dataset_name, synthetic_data in datasets.items():
    for ratio in ratios:
        combined_data = create_combined_dataset(actual_data, synthetic_data, ratio)
        
        # Floor classification training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data['z'], test_size=0.3, random_state=42)
        for model_name, model_config in classification_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score, best_params = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            final_memory = process.memory_info().rss / (1024 ** 2)
            training_memory = max(0, final_memory - initial_memory)  # Ensure memory usage is not negative
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            final_memory = process.memory_info().rss / (1024 ** 2)
            prediction_memory = max(0, final_memory - initial_memory)  # Ensure memory usage is not negative
            
            mae = mean_absolute_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_floor'] = {
                'Mean Absolute Error': mae,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory,
                'Best Parameters': best_params
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_floor.joblib')
        
        # Coordinate regression training and evaluation
        X_train, X_test, y_train, y_test = train_test_split(combined_data[[f'RSSI{i+1}' for i in range(18)]], combined_data[['x', 'y']], test_size=0.3, random_state=42)
        for model_name, model_config in regression_models.items():
            start_time = time.time()
            process = psutil.Process()
            initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB
            
            best_model, best_score, best_params = train_model_with_grid_search(X_train, y_train, model_config)
            
            training_time = time.time() - start_time
            final_memory = process.memory_info().rss / (1024 ** 2)
            training_memory = max(0, final_memory - initial_memory)  # Ensure memory usage is not negative
            
            start_time = time.time()
            initial_memory = process.memory_info().rss / (1024 ** 2)
            y_pred = best_model.predict(X_test)
            
            prediction_time = time.time() - start_time
            final_memory = process.memory_info().rss / (1024 ** 2)
            prediction_memory = max(0, final_memory - initial_memory)  # Ensure memory usage is not negative
            
            mde = mean_distance_error(y_test, y_pred)
            
            results[f'{dataset_name}_{ratio}_{model_name}_coord'] = {
                'Mean Distance Error (meters)': mde,
                'Training Time (s)': training_time,
                'Prediction Time (s)': prediction_time,
                'Memory Usage During Training (MB)': training_memory,
                'Memory Usage During Prediction (MB)': prediction_memory,
                'Best Parameters': best_params
            }
            dump(best_model, f'NEW_senior_saved_model/{dataset_name}_{ratio}_{model_name}_coord.joblib')

# Print results
for key, metrics in results.items():
    print(f'Results for {key}:')
    for metric_name, value in metrics.items():
        print(f'  {metric_name}: {value}')

# Load and test models using the whole actual data
def load_and_predict_with_model(model_path, test_data, is_coordinate_model=False):
    model = load(model_path)
    X_test = test_data[[f'RSSI{i+1}' for i in range(18)]]
    
    process = psutil.Process()
    start_time = time.time()
    initial_memory = process.memory_info().rss / (1024 ** 2)  # Convert to MB

    predictions = model.predict(X_test)
    
    prediction_time = time.time() - start_time
    final_memory = process.memory_info().rss / (1024 ** 2)
    prediction_memory = max(0, final_memory - initial_memory)  # Ensure memory usage is not negative

    if is_coordinate_model:
        y_test = test_data[['x', 'y']]
        mean_dist_error = mean_distance_error(y_test, predictions)
        return {
            'Predictions': predictions,
            'Mean Distance Error (meters)': mean_dist_error,
            'Prediction Time (s)': prediction_time,
            'Memory Usage During Prediction (MB)': prediction_memory
        }
    else:
        y_test = test_data['z']
        mae = mean_absolute_error(y_test, predictions)
        return {
            'Predictions': predictions,
            'Mean Absolute Error': mae,
            'Prediction Time (s)': prediction_time,
            'Memory Usage During Prediction (MB)': prediction_memory
        }

# Example usage of the load_and_predict_with_model function
prediction_results = {}
model_directory = 'NEW_senior_saved_model'
classification_model_names = list(classification_models.keys())
regression_model_names = list(regression_models.keys())
dataset_names = list(datasets.keys())
ratios = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

# Split the actual data to get a test set
test_data = actual_data  # Using the whole actual data as test set

# Load and predict using each stored model
for model_name in classification_model_names:
    for dataset_name in dataset_names:
        for ratio in ratios:
            model_path = f'{model_directory}/{dataset_name}_{ratio}_{model_name}_floor.joblib'
            key = f'{dataset_name}_{ratio}_{model_name}_floor'
            prediction_results[key] = load_and_predict_with_model(model_path, test_data)
            print(f"Results for {key}:")
            for metric_name, value in prediction_results[key].items():
                print(f"  {metric_name}: {value}")

for model_name in regression_model_names:
    for dataset_name in dataset_names:
        for ratio in ratios:
            model_path = f'{model_directory}/{dataset_name}_{ratio}_{model_name}_coord.joblib'
            key = f'{dataset_name}_{ratio}_{model_name}_coord'
            prediction_results[key] = load_and_predict_with_model(model_path, test_data, is_coordinate_model=True)
            print(f"Results for {key}:")
            for metric_name, value in prediction_results[key].items():
                print(f"  {metric_name}: {value}")

# Display best parameters in a separate cell
best_params_results = {key: value['Best Parameters'] for key, value in results.items()}
best_params_results


Results for actual_ml_0.9_RandomForestClassifier_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 5.577222585678101
  Prediction Time (s): 0.004059791564941406
  Memory Usage During Training (MB): 2.45703125
  Memory Usage During Prediction (MB): 0.06640625
  Best Parameters: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}
Results for actual_ml_0.9_LogisticRegression_floor:
  Mean Absolute Error: 0.0
  Training Time (s): 0.11963939666748047
  Prediction Time (s): 0.0010411739349365234
  Memory Usage During Training (MB): 0.37890625
  Memory Usage During Prediction (MB): 0
  Best Parameters: {'C': 0.01}
Results for actual_ml_0.9_DecisionTreeClassifier_floor:
  Mean Absolute Error: 0.003401360544217687
  Training Time (s): 0.12122726440429688
  Prediction Time (s): 0.0009949207305908203
  Memory Usage During Training (MB): 0.00390625
  Memory Usage During Prediction (MB): 0
  Best Parameters: {'max_depth': 10, 'min_samples_split': 2}
Results for actual_ml_0.9_Gradien

{'actual_ml_0.9_RandomForestClassifier_floor': {'max_depth': 10,
  'min_samples_split': 2,
  'n_estimators': 100},
 'actual_ml_0.9_LogisticRegression_floor': {'C': 0.01},
 'actual_ml_0.9_DecisionTreeClassifier_floor': {'max_depth': 10,
  'min_samples_split': 2},
 'actual_ml_0.9_GradientBoostingClassifier_floor': {'learning_rate': 0.1,
  'max_depth': 3,
  'n_estimators': 100},
 'actual_ml_0.9_KNeighborsClassifier_floor': {'n_neighbors': 3,
  'weights': 'uniform'},
 'actual_ml_0.9_RandomForestRegressor_coord': {'estimator__max_depth': 20,
  'estimator__min_samples_split': 2,
  'estimator__n_estimators': 200},
 'actual_ml_0.9_LinearRegression_coord': {'estimator__fit_intercept': True},
 'actual_ml_0.9_DecisionTreeRegressor_coord': {'estimator__max_depth': 20,
  'estimator__min_samples_split': 5},
 'actual_ml_0.9_GradientBoostingRegressor_coord': {'estimator__learning_rate': 0.2,
  'estimator__max_depth': 7,
  'estimator__n_estimators': 200},
 'actual_ml_0.9_KNeighborsRegressor_coord': {'n