In [1]:
import sys
sys.path.append('../src')
import json

with open('../data/output.json') as f:
    json_data = json.load(f)

In [2]:
from dataframeFactory import get_data_frame

data = get_data_frame(json_data)

In [94]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import mean_squared_error
from scipy.stats import uniform
import numpy as np
from sklearn.gaussian_process.kernels import Matern


# Separate data into observations and missing values
observations = data[data['all_measurements']!=0]
missing = data[data['all_measurements']==0]

# Split the observations into a training set and a test set
train, test = train_test_split(observations, test_size=0.6, random_state=42)
train = train.to_crs("EPSG:4326")
missing = missing.to_crs("EPSG:4326")
test = test.to_crs("EPSG:4326")
if len(train) > 0 and len(missing) > 0:
    # Fit a Gaussian Process Regressor on the training data
    X_train = train['geometry'].centroid.apply(lambda point: [point.x, point.y])
    X_train = np.array(X_train.tolist())
    y_train = train['all_stability']

    # Define the model and the parameter distribution for the random search
    gpr = GaussianProcessRegressor(kernel=RBF())
    param_dist = {'alpha': uniform(1e-3, 1), 'kernel__length_scale': uniform(1, 10)}

    # Perform the random search
    random_search = RandomizedSearchCV(gpr, param_distributions=param_dist, n_iter=10, cv=5, random_state=42)
    random_search.fit(X_train, y_train)

    # Print the best parameters
    # Print the best parameters
    print(f'Best parameters:\nAlpha: {random_search.best_params_["alpha"]:.6f}\nLength Scale: {random_search.best_params_["kernel__length_scale"]:.6f}\n')
    best_index = random_search.best_index_
    best_mse = -random_search.cv_results_['mean_test_score'][best_index]
    best_std = random_search.cv_results_['std_test_score'][best_index]
    print(f'MSE for best parameters: {best_mse:.6f}\nStd: {best_std:.6f}\n')
    # Print all hyperparameters and their corresponding mean squared errors
    for params, mean_score, scores in zip(random_search.cv_results_['params'], 
                                        random_search.cv_results_['mean_test_score'], 
                                        random_search.cv_results_['std_test_score']):
        print(f'Parameters:\nAlpha: {params["alpha"]:.6f}\nLength Scale: {params["kernel__length_scale"]:.6f}\nMean MSE: {-mean_score:.6f}\nStd: {scores:.6f}\n')

    # Calculate the MSE on the test set
    X_test_actual = test['geometry'].centroid.apply(lambda point: [point.x, point.y])
    X_test_actual = np.array(X_test_actual.tolist())
    y_test_actual = test['all_stability']
    y_pred_actual = random_search.predict(X_test_actual)
    mse = mean_squared_error(y_test_actual, y_pred_actual)
    print(f'Mean Squared Error on Test Set: {mse:.6f}')


  X_train = train['geometry'].centroid.apply(lambda point: [point.x, point.y])


Best parameters:
Alpha: 0.021584
Length Scale: 10.699099

MSE for best parameters: -0.052387
Std: 0.015793

Parameters:
Alpha: 0.375540
Length Scale: 10.507143
Mean MSE: 0.000975
Std: 0.001041

Parameters:
Alpha: 0.732994
Length Scale: 6.986585
Mean MSE: 0.000982
Std: 0.001076

Parameters:
Alpha: 0.157019
Length Scale: 2.559945
Mean MSE: 0.000971
Std: 0.001020

Parameters:
Alpha: 0.059084
Length Scale: 9.661761
Mean MSE: 0.000970
Std: 0.001011

Parameters:
Alpha: 0.602115
Length Scale: 8.080726
Mean MSE: 0.000979
Std: 0.001063

Parameters:
Alpha: 0.021584
Length Scale: 10.699099
Mean MSE: -0.052387
Std: 0.015793

Parameters:
Alpha: 0.833443
Length Scale: 3.123391
Mean MSE: 0.000984
Std: 0.001086

Parameters:
Alpha: 0.182825
Length Scale: 2.834045
Mean MSE: 0.000972
Std: 0.001023

Parameters:
Alpha: 0.305242
Length Scale: 6.247564
Mean MSE: 0.000974
Std: 0.001034

Parameters:
Alpha: 0.432945
Length Scale: 3.912291
Mean MSE: 0.000976
Std: 0.001047




  X_test_actual = test['geometry'].centroid.apply(lambda point: [point.x, point.y])


Mean Squared Error on Test Set: 0.007083


In [95]:
import pandas as pd


alpha = random_search.best_params_["alpha"]
length_scale = random_search.best_params_["kernel__length_scale"]

data['uncertainty'] = None

# Fit the model on the training data
observations = data[data['all_measurements']!=0]
missing = data[data['all_measurements']==0]

observations = observations.to_crs("EPSG:4326")
missing = missing.to_crs("EPSG:4326")

data['uncertainty'] = None  # Initialize 'uncertainty' with a default value

std_devs = None
if len(observations) > 0 and len(missing) > 0:
    # Fit a Gaussian Process Regressor on the observed data
    X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])
    X_train = np.array(X_train.tolist())
    y_train = observations['all_stability']

    gpr = GaussianProcessRegressor(alpha=alpha, kernel=RBF(length_scale=length_scale)).fit(X_train, y_train)

    # Predict the missing values and get standard deviations
    X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])
    X_test = np.array(X_test.tolist())
    y_pred, std_devs = gpr.predict(X_test, return_std=True)

    y_pred = np.clip(y_pred, 0, 1)
    data['all_stability'] = data['all_stability'].astype(float)

    # Fill in the missing values
    data.loc[missing.index, 'all_stability'] = y_pred
    data.loc[missing.index, 'uncertainty'] = std_devs


# Convert the data back to JSON





  X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])

  X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])


In [103]:
alpha = random_search.best_params_["alpha"]
length_scale = random_search.best_params_["kernel__length_scale"]
providers = ['t-mobile', 'vodafone', 'o2', 'e-plus']  # List of providers

for provider in providers:
    # Separate data into observations and missing values
    observations = data[data[provider+'_measurements']!=0]
    missing = data[data[provider+'_measurements']==0]
    print(provider+'-observation: '+str(observations.size))
    print(provider+'-missing: '+str(missing.size))
    observations = observations.to_crs("EPSG:4326")
    missing = missing.to_crs("EPSG:4326")

    data[provider+'_uncertainty'] = None  # Initialize 'uncertainty' with a default value

    if len(observations) > 0 and len(missing) > 0:
        # Fit a Gaussian Process Regressor on the observed data
        X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])
        X_train = np.array(X_train.tolist())
        
        y_train = observations[provider+'_stability']
        
        gpr = GaussianProcessRegressor(alpha=alpha, kernel=RBF(length_scale=length_scale)).fit(X_train, y_train)

        X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])
        X_test = np.array(X_test.tolist())

        y_pred, std_devs = gpr.predict(X_test, return_std=True)

        y_pred = np.clip(y_pred, 0, 1)
        data[provider+'_stability'] = data[provider+'_stability'].astype(float)

        # Fill in the missing values
        data.loc[missing.index, provider+'_stability'] = y_pred
        data.loc[missing.index, provider+'_uncertainty'] = std_devs

t-mobile-observation: 188040
t-mobile-missing: 523770



  X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])

  X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])


vodafone-observation: 296430
vodafone-missing: 415380



  X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])

  X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])


o2-observation: 133290
o2-missing: 578520



  X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])

  X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])


e-plus-observation: 117900
e-plus-missing: 593910



  X_train = observations['geometry'].centroid.apply(lambda point: [point.x, point.y])

  X_test = missing['geometry'].centroid.apply(lambda point: [point.x, point.y])


In [116]:
json_data = data.to_json(indent=4)
with open('output_matern_20_80.json', 'w') as f:
    f.write(json_data)