-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
305 additions
and
162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
""" | ||
Mixed-effects model for irregular data when removing measurement points | ||
======================================================================= | ||
This example converts irregular data to a basis representation using a mixed | ||
effects model and checks the robustness of the method by fitting | ||
the model with a decreasing number of measurement points per curve. | ||
""" | ||
# Author: Pablo Cuesta Sierra | ||
# License: MIT | ||
|
||
# sphinx_gallery_thumbnail_number = -1 | ||
|
||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import pandas as pd | ||
from sklearn.model_selection import train_test_split | ||
|
||
from skfda import FDataIrregular | ||
from skfda.datasets import fetch_weather, irregular_sample | ||
from skfda.representation.basis import FourierBasis | ||
from skfda.representation.conversion import EMMixedEffectsConverter | ||
from skfda.misc.scoring import r2_score, mean_squared_error | ||
|
||
|
||
# %% | ||
# For this example, we are going to check the robustness of | ||
# the mixed effects method for converting irregular data to basis | ||
# representation by removing some measurement points from the test and train | ||
# sets and comparing the results. The temperatures from the Canadian weather | ||
# dataset are used to generate the irregular data. | ||
# Temperature curves of the Canadian weather dataset: the first coordinate
# of the fetched functional data.
weather = fetch_weather()
fd_temperatures = weather.data.coordinates[0]

# Fourier basis with 5 elements, defined on the same domain as the curves.
basis = FourierBasis(
    domain_range=fd_temperatures.domain_range,
    n_basis=5,
)

# Show the original curves.
fd_temperatures.plot()
plt.show()

# Show the basis functions used for the conversion.
basis.plot()
plt.title("Basis functions")
plt.show()
|
||
# %% | ||
# We split the data into train and test sets: | ||
# One seeded generator is used for the split below; the same object is also
# passed to the later subsampling calls, so the example is reproducible.
random_state = np.random.RandomState(4934792)

# Hold out 30% of the curves as the test set.
train_original, test_original = train_test_split(
    fd_temperatures,
    random_state=random_state,
    test_size=0.3,
)
|
||
# %% | ||
# Then, we create datasets with a decreasing number of measurement points | ||
# per curve, by iteratively removing measurement points from the previous | ||
# dataset. | ||
# Number of measurement points kept per curve at each successive step.
n_points_list = [40, 10, 7, 5, 4, 3]

train_irregular_list = []
test_irregular_list = []

# Each step subsamples the result of the previous step, starting from the
# original (fully observed) train and test sets. The originals themselves
# are never stored in the lists.
previous_train = train_original
previous_test = test_original
for n_points in n_points_list:
    # Train is sampled before test in every step, which fixes the order in
    # which the shared random generator is consumed.
    previous_train = irregular_sample(
        previous_train,
        n_points_per_curve=n_points,
        random_state=random_state,
    )
    previous_test = irregular_sample(
        previous_test,
        n_points_per_curve=n_points,
        random_state=random_state,
    )
    train_irregular_list.append(previous_train)
    test_irregular_list.append(previous_test)
|
||
# %% | ||
# We convert the irregular data to basis representation and compute the scores. | ||
# To do so, we fit the converter once per train set. After fitting | ||
# the converter with a train set that has :math:`k` points per curve, we | ||
# use it to transform that train set, the test set with :math:`k` points per | ||
# curve and the original test set with 365 points per curve. | ||
# Score name -> scoring function. Each function is called below as
# ``score_fun(reference_curves, reconstructed_curves)``.
score_functions = {
    "R^2": r2_score,
    "MSE": mean_squared_error,
}

# Dataset name -> {n_points_per_curve: converted (basis) data}.
converted_data = {
    "Train-sparse": {},
    "Test-sparse": {},
    "Test-original": {},
}

# Score name -> table columns: the number of points per curve plus one list
# of scores per dataset name, filled in the same order as ``n_points_list``.
scores = {
    score_name: {
        "n_points_per_curve": n_points_list,
        **{data_name: [] for data_name in converted_data},
    }
    for score_name in score_functions
}

# Dataset name -> original curves against which the conversion is scored.
reference_data = {
    "Train-sparse": train_original,
    "Test-sparse": test_original,
    "Test-original": test_original,
}

# The irregular version of the full test set does not depend on the loop
# variable, so it is built once instead of once per iteration.
test_original_irregular = FDataIrregular.from_fdatagrid(test_original)

converter = EMMixedEffectsConverter(basis)
for n_points, train_irregular, test_irregular in zip(
    n_points_list,
    train_irregular_list,
    test_irregular_list,
):
    # Refit on the train set with ``n_points`` points per curve, then
    # transform the three datasets with that fitted converter.
    converter = converter.fit(train_irregular)
    converted = {
        "Train-sparse": converter.transform(train_irregular),
        "Test-sparse": converter.transform(test_irregular),
        "Test-original": converter.transform(test_original_irregular),
    }

    for data_name, data_converted in converted.items():
        converted_data[data_name][n_points] = data_converted

        # Evaluate the converted curves on the grid of the reference
        # curves once, and reuse that evaluation for every score function.
        reference = reference_data[data_name]
        reconstructed = data_converted.to_grid(reference.grid_points)
        for score_name, score_fun in score_functions.items():
            scores[score_name][data_name].append(
                score_fun(reference, reconstructed),
            )
|
||
# %% | ||
# Finally, we have the scores for the train and test sets with decreasing | ||
# number of measurement points per curve. | ||
# Print one table per score, indexed (and sorted) by the number of
# measurement points per curve.
separator = "-" * 62
for score_name, score_columns in scores.items():
    print(separator)
    print(f"{score_name} scores:")
    print(separator)
    score_table = pd.DataFrame(score_columns)
    score_table = score_table.set_index("n_points_per_curve")
    print(score_table.sort_index(), end="\n\n\n")
|
||
# %% | ||
# The following plots show the original curves along with the converted | ||
# test curves for the conversions with 5, 4 and 3 points per curve. | ||
# Sparse test set for each number of points per curve, so that every plotted
# object is looked up by the same key instead of relying on several
# containers being iterated in the same order.
test_irregular_by_n_points = dict(zip(n_points_list, test_irregular_list))

# Number of test curves drawn in each figure (one subplot per curve).
n_curves_shown = 7

# One figure per selected number of points per curve.
for n_points_per_curve in (5, 4, 3):
    test_irregular = test_irregular_by_n_points[n_points_per_curve]
    test_converted = converted_data["Test-sparse"][n_points_per_curve]
    test_original_converted = (
        converted_data["Test-original"][n_points_per_curve]
    )

    plt.figure(figsize=(10, 23))
    for k in range(n_curves_shown):
        axes = plt.subplot(n_curves_shown, 1, k + 1)

        # Measurement points that were kept for this curve.
        test_irregular[k].scatter(
            axes=axes, color=f"C{k}",
        )
        # Original, fully observed curve.
        test_original[k].plot(
            axes=axes, color=f"C{k}", linewidth=0.65,
            label="Original test curve",
        )
        # Conversion obtained from the sparse measurements.
        test_converted[k].plot(
            axes=axes, color=f"C{k}", linestyle="--",
            label=f"Test curve transformed from {n_points_per_curve} points",
        )
        # Conversion obtained from the complete curve with the same
        # fitted model.
        test_original_converted[k].plot(
            axes=axes, color=f"C{k}", alpha=0.5,
            label="Test curve transformed from original 365 points",
        )
        axes.legend(bbox_to_anchor=(1., 1.))

    # The layout rectangle stops at 0.98 of the figure height, leaving a
    # strip at the top for the figure title.
    plt.tight_layout(rect=[0, 0, 1, 0.98])
    plt.suptitle(f"Fitted model with {n_points_per_curve=}")

plt.show()
|
||
# %% | ||
# References | ||
# ---------- | ||
# | ||
# .. footbibliography:: |
Oops, something went wrong.