-
Notifications
You must be signed in to change notification settings - Fork 0
/
FeMIP_MDS.m
198 lines (168 loc) · 6.97 KB
/
FeMIP_MDS.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
% FeMIP_MDS uses Multi-Dimensional Scaling (MDS) to visualise all
% observational and model data in a single plot. The statistical indices
% generated by skillplot.m are used for the analysis. The methodology is
% that of Vichi & Masina (2009).
%
% Run FeMIP_MDS after FeMIP_regrid and FeMIP_makegrid (optional) have been
% run. The function then runs: GEOTRACES_section.m and model_section.m over
% all stations with available data for a desired obs/model variable.
%
% FeMIP_MDS has six inputs, for which 3 are mandetory
%
% obsvar: the GEOTRACES's variable of interest (refer to documentation
% on naming conventions of variables in nc_variables.txt)
% modelvar: The name of the model variable of interest in the model file
% modelfile: name of the model file produced after running FeMIP_regrid
%
% EXAMPLE 1
%
% FeMIP_MDS('var73','FER','FeMIP_PISCES.nc')
%
% The output will be an MDS plot and all the created model.mat files will
% be prefixed with 'FeMIP'.
%
% To create a more realistic plot and facilitate a users custom vertical
% grid, 3 optional inputs are available
%
% vgrid: (optioal) a vertical grid, in a file named 'levels'.
% If no grid is specified, the default interpolation is
% to the World Ocean Atlas 2001 (WOA01) (Conkright et al. 2002).
% Leave blank of set to 'F' (default) for WOA01 grid or set to 'F'
% to use custom grid created by FeMIP_makegrid. (Refer to
% documentation on GEOTRACES_section.m.
% modelname:(optional) save created .mat files with the modelname included.
% If left blank, the output name will be 'FeMIP'.
% scale_factor:(optional) If needed, scale the model output to the units
% of the observational data. Default is set to 1. Check the IDP
% units
%
% EXAMPLE 2
%
% FeMIP_MDS('var73','FER','FeMIP_PISCES.nc','T','PISCES',1000)
%
% Similar to EXAMPLE 1, an MDS plot is created but the created model.mat
% files will be prefixed with the desired modelname. Furthermore, if
% the user ran FeMIP_makegrid, the custom vertical grid can now be used. It
% is recommended to scale your data, else this will affect the statistical
% analysis.
%
% Note: use addpath to the FeMIPeval scripts as well as the directory of
% the GEOTRACES data.
% Jonathan Rogerson
% 17 May 2021
%
function f1 = FeMIP_MDS(obsvar,modelvar,modelfile,vgrid,modelname,scale_factor)
%%
% Check inputs
if ~exist('obsvar','var')
error('No GEOTRACES variable chosen, please input variable eg: "var73" ')
end
if ~exist('modelvar','var')
error('No variable from model file has been chosen');
end
if ~exist('modelfile','var')
error('No model input, please provide model file');
end
if ~exist('vgrid','var')
vgrid = 'F';
end
if ~exist('modelname','var')
modelname = 'FeMIP';
end
if ~exist('scale_factor','var')
scale_factor = 1;
end
IDP = 'GEOTRACES_IDP2017_v2_Discrete_Sample_Data.nc';
if ~exist(IDP,'file')
disp('The GEOTRACES data folder cannot be found. Did you add it to the matlab path?')
disp('Type the following in the matlab prompt: addpath PATH_TO_FOLDER')
disp('e.g. >> addpath ../GEOTRACES/discrete_sample_data/netcdf')
error(['GEOTRACES file ',IDP,' not found!'])
else
file = IDP;
end
station = ncread(file, 'metavar1'); % read in the station variable
station = station'; % transpose to rectify format
station = cellstr(station); % Convert from a character to a cell-array
list_station = unique(station); % List of all the stations
% Loop over every-station for a desired variables
for ii = 1:length(list_station)
GEOTRACES_section(list_station{ii},obsvar,vgrid)
end
% Identify which stations had data and only run those through the model
MyFolderInfo = dir('*.mat');
for jj = 1:length(MyFolderInfo)
proc_stations(jj) = extractBefore(string(MyFolderInfo(jj).name),'.');
end
% Loop over those stations with data in doing the model_section
for kk =1:length(proc_stations)
model_section(proc_stations(kk),modelfile,modelvar,modelname);
end
% Now I have to loop over every station and its corresponding
% Convert OBS data
model_array = [];
obs_array = [];
for ll = 1:length(proc_stations)
od = dir(strcat(proc_stations(ll),'.mat')).name;
md = dir(strcat(modelname,'_',extractBefore(proc_stations(ll),'_'),'_',modelvar,'.mat')).name;
obs = load(od,'*_interp').(strcat(proc_stations(ll),'_interp'));
model = load(md,strcat(modelname,'_',extractBefore(proc_stations(ll),'_'),'_',modelvar))...
.(strcat(modelname,'_',extractBefore(proc_stations(ll),'_'),'_',modelvar));
obs = reshape(obs,[],1);
model = reshape(model.*scale_factor,[],1);
model(model == 0) = NaN;
obs(obs == 0) = NaN;
ind_mis = find(isnan(obs));
obs(ind_mis) = [];
model(ind_mis) = [];
ind_mis = find(isnan(model));
obs(ind_mis) = [];
model(ind_mis) =[];
ind_mis = find(model < 0);
obs(ind_mis) = [];
model(ind_mis) = [];
c_lim = (nanmean(obs) + nanstd(obs)) * 1.1;
skill = skillplot(obs, model,[0 round(c_lim *2)],10000);
close(figure(1))
data_model = [skill.mean, skill.meanobs, skill.stdev, skill.stdevobs,...
skill.corrP, skill.RMSDtot, skill.B, skill.AAE, skill.RMSDcp,...
skill.MEF, skill.RI];
model_array = [model_array; data_model];
skill = skillplot(obs, obs,[0 round(c_lim *2)],10000);
close(figure(1))
data_obs =[skill.mean, skill.meanobs, skill.stdev, skill.stdevobs,...
skill.corrP, skill.RMSDtot, skill.B, skill.AAE, skill.RMSDcp,...
skill.MEF, skill.RI];
obs_array = [obs_array; data_obs];
end
% Calculate normalised STD RS = (std_model/std_obs) -1
model_array(:,end+1) = (model_array(:,3)./model_array(:,4))-1;
obs_array(:,end+1) = (obs_array(:,3)./obs_array(:,4))-1;
% Now clean up the arrays
% Remove means as we already have the bias plus the standard deviations
model_array(:,[1,2,3,4]) = [];
obs_array(:,[1,2,3,4]) = [];
% All indices are identical, so shrink array
obs_array = mean(obs_array);
%% Create the mds plot
for nn = 1:length(proc_stations)
labels{nn} = string(string(extractBefore(proc_stations(nn),'_')));
end
array = [model_array; obs_array];
dissimilarities = pdist(array,'cityblock');%,@naneucdist);
dissimilarities(isnan(dissimilarities)) = [];
[Y,stress,disparities] = mdscale(dissimilarities,2);
f1 = figure;
plot(Y(1:length(proc_stations),1),Y(1:length(proc_stations),2),'bx')
text(Y(1:length(proc_stations),1),Y(1:length(proc_stations),2),labels,...
'HorizontalAlignment','left','FontSize',6);
hold on
plot(Y(length(proc_stations)+1:end,1),Y(length(proc_stations)+1:end,2),'ro')
text(Y(length(proc_stations)+1:end,1),Y(length(proc_stations)+1:end,2),'Data',...
'HorizontalAlignment','left','FontSize',6);
axis('square');
title(strcat('MDS - ','Normalized Manhattan distance (stress =',string(stress),')'),'FontSize',16)
set(gca,'xtick',[])
set(gca,'ytick',[])
h = legend('MODEL','OBS');
end