-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Powers
authored and
Powers
committed
Jan 29, 2018
0 parents
commit bf18dda
Showing
40 changed files
with
13,733 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
/******************************************************************************/ | ||
/* */ | ||
/* AN_COHERENCE - Coherence analysis and plot of values */ | ||
/* */ | ||
/******************************************************************************/ | ||
|
||
class AnalyzeCoherenceChild { | ||
|
||
public: | ||
AnalyzeCoherenceChild ( int npreds , int *preds , int n_dim , int nonpar ) ; | ||
~AnalyzeCoherenceChild () ; | ||
|
||
int lookback ; | ||
int npred ; | ||
int preds[MAX_VARS] ; | ||
int nonpar ; | ||
int n ; // Length of displayed series of coherences | ||
double *val ; // Coherences for display | ||
// Work areas | ||
double *mean ; | ||
double *covar ; | ||
} ; | ||
|
||
|
||
/* | ||
-------------------------------------------------------------------------------- | ||
|
||
Constructor and Destructor | ||
|
||
-------------------------------------------------------------------------------- | ||
*/ | ||
|
||
/*
   Constructor: computes the coherence series for the specified predictors.

   np   - Number of predictors
   p    - Their indices in the database (copied into the 'preds' member)
   lb   - Lookback window length
   nonp - If nonzero, use nonparametric (Spearman) correlation

   For every case from lb-1 through n_cases-1, the correlation matrix of the
   np predictors over the trailing window of lb cases is computed, its
   eigenvalues are found, and a single coherence criterion (a weighted sum of
   the normalized eigenvalues) is stored in val[].  The series length is
   saved in member 'n' (this was previously left uninitialized - a bug).
*/

AnalyzeCoherenceChild::AnalyzeCoherenceChild ( int np , int *p , int lb , int nonp )
{
   int icase, i, j, k ;
   double *dptr, *means, *evals, *evects, *workv, minval, maxval, meanval ;
   double sum, total, diff, diff2, *nonpar_work, factor ;
   char msg[512] ;

   MEMTEXT ( "AN_COHERENCE AnalyzeCoherenceChild constructor" ) ;
   npred = np ;
   lookback = lb ;
   nonpar = nonp ;
   n = n_cases - lookback + 1 ;  // Length of displayed series of coherences (was never set)
   for (i=0 ; i<np ; i++)
      preds[i] = p[i] ;

   audit ( "" ) ;
   audit ( "" ) ;
   audit ( "******************************************" ) ;
   audit ( "*  Beginning coherence analysis...       *" ) ;
   if (nonpar)
      audit ( "*      Nonparametric correlation         *" ) ;
   else
      audit ( "*      Ordinary correlation              *" ) ;
   audit ( "******************************************" ) ;

   audit ( "" ) ;
   sprintf_s ( msg, "The following variables are tested with a lookback of %d:", lookback ) ;
   audit ( msg ) ;
   for ( i=0 ; i<npred ; i++) {
      sprintf_s ( msg , "  %s", var_names[preds[i]] ) ;
      audit ( msg ) ;
   }


/*
   Allocate memory
   NOTE(review): these mallocs are not checked for failure — confirm whether
   the surrounding application guarantees memory or handles this elsewhere.
*/

   MEMTEXT ( "AN_COHERENCE: val, means, covar, evals, evects, workv" ) ;
   val = (double *) malloc ( n * sizeof(double) ) ;        // One coherence per displayed case
   means = (double *) malloc ( npred * sizeof(double) ) ;
   covar = (double *) malloc ( npred * npred * sizeof(double) ) ;
   evals = (double *) malloc ( npred * sizeof(double) ) ;
   evects = (double *) malloc ( npred * npred * sizeof(double) ) ;
   workv = (double *) malloc ( npred * sizeof(double) ) ;
   if (nonpar) {
      MEMTEXT ( "AN_COHERENCE: nonpar_work" ) ;
      nonpar_work = (double *) malloc ( 2 * lookback * sizeof(double) ) ; // Used for nonpar corr
      }
   else
      nonpar_work = NULL ;


/*
   Main outer loop does all cases
*/

   minval = 1.e30 ;
   maxval = -1.e30 ;
   meanval = 0.0 ;

   for (icase=lookback-1 ; icase<n_cases ; icase++) {

      // If nonparametric, compute correlation matrix directly (Spearman rho)

      if (nonpar) {
         covar[0] = 1.0 ;
         for (i=1 ; i<npred ; i++) {
            for (j=0 ; j<i ; j++) {
               for (k=0 ; k<lookback ; k++) {
                  dptr = database + n_vars * (icase - k) ;  // Point to this case in database
                  nonpar_work[k] = dptr[preds[i]] ;
                  nonpar_work[lookback+k] = dptr[preds[j]] ;
                  }
               // NOTE(review): the data arrays double as spearman's work areas;
               // presumably spearman copies/sorts internally — confirm its contract.
               covar[i*npred+j] = spearman ( lookback , nonpar_work , nonpar_work+lookback , nonpar_work , nonpar_work+lookback ) ;
               }
            covar[i*npred+i] = 1.0 ;
            }
         }

      // Else not nonparametric, so compute means and covariances, then correlation

      else {
         for (i=0 ; i<npred ; i++)
            means[i] = 0.0 ;

         for (i=0 ; i<lookback ; i++) {
            dptr = database + n_vars * (icase - i) ;  // Point to this case in database
            for (j=0 ; j<npred ; j++)
               means[j] += dptr[preds[j]] ;
            }

         for (j=0 ; j<npred ; j++)
            means[j] /= lookback ;

         // Lower triangle only; evec_rs needs no more than that
         for (i=0 ; i<npred ; i++) {
            for (j=0 ; j<=i ; j++)
               covar[i*npred+j] = 0.0 ;
            }

         for (i=0 ; i<lookback ; i++) {
            dptr = database + n_vars * (icase - i) ;  // Point to this case in database
            for (j=0 ; j<npred ; j++) {
               diff = dptr[preds[j]] - means[j] ;
               for (k=0 ; k<=j ; k++) {
                  diff2 = dptr[preds[k]] - means[k] ;
                  covar[j*npred+k] += diff * diff2 ;
                  }
               }
            }

         for (j=0 ; j<npred ; j++) {
            for (k=0 ; k<=j ; k++)
               covar[j*npred+k] /= lookback ;
            }

         // Convert covariances to correlation
         for (j=1 ; j<npred ; j++) {
            for (k=0 ; k<j ; k++)
               covar[j*npred+k] /= sqrt ( covar[j*npred+j] * covar[k*npred+k] ) ;
            }

         for (j=0 ; j<npred ; j++)
            covar[j*npred+j] = 1.0 ;

         } // Else not nonpar, so compute means and covar, correlation

/*
   Compute eigenvalues and fill in 'val' which we will display
*/

      evec_rs ( covar , npred , 0 , evects , evals , workv ) ;

      // Weight the sorted eigenvalues linearly from +1 (first) to -1 (last);
      // a high criterion means variance concentrated in the leading factors.
      factor = 0.5 * (npred - 1) ;
      sum = total = 0.0 ;
      for (i=0 ; i<npred ; i++) {
         total += evals[i] ;
         sum += (factor - i) * evals[i] / factor ;
         }

      // Compute the criterion, normalized by total variance
      sum /= total ;
      val[icase-lookback+1] = sum ;

      // Track min, max, and (after the loop) mean of the series
      if (val[icase-lookback+1] > maxval)
         maxval = val[icase-lookback+1] ;
      if (val[icase-lookback+1] < minval)
         minval = val[icase-lookback+1] ;
      meanval += val[icase-lookback+1] ;

      } // For all cases

   meanval /= n ;   // n == n_cases - lookback + 1 values were summed


/*
   Print summary
*/

   audit ( "" ) ;
   sprintf_s ( msg, "Mean coherence = %.5lf", meanval ) ;
   audit ( msg ) ;
   sprintf_s ( msg, "Min = %.5lf", minval ) ;
   audit ( msg ) ;
   sprintf_s ( msg, "Max = %.5lf", maxval ) ;
   audit ( msg ) ;
   audit ( "" ) ;
   sprintf_s ( msg, "Coherence values have been written to %s", coherence_log ) ;
   audit ( msg ) ;

   MEMTEXT ( "AN_COHERENCE: free means, covar, evals, evects, workv (,nonpar_work)" ) ;
   free ( means ) ;
   free ( covar ) ;
   free ( evals ) ;
   free ( evects ) ;
   free ( workv ) ;
   if (nonpar_work != NULL)
      free ( nonpar_work ) ;
}
|
||
// Destructor: releases the displayed coherence series allocated by the constructor.
AnalyzeCoherenceChild::~AnalyzeCoherenceChild ()
{
   MEMTEXT ( "AN_COHERENCE.CPP AnalyzeCoherenceChild destructor" ) ;
   free ( val ) ;   // free(NULL) is a defined no-op, so no guard is needed
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
/******************************************************************************/ | ||
/* */ | ||
/* AN_CVARS - AnalyzeClusterVars operations */ | ||
/* */ | ||
/******************************************************************************/ | ||
|
||
int an_cvars ( | ||
int n_dim , // Number of initial dimensions to consider | ||
int ngrp_to_print , // Start printing when n of groups drops this low | ||
int type // Centroid versus leader method | ||
) | ||
{ | ||
int i, j, nvars, icand1, icand2, ibest1, ibest2, n_groups ; | ||
int *group_id, *n_in_group ; | ||
double x, dotprod, length, best_dotprod, *centroids ; | ||
char msg[256], msg2[256] ; | ||
|
||
n_groups = npred ; // Number of groups; initially, every variable is its own group | ||
nvars = npred ; // This name just makes things more clear; no other reason | ||
|
||
/* | ||
Allocate memory | ||
*/ | ||
|
||
group_id = (int *) malloc ( nvars * sizeof(int) ) ; | ||
n_in_group = (int *) malloc ( nvars * sizeof(int) ) ; | ||
centroids = (double *) malloc ( nvars * n_dim * sizeof(double) ) ; | ||
|
||
/* | ||
Initialize; For each variable, make the length of the vector one | ||
*/ | ||
|
||
for (i=0 ; i<nvars ; i++) { | ||
group_id[i] = i ; // For each variable, this is the group to which it belongs | ||
n_in_group[i] = 1 ; // Size of each group | ||
length = 0.0 ; // Will cumulate length of this variable's vector | ||
for (j=0 ; j<n_dim ; j++) | ||
length += structure[i*nvars+j] * structure[i*nvars+j] ; | ||
length = 1.0 / sqrt ( length ) ; | ||
for (j=0 ; j<n_dim ; j++) // Normalize the length of this variable's vector | ||
centroids[i*n_dim+j] = length * structure[i*nvars+j] ; | ||
} | ||
|
||
/* | ||
Print normalized factors | ||
*/ | ||
|
||
audit ( "" ) ; | ||
audit ( "" ) ; | ||
audit ( "Relevant factors, after normalization" ) ; | ||
audit ( "" ) ; | ||
|
||
for (i=0 ; i<nvars ; i++) { | ||
sprintf_s ( msg, "%15s %8.4lf", var_names[preds[i]], centroids[i*n_dim+0] ) ; | ||
for (j=1 ; j<n_dim ; j++) { | ||
sprintf_s ( msg2 , "%8.4lf", centroids[i*n_dim+j] ) ; | ||
strcat_s ( msg , msg2 ) ; | ||
} | ||
audit ( msg ) ; | ||
} | ||
audit ( "" ) ; | ||
|
||
/* | ||
Hierarchical grouping | ||
*/ | ||
|
||
while (n_groups > 1) { | ||
best_dotprod = -1.0 ; | ||
|
||
// Try every pair of groups (icand1 and icand2) | ||
for (icand1=0 ; icand1<n_groups-1 ; icand1++) { | ||
for (icand2=icand1+1 ; icand2<n_groups ; icand2++) { | ||
|
||
// The distance measure is based on the angle between two variables | ||
// Because the two vectors are unit length, their dot product is the cosine of the angle between them. | ||
// The negative of a variable is equivalent to the variable, so check both via symmetry | ||
dotprod = 0.0 ; | ||
for (i=0 ; i<n_dim ; i++) | ||
dotprod += centroids[icand1*n_dim+i] * centroids[icand2*n_dim+i] ; | ||
dotprod = fabs ( dotprod ) ; // Handle symmetry | ||
|
||
if (dotprod > best_dotprod) { // Keep track of the pair with best criterion | ||
best_dotprod = dotprod ; | ||
ibest1 = icand1 ; | ||
ibest2 = icand2 ; | ||
} | ||
|
||
} // For icand2 | ||
} // For icand1 | ||
|
||
// We just found the closest pair. Merge larger index into smaller. | ||
|
||
if (best_dotprod > 1.0) // Should never happen, but handle tiny fpt errors | ||
best_dotprod = 1.0 ; | ||
|
||
sprintf_s ( msg , "Merged groups %d and %d separated by %.2lf degrees; now have %d groups", | ||
ibest1+1, ibest2+1, acos(best_dotprod)*180.0/PI, n_groups-1 ) ; | ||
audit ( msg ) ; | ||
|
||
if (type) { // Did the user request centroid method? | ||
// Recompute the (approximate) centroid of the absorbing (smaller id) group | ||
length = 0.0 ; | ||
for (j=0 ; j<n_dim ; j++) { | ||
x = (n_in_group[ibest1] * centroids[ibest1*n_dim+j] + | ||
n_in_group[ibest2] * centroids[ibest2*n_dim+j]) / | ||
(n_in_group[ibest1] + n_in_group[ibest2]) ; | ||
centroids[ibest1*n_dim+j] = x ; | ||
length += x * x ; | ||
} | ||
length = 1.0 / sqrt ( length ) ; | ||
for (j=0 ; j<n_dim ; j++) | ||
centroids[ibest1*n_dim+j] *= length ; // The length must always be one | ||
} // If type is centroid (not leader) | ||
|
||
n_in_group[ibest1] += n_in_group[ibest2] ; // Group 1 just absorbed group 2 | ||
|
||
// Remap the largest and then pull down all groups above largest. | ||
for (i=0 ; i<nvars ; i++) { | ||
if (group_id[i] == ibest2) // If this variable was in Group 2 | ||
group_id[i] = ibest1 ; // Reclassify it as being in Group 1, the absorbing group | ||
if (group_id[i] > ibest2) // Groups above absorbed group | ||
--group_id[i] ; // Now have to fill in the hole below them | ||
} | ||
|
||
for (i=ibest2+1 ; i<n_groups ; i++) { // Crunch down group stuff above the absorbed group | ||
n_in_group[i-1] = n_in_group[i] ; | ||
for (j=0 ; j<n_dim ; j++) | ||
centroids[(i-1)*n_dim+j] = centroids[i*n_dim+j] ; | ||
} | ||
|
||
--n_groups ; // We just lost a group (ibest2 was absorbed into ibest1) | ||
|
||
/* | ||
Print group membership info | ||
*/ | ||
|
||
if (n_groups <= ngrp_to_print && n_groups > 1) { | ||
audit ( "Group membership..." ) ; | ||
for (i=0 ; i<n_groups ; i++) { | ||
sprintf_s ( msg , " Group %d", i+1 ) ; | ||
audit ( msg ) ; | ||
for (j=0 ; j<nvars ; j++) { | ||
if (group_id[j] == i) { | ||
sprintf_s ( msg , " %s", var_names[preds[j]] ) ; | ||
audit ( msg ) ; | ||
} | ||
} | ||
} | ||
} | ||
|
||
} // while (n_groups > 1) | ||
|
||
FINISH: | ||
free ( group_id ) ; | ||
free ( n_in_group ) ; | ||
free ( centroids ) ; | ||
|
||
return 0 ; | ||
} |
Oops, something went wrong.