@@ -430,6 +430,7 @@ encapsulated package HpcOmMemory
430430 list< PartlyFilledCacheLine > partlyFilledCacheLines; // cache lines that are shared between all threads -- SHARED variables -- but not fully filled
431431 algorithm
432432 print("createCacheMapLevelFixedOptimized: started \n " );
433+ printNodeSimCodeVarMapping(iNodeSimCodeVarMapping);
433434 cacheMap := CACHEMAP (iCacheLineSize,{},{});
434435 scVarCLMapping := arrayCreate(arrayLength(iAllSCVarsMapping),(-1 ,-1 ));
435436 numCL := 0 ;
@@ -438,7 +439,7 @@ encapsulated package HpcOmMemory
438439 cacheMapMeta := CACHEMAPMETA (iAllSCVarsMapping, iSimCodeVarTypes, scVarCLMapping);
439440 // Iterate over levels
440441 ((threadCacheLines,partlyFilledCacheLines,cacheMap as CACHEMAP (cacheVariables= cacheVariables, cacheLinesFloat= cacheLinesFloat),cacheMapMeta,numCL,_)) := List . fold(iTasksOfLevels, function createCacheMapLevelFixedOptimized0(iTaskGraph= iTaskGraph, iTaskGraphMeta= iTaskGraphMeta, iNumberOfThreads= iNumberOfThreads, iSchedulerInfo= iSchedulerInfo, iNodeSimCodeVarMapping= iNodeSimCodeVarMapping), (threadCacheLines, partlyFilledCacheLines,cacheMap,cacheMapMeta,numCL,1 ));
441- cacheLinesFloat := List . map(partlyFilledCacheLines, getCacheLineMapOfPartlyFilledCacheLine);
442+ cacheLinesFloat := listAppend(cacheLinesFloat, List . map(partlyFilledCacheLines, getCacheLineMapOfPartlyFilledCacheLine) );
442443 oCacheMap := CACHEMAP (iCacheLineSize, cacheVariables, Array . fold(threadCacheLines, listAppend, cacheLinesFloat));
443444 CACHEMAPMETA (scVarCLMapping= oScVarCLMapping) := cacheMapMeta;
444445 printCacheMap(oCacheMap);
@@ -474,22 +475,6 @@ encapsulated package HpcOmMemory
474475 CACHEMAP (cacheLinesFloat= cacheLinesFloat,cacheLineSize= cacheLineSize) := cacheMap;
475476 // print("createCacheMapLevelFixedOptimized0: Handling new level. Shared CL: " + stringDelimitList(List.map(sharedCacheLines,intString), ",") + " Number of CL: " + intString(numCL) + "\n");
476477 ((cacheMap,cacheMapMeta,createdCL,partlyFilledCacheLines)) := List . fold(getTaskListTasks(iLevelTasks), function createCacheMapLevelFixedOptimizedForTask(iTaskGraph= iTaskGraph, iTaskGraphMeta= iTaskGraphMeta, iSchedulerInfo= iSchedulerInfo, iNumberOfThreads= iNumberOfThreads, iLevel= level, iNodeSimCodeVarMapping = iNodeSimCodeVarMapping, iThreadCacheLines = threadCacheLines), (cacheMap,cacheMapMeta,numCL,partlyFilledCacheLines));
477- /*
478- availableCLold := List.setDifferenceIntN(allCL,cacheLinesPrevLevel,numCL);
479- //append free space to available cache lines and remove full cache lines
480- detailedCacheLineInfo := createDetailedCacheMapInformations(availableCLold, cacheLinesFloat, cacheLineSize);
481- detailedCacheLineInfo := listReverse(detailedCacheLineInfo);
482- //print("createCacheMapLevelOptimized0: clCandidates: " + stringDelimitList(List.map(List.map(detailedCacheLineInfo,Util.tuple21),intString), ",") + "\n");
483- availableCL := List.map(detailedCacheLineInfo, Util.tuple21);
484- //append the used cachelines to the writtenCL-list
485- //print("createCacheMapLevelOptimized0: New cacheLines created: " + intString(createdCL) + "\n");
486- writtenCL := List.setDifferenceIntN(availableCLold,availableCL,numCL);
487- //print("createCacheMapLevelOptimized0: Written CL_0: " + stringDelimitList(List.map(writtenCL,intString), ",") + " -- numCL: " + intString(numCL) + "\n");
488- writtenCL := listAppend(writtenCL, if intLe(numCL+1, numCL+createdCL) then List.intRange2(numCL+1, numCL+createdCL) else {});
489- //print("createCacheMapLevelOptimized0: Written CL_1: " + stringDelimitList(List.map(writtenCL,intString), ",") + "\n");
490- //print("======================================\n");
491- //printCacheMap(cacheMap);
492- //print("======================================\n"); */
493478 oInfo := (threadCacheLines,partlyFilledCacheLines,cacheMap,cacheMapMeta,createdCL,level+ 1 );
494479 end createCacheMapLevelFixedOptimized0;
495480
@@ -511,22 +496,25 @@ encapsulated package HpcOmMemory
511496 CacheMapMeta cacheMapMeta;
512497 tuple< CacheMap ,CacheMapMeta ,Integer ,list< PartlyFilledCacheLine >> tmpInfo;
513498 Integer threadIdx, varType, numNewCL;
499+ array< Option < SimCodeVar . SimVar >> allSCVarsMapping;
514500 list< PartlyFilledCacheLine > partlyFilledCacheLines; // map each non full Cachline to: PrefetchLevel, WriteLevel (LevelIdx, ThreadIdx)
515501 algorithm
516502 oInfo := match(iTask, iTaskGraph, iTaskGraphMeta, iSchedulerInfo, iNumberOfThreads, iNodeSimCodeVarMapping, iInfo)
517- case (HpcOmSimCode . CALCTASK_LEVEL (nodeIdc= nodeIdc,threadIdx= SOME (threadIdx)),_,_,_,_,_,(cacheMap,cacheMapMeta,numNewCL,partlyFilledCacheLines))
503+ case (HpcOmSimCode . CALCTASK_LEVEL (nodeIdc= nodeIdc,threadIdx= SOME (threadIdx)),_,_,_,_,_,(cacheMap,cacheMapMeta as CACHEMAPMETA (allSCVarsMapping = allSCVarsMapping) ,numNewCL,partlyFilledCacheLines))
518504 equation
519- // print("\t\tcreateCacheMapLevelFixedOptimizedForTask: handling task with node-indices: " + stringDelimitList(List.map(nodeIdc, intString), ",") + "\n");
505+ print(" \t\t createCacheMapLevelFixedOptimizedForTask: handling task with node-indices: " + stringDelimitList(List . map(nodeIdc, intString), "," ) + " \n " );
520506 // Get successor tasks
521507 successorTasks = List . flatten(List . map(nodeIdc, function arrayGet(arr= iTaskGraph)));
522508 nodeVars = List . flatten(List . map(nodeIdc, function arrayGet(arr= iNodeSimCodeVarMapping)));
523509 nodeVars = List . sortedUnique(nodeVars,intEq);
524510 varType = getCacheLineVarTypeBySuccessorList(successorTasks, iSchedulerInfo, iNumberOfThreads, threadIdx);
525511 if (intEq(varType,1 )) then
526- print(" \t\t\t createCacheMapLevelFixedOptimizedForTask: Handling variables " + stringDelimitList(List . map(nodeVars, intString), "," ) + " as THREAD_ONLY \n " );
512+ print(" \t\t\t createCacheMapLevelFixedOptimizedForTask: Handling variables " + stringDelimitList(List . map(nodeVars, intString), "," ) + " as THREAD_ONLY by Thread " + intString(threadIdx) + " \n " );
513+ print(" \t\t\t " + stringDelimitList(List . map(nodeVars, function dumpScVarsByIdx(iAllSCVarsMapping= allSCVarsMapping)), " \n\t\t\t " ) + " \n " );
527514 ((cacheMap,cacheMapMeta,numNewCL)) = addFixedLevelVarToThreadCL(nodeVars,threadIdx,iThreadCacheLines,(cacheMap,cacheMapMeta,numNewCL));
528515 else
529- print(" \t\t\t createCacheMapLevelFixedOptimizedForTask: Handling variables " + stringDelimitList(List . map(nodeVars, intString), "," ) + " as SHARED \n " );
516+ print(" \t\t\t createCacheMapLevelFixedOptimizedForTask: Handling variables " + stringDelimitList(List . map(nodeVars, intString), "," ) + " as SHARED by Thread " + intString(threadIdx) + " \n " );
517+ print(" \t\t\t " + stringDelimitList(List . map(nodeVars, function dumpScVarsByIdx(iAllSCVarsMapping= allSCVarsMapping)), " \n\t\t\t " ) + " \n " );
530518 ((cacheMap,cacheMapMeta,numNewCL,partlyFilledCacheLines)) = addFixedLevelVarToSharedCL(nodeVars,threadIdx,iLevel,(cacheMap,cacheMapMeta,numNewCL,partlyFilledCacheLines));
531519 print(" \t\t\t createCacheMapLevelFixedOptimizedForTask: Number of partly filled CLs: " + intString(listLength(partlyFilledCacheLines)) + " \n " );
532520 end if ;
@@ -714,7 +702,7 @@ encapsulated package HpcOmMemory
714702 algorithm
715703 oInfo := match(iMatchedCacheLine, iThreadIdx, iVarIdx, iLevelIdx, iInfo)
716704 case (SOME ((partlyFilledCacheLine as PARTLYFILLEDCACHELINE (cacheLineMap, prefetchLevel, writeLevel),listIndex)),_,_,_,(CACHEMAP (cacheLineSize= cacheLineSize,cacheVariables= cacheVariables,cacheLinesFloat= cacheLinesFloat),CACHEMAPMETA (allSCVarsMapping= allSCVarsMapping,simCodeVarTypes= simCodeVarTypes,scVarCLMapping= scVarCLMapping),numNewCL,partlyFilledCLs))
717- equation
705+ equation // this case is used if the partly filled cache line has enough space to store the variable
718706 CACHELINEMAP (idx,numBytesFree,entries) = cacheLineMap;
719707 ((varType,varSize)) = arrayGet(simCodeVarTypes, iVarIdx);
720708 numBytesFree = numBytesFree - varSize;
@@ -733,25 +721,30 @@ encapsulated package HpcOmMemory
733721
734722 scVarCLMapping = arrayUpdate(scVarCLMapping, iVarIdx, (idx,varType));
735723
736- if (intEq(numBytesFree - varSize , 0 )) then // CL is now full - remove it from partly filled CL list and at it to cachemap
724+ if (intEq(numBytesFree, 0 )) then // CL is now full - remove it from partly filled CL list and at it to cachemap
737725 partlyFilledCLs = listDelete(partlyFilledCLs, listIndex);
726+ print("addFixedLevelVarToSharedCL0: Cache line with index " + intString(idx) + " is now fully filled \n " );
738727 cacheLinesFloat = cacheLineMap::cacheLinesFloat;
728+ numNewCL = numNewCL - 1 ;
729+ else
730+ partlyFilledCLs = List . set(partlyFilledCLs, listIndex, partlyFilledCacheLine);
739731 end if ;
732+ print("addFixedLevelVarToSharedCL0: Used existing cache line with index " + intString(idx) + " to store the variable \n " );
740733 then ((CACHEMAP (cacheLineSize,cacheVariables,cacheLinesFloat),CACHEMAPMETA (allSCVarsMapping,simCodeVarTypes,scVarCLMapping),numNewCL,partlyFilledCLs));
741734 case (NONE (),_,_,_,(CACHEMAP (cacheLineSize= cacheLineSize,cacheVariables= cacheVariables,cacheLinesFloat= cacheLinesFloat),CACHEMAPMETA (allSCVarsMapping= allSCVarsMapping,simCodeVarTypes= simCodeVarTypes,scVarCLMapping= scVarCLMapping),numNewCL,partlyFilledCLs))
742735 equation
743736 ((varType,varSize)) = arrayGet(simCodeVarTypes, iVarIdx);
744737
745738 numNewCL = numNewCL + 1 ;
746739 idx = listLength(cacheLinesFloat) + numNewCL;
747- numBytesFree = cacheLineSize;
740+ numBytesFree = cacheLineSize - varSize ;
748741 entries = {};
749742 prefetchLevel = {};
750743 writeLevel = {};
751744
752745 SOME (cacheVariable) = arrayGet(allSCVarsMapping, iVarIdx);
753746 cacheVariables = cacheVariable::cacheVariables;
754- entry = CACHELINEENTRY (cacheLineSize - numBytesFree , varType, varSize, listLength(cacheVariables));
747+ entry = CACHELINEENTRY (0 , varType, varSize, listLength(cacheVariables));
755748 cacheLineMap = CACHELINEMAP (idx,numBytesFree,entry::entries);
756749
757750 if (intGt(iLevelIdx - 1 , 0 )) then
@@ -768,7 +761,7 @@ encapsulated package HpcOmMemory
768761 else // Add new CL as partly filled CL
769762 partlyFilledCLs = partlyFilledCacheLine::partlyFilledCLs;
770763 end if ;
771- print("addFixedLevelVarToSharedCL0: New CL added \n " );
764+ print("addFixedLevelVarToSharedCL0: Created a new cache line with index " + intString(idx) + " to store the variable \n " );
772765 then ((CACHEMAP (cacheLineSize,cacheVariables,cacheLinesFloat),CACHEMAPMETA (allSCVarsMapping,simCodeVarTypes,scVarCLMapping),numNewCL,partlyFilledCLs));
773766 end match;
774767 end addFixedLevelVarToSharedCL0;
@@ -1883,6 +1876,25 @@ encapsulated package HpcOmMemory
18831876 print("Scc " + intString(iIdx) + " is solved by node " + intString(iMappingEntry) + " \n " );
18841877 oIdx := iIdx + 1 ;
18851878 end printSccNodeMapping0;
1879+
protected function dumpScVarsByIdx "Returns the textual dump of the SimCode variable stored at the given index of the mapping array, or the string NONE if no variable can be obtained for that index."
  input Integer iSimCodeVarIdx; // index that is looked up in iAllSCVarsMapping
  input array<Option<SimCodeVar.SimVar>> iAllSCVarsMapping; // optional SimCode variables, accessed by index
  output String oString; // result of dumpSimCodeVar for the entry, otherwise "NONE"
protected
  SimCodeVar.SimVar scVar;
  String varDump;
algorithm
  oString := matchcontinue(iSimCodeVarIdx, iAllSCVarsMapping)
    // Lookup and dump both happen inside the case, so a failing step
    // (e.g. the SOME(...) pattern not matching) hands over to the else-branch.
    case (_,_)
      equation
        SOME(scVar) = arrayGet(iAllSCVarsMapping, iSimCodeVarIdx);
        varDump = dumpSimCodeVar(scVar);
      then varDump;
    // Fallback: nothing could be dumped for this index.
    else
      then "NONE";
  end matchcontinue;
end dumpScVarsByIdx;
18861898
18871899 // -------------------------------------------
18881900 // SUSAN
0 commit comments