Skip to content

Commit

Permalink
Add individual DIMM temperature sensor support
Browse files Browse the repository at this point in the history
Change-Id: If5b024f031d4b266603720d126fce88bf2362e1a
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36528
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
  • Loading branch information
cjcain committed Feb 15, 2017
1 parent 7d22139 commit 6ff7b26
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 96 deletions.
17 changes: 0 additions & 17 deletions src/occ_405/amec/amec_amester.c
Expand Up @@ -754,23 +754,6 @@ uint8_t amester_manual_throttle( const IPMIMsg_t * i_msg,
}


case 24: // parameter 24: set ambient and fan speed sensors
{
// Set ambient temperature (8 bits)
temp16=(uint16_t)i_msg->au8CmdData_ptr[2];
sensor_update(AMECSENSOR_PTR(TEMPAMBIENT), temp16);
// Set average fan speed (16 bits)
temp16=((uint16_t)i_msg->au8CmdData_ptr[3]<<8)+(uint16_t)i_msg->au8CmdData_ptr[4];
sensor_update(AMECSENSOR_PTR(FANSPEEDAVG), temp16);
o_resp[0]=i_msg->au8CmdData_ptr[2];
o_resp[1]=i_msg->au8CmdData_ptr[3];
o_resp[2]=i_msg->au8CmdData_ptr[3];
*io_resp_length=3;
l_rc = COMPCODE_NORMAL;
break;
}


case 29: // parameter 29: Control vector recording modes and stream rates.
{
g_amec->stream_vector_rate=255; // First step is to set an invalid rate so no recording done at all
Expand Down
13 changes: 2 additions & 11 deletions src/occ_405/amec/amec_analytics.c
Expand Up @@ -437,11 +437,6 @@ void amec_analytics_main(void)
// Now, update Group 45 analytics packed array
switch (g_amec->analytics_thermal_offset)
{
case 0:
tempreg = (g_amec->sys.tempambient.sample) << 8; // upper byte
tempreg = tempreg | 0x8000; // Turn on MSBit for temporal frame sync
break;

case 1:
if (g_amec->mst_ips_parms.active == 0)
{
Expand All @@ -459,20 +454,16 @@ void amec_analytics_main(void)
tempreg=(g_amec->mst_ips_parms.active)<<8; // upper byte
break;

case 3:
tempreg = (g_amec->fan.fanspeedavg.sample / 100) << 8; // upper byte (100 RPM resolution)
break;

case 4:
tempreg = (g_amec->proc[0].temp16msdimm.sample) << 8; // upper byte
tempreg = (g_amec->proc[0].tempdimmthrm.sample) << 8; // upper byte
break;

case 5:
tempreg = (g_amec->proc[0].temp2mscent.sample) << 8; // upper byte
break;

case 6:
// tempreg=(g_amec->proc[2].temp16msdimm.sample)<<8; // upper byte
// tempreg=(g_amec->proc[2].tempdimmthrm.sample)<<8; // upper byte
tempreg = 0;
break;

Expand Down
4 changes: 2 additions & 2 deletions src/occ_405/amec/amec_controller.c
Expand Up @@ -176,8 +176,8 @@ void amec_controller_dimm_thermal()
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
// Get TEMP16MSDIMM sensor value
l_sensor = getSensorByGsid(TEMP16MSDIMM);
// Get TEMPDIMMTHRM sensor value
l_sensor = getSensorByGsid(TEMPDIMMTHRM);

if(G_dimm_temp_expired_bitmap.bigword)
{
Expand Down
2 changes: 1 addition & 1 deletion src/occ_405/amec/amec_health.c
Expand Up @@ -165,7 +165,7 @@ void amec_health_check_dimm_temp()
}

l_ot_error = g_amec->thermaldimm.ot_error;
l_sensor = getSensorByGsid(TEMP16MSDIMM);
l_sensor = getSensorByGsid(TEMPDIMMTHRM);
l_cur_temp = l_sensor->sample;
l_max_temp = l_sensor->sample_max;
TRAC_ERR("amec_health_check_dimm_temp: DIMM reached error temp[%d]. cur_max[%d], hist_max[%d]",
Expand Down
2 changes: 1 addition & 1 deletion src/occ_405/amec/amec_sensors_centaur.c
Expand Up @@ -415,7 +415,7 @@ void amec_update_centaur_temp_sensors(void)
l_hot = g_amec->proc[0].memctl[k].centaur.tempdimmax.sample;
}
}
sensor_update(&g_amec->proc[0].temp16msdimm,l_hot);
sensor_update(&g_amec->proc[0].tempdimmthrm,l_hot);
AMEC_DBG("HotDimm=%d\n",l_hot);
}

Expand Down
7 changes: 3 additions & 4 deletions src/occ_405/amec/amec_sys.h
Expand Up @@ -95,7 +95,6 @@ typedef struct
typedef struct
{
// Sensors
sensor_t fanspeedavg;
sensor_t pwr250usfan;

} amec_fans_t;
Expand Down Expand Up @@ -358,8 +357,6 @@ typedef struct
typedef struct
{
// System Sensors
sensor_t tempambient;
sensor_t altitude;
sensor_t pwr250us;
sensor_t pwr250usgpu;
sensor_t pwrapssch[MAX_APSS_ADC_CHANNELS];
Expand Down Expand Up @@ -468,8 +465,10 @@ typedef struct

// Memory Summary Sensors
sensor_t temp2mscent;
sensor_t temp16msdimm;
sensor_t tempdimmthrm;
sensor_t memsp2ms_tls;
// Nimbus DIMM Sensors
sensor_t tempdimm[NUM_DIMM_PORTS*NUM_DIMMS_PER_I2CPORT];

sensor_t curvdn;
sensor_t pwrvdd;
Expand Down
5 changes: 2 additions & 3 deletions src/occ_405/dcom/dcom.h
Expand Up @@ -134,8 +134,7 @@ typedef struct __attribute__ ((packed))
// From APSS Power Measurement
uint16_t adc[MAX_APSS_ADC_CHANNELS]; // [2] - 32 bytes
uint16_t gpio[MAX_APSS_GPIO_PORTS]; // [34] - 4 bytes
uint16_t ambient_temp; // [38] - 2 bytes
uint16_t altitude; // [40] - 2 bytes
uint32_t reserved2; // [38] - 4 bytes
uint8_t tod[ TOD_SIZE ]; // [42] - 6 bytes

// Manufacturing parameters
Expand Down Expand Up @@ -207,7 +206,7 @@ typedef struct __attribute__ ((packed))
uint16_t pwrpx250usp0cy[MAX_CORES]; // [260]
uint16_t todclock[NUM_TOD_SENSORS]; // [308]
uint16_t temp2mscent; // [314]
uint16_t temp16msdimm; // [316]
uint16_t tempdimmthrm; // [316]
uint16_t util4msp0; // [318]
uint16_t ips4msp0; // [320]
uint16_t nutil3sp0cy[MAX_CORES]; // [322]
Expand Down
46 changes: 17 additions & 29 deletions src/occ_405/dimm/dimm.c
Expand Up @@ -50,18 +50,14 @@ extern bool G_mem_monitoring_allowed;
extern memory_control_task_t G_memory_control_task;

uint8_t G_dimm_state = DIMM_STATE_INIT; // Current state of the DIMM state machine
uint8_t G_maxDimmPorts = NUM_DIMM_PORTS;
// G_maxDimmPort is the maximum I2C port number (1 indicates that ports 0 and 1 are valid)
uint8_t G_maxDimmPort = NUM_DIMM_PORTS - 1;

bool G_dimm_i2c_reset_required = false;
uint32_t G_dimm_i2c_reset_cause = 0;

#define MAX_CONSECUTIVE_DIMM_RESETS 1

// On Nimbus, we are using the centaur number as the I2C port (keep same structure)
// There can be 8 DIMMs under a Centaur and 8 DIMMs per I2C port (max of 2 ports)
// DIMM code assumed that NUM_DIMMS_PER_I2CPORT == NUM_DIMMS_PER_CENTAUR
#define NUM_DIMMS_PER_I2CPORT 8

typedef struct {
bool disabled;
uint8_t errorCount;
Expand All @@ -72,6 +68,8 @@ dimmData_t G_dimm[NUM_DIMM_PORTS][NUM_DIMMS_PER_I2CPORT] = {{{false,0}}};
#define MAX_TICK_COUNT_WAIT 2

#define DIMM_AND_PORT ((G_dimm_sm_args.i2cPort<<8) | G_dimm_sm_args.dimm)
// Flatten an (I2C port, DIMM) pair into a single index:
//   index = port * NUM_DIMMS_PER_I2CPORT + dimm
// Each argument is parenthesized so that expression arguments (e.g. port+1 or
// 1<<n) are evaluated as a unit before being combined.
// Note: each argument is evaluated exactly once, but avoid passing expressions
// with side effects to keep call sites easy to read.
#define DIMM_INDEX(port,dimm) (((port)*NUM_DIMMS_PER_I2CPORT)+(dimm))

// GPE Requests
GpeRequest G_dimm_sm_request;

Expand Down Expand Up @@ -193,7 +191,7 @@ void memory_nimbus_init()
* @reasoncode SSX_GENERIC_FAILURE
* @userdata1 l_rc_gpe - Return code of failing function
* @userdata2 0
* @userdata4 ERC_CENTAUR_GPE_REQUEST_CREATE_FAILURE
* @userdata4 OCC_NO_EXTENDED_RC
* @devdesc Failed to initialize GPE1 DIMM IPC job
*/
l_err = createErrl(
Expand All @@ -217,34 +215,21 @@ void memory_nimbus_init()
void update_hottest_dimm()
{
// Find/save the hottest DIMM temperature for the last set of readings
uint8_t hottest = 0, hottest_loc = 0;
uint8_t hottest = 0;
int pIndex, dIndex;
for (pIndex = 0; pIndex < G_maxDimmPorts; ++pIndex)
for (pIndex = 0; pIndex < NUM_DIMM_PORTS; ++pIndex)
{
for (dIndex = 0; dIndex < NUM_DIMMS_PER_I2CPORT; ++dIndex)
{
if (g_amec->proc[0].memctl[pIndex].centaur.dimm_temps[dIndex].cur_temp > hottest)
{
hottest = g_amec->proc[0].memctl[pIndex].centaur.dimm_temps[dIndex].cur_temp;
hottest_loc = (pIndex*8) + dIndex;
}
}
}

DIMM_DBG("update_hottest_dimm: hottest DIMM temp for this sample: %dC (loc=%d)", hottest, hottest_loc);
if(hottest > g_amec->proc[0].memctl[0].centaur.tempdimmax.sample_max)
{
// Save hottest DIMM location ever sampled. There is no location for the temp16msdimm
// sensor, so just store it in memctl[0] location.
DIMM_DBG("update_hottest_dimm: Hottest DIMM ever sampled was DIMM%d %dC (prior %dC)",
hottest_loc, hottest, g_amec->proc[0].memctl[0].centaur.tempdimmax.sample_max);
// Store the hottest DIMM location in locdimmax sensor
sensor_update(&g_amec->proc[0].memctl[0].centaur.locdimmax, hottest_loc);
}
// Store the hottest DIMM temp in tempdimmax sensor
sensor_update(&g_amec->proc[0].memctl[0].centaur.tempdimmax, hottest);
// Store the hottest DIMM temp in temp16msdimm sensor
sensor_update(&g_amec->proc[0].temp16msdimm, hottest);
// Store the hottest DIMM temp sensor
sensor_update(AMECSENSOR_PTR(TEMPDIMMTHRM), hottest);
}


Expand Down Expand Up @@ -324,7 +309,7 @@ void mark_dimm_failed()
G_sysConfigData.dimm_huids[port][dimm],
ERRL_CALLOUT_PRIORITY_HIGH);
//Mark DIMM as logged so we don't log it again
amec_mem_mark_logged(port, dimm,
amec_mem_mark_logged(0, dimm,
&G_cent_timeout_logged_bitmap,
&G_dimm_timeout_logged_bitmap.bytes[port]);
commitErrl(&l_err);
Expand Down Expand Up @@ -507,7 +492,7 @@ uint8_t dimm_reset_sm()
case DIMM_STATE_RESET_SLAVE_P0_COMPLETE:
if (schedule_dimm_req(DIMM_STATE_RESET_SLAVE_P0_COMPLETE, L_new_dimm_args))
{
if (G_maxDimmPorts > 1)
if (G_maxDimmPort > 0)
{
nextState = DIMM_STATE_RESET_SLAVE_P1;
}
Expand Down Expand Up @@ -718,6 +703,9 @@ void process_dimm_temp()
}

l_fru->cur_temp = l_dimm_temp;
// Store DIMM temp in sensor
sensor_update(&g_amec->proc[0].tempdimm[DIMM_INDEX(port, dimm)], l_dimm_temp);

G_dimm[port][dimm].errorCount = 0;

} // end process_dimm_temp()
Expand Down Expand Up @@ -843,10 +831,10 @@ void task_dimm_sm(struct task *i_self)
{
case DIMM_STATE_INIT:
// Save max I2C ports
if (G_maxDimmPorts != G_dimm_sm_args.maxPorts)
if (G_maxDimmPort != G_dimm_sm_args.maxPorts)
{
G_maxDimmPorts = G_dimm_sm_args.maxPorts;
DIMM_DBG("task_dimm_sm: updating DIMM Max I2C Ports to %d", G_maxDimmPorts);
G_maxDimmPort = G_dimm_sm_args.maxPorts;
DIMM_DBG("task_dimm_sm: updating DIMM Max I2C Port to %d", G_maxDimmPort);
}
break;

Expand Down
5 changes: 5 additions & 0 deletions src/occ_405/dimm/dimm.h
Expand Up @@ -56,6 +56,11 @@ extern uint16_t G_configured_mbas;


#define NUM_DIMM_PORTS 2
// On Nimbus, we are using the centaur number as the I2C port (keep same structure)
// There can be 8 DIMMs under a Centaur and 8 DIMMs per I2C port (max of 2 ports)
// DIMM code assumed that NUM_DIMMS_PER_I2CPORT == NUM_DIMMS_PER_CENTAUR
#define NUM_DIMMS_PER_I2CPORT 8


#define DIMM_TICK (CURRENT_TICK % MAX_NUM_TICKS)

Expand Down
25 changes: 21 additions & 4 deletions src/occ_405/sensor/sensor_enum.h
Expand Up @@ -76,14 +76,11 @@ enum e_gsid
// ------------------------------------------------------
// System Sensors
// ------------------------------------------------------
TEMPAMBIENT, // Ambient Temp of System (from APSS)
ALTITUDE, // Altitude of System (from APSS)
PWR250US, // System DC Power (from APSS)
PWR250USFAN, // Fan Power (from APSS)
PWR250USIO, // IO Subsystem Power (from APSS)
PWR250USSTORE, // Storage Subsys Power (from APSS)
PWRGPU, // GPU Subsystem Power (from APSS) e.g. Nvidia GPU
FANSPEEDAVG, // Average Fan Speed (from DPSS)
PWRAPSSCH0, // These PWRAPSSCH sensors are used to report the power
PWRAPSSCH1, // provided by each of the 16 APSS channels.
PWRAPSSCH2,
Expand Down Expand Up @@ -495,6 +492,8 @@ enum e_gsid
MWR2MSP0M6,
MWR2MSP0M7,

// TODO: RTC 163359 - Determine if we want to store individual DIMM temps for CENTAUR
// or continue to use max DIMM temp/location under each CENTAUR.
TEMPDIMMAXP0M0,
TEMPDIMMAXP0M1,
TEMPDIMMAXP0M2,
Expand All @@ -513,6 +512,24 @@ enum e_gsid
LOCDIMMAXP0M6,
LOCDIMMAXP0M7,

// Individual DIMM temperatures (NIMBUS)
TEMPDIMM00,
TEMPDIMM01,
TEMPDIMM02,
TEMPDIMM03,
TEMPDIMM04,
TEMPDIMM05,
TEMPDIMM06,
TEMPDIMM07,
TEMPDIMM08,
TEMPDIMM09,
TEMPDIMM10,
TEMPDIMM11,
TEMPDIMM12,
TEMPDIMM13,
TEMPDIMM14,
TEMPDIMM15,

// ------------------------------------------------------
// Centaur Sensors - 8 MemC/Proc - 1 Cent/MemC - 2 PP/Cent
// ------------------------------------------------------
Expand Down Expand Up @@ -705,7 +722,7 @@ enum e_gsid
MLP2P0M7,

TEMP2MSCENT,
TEMP16MSDIMM,
TEMPDIMMTHRM,
MEMSP2MS,

// ------------------------------------------------------
Expand Down

0 comments on commit 6ff7b26

Please sign in to comment.