Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Count flops recursively #420

Merged
merged 11 commits into main from igor/count-flops
Nov 26, 2021
Merged

Count flops recursively #420

merged 11 commits into main from igor/count-flops
Nov 26, 2021

Conversation

IgorBaratta
Copy link
Member

@IgorBaratta IgorBaratta commented Nov 16, 2021

This functionality is not exposed, but it can be used by other libraries or functions that use cnodes. Example output, listing the flop count of each statement followed by the total:

FLOPS:  0
// Quadrature rules

FLOPS:  0
// Three quadrature weights, one per quadrature point; indexed by iq in the
// quadrature loop. All three are equal and they sum to 0.5.
static const double weights_48e[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };

FLOPS:  0
// Precomputed values of basis functions and precomputations

FLOPS:  0
// FE* dimensions: [permutation][entities][points][dofs]

FLOPS:  0
// Single-point table with 3 dofs: { -1, 1, 0 }. Read when forming J_c0 and J_c2.
// NOTE(review): the D10 suffix presumably denotes a first derivative in the
// first reference direction -- confirm against the generator's naming scheme.
static const double FE4_C0_D10_Q48e[1][1][1][3] = { { { { -1.0, 1.0, 0.0 } } } };

FLOPS:  0
// Single-point table with 3 dofs: { -1, 0, 1 }. Read when forming J_c1 and J_c3.
// NOTE(review): the D01 suffix presumably denotes a first derivative in the
// second reference direction -- confirm.
static const double FE4_C1_D01_Q48e[1][1][1][3] = { { { { -1.0, 0.0, 1.0 } } } };

FLOPS:  0
// 3 points x 6 dofs. Read in the quadrature loop as [0][0][iq][i], paired with
// the fw1/fw2 weights and the t1 intermediate.
static const double FE8_C0_D01_Q48e[1][1][3][6] =
    { { { { -1.6666666666666667, 0.0, -0.33333333333333337, 0.6666666666666666, 2.0, -0.6666666666666671 },
          { 0.33333333333333287, 0.0, 1.6666666666666663, 0.6666666666666667, -1.9999999999999998, -0.6666666666666664 },
          { 0.33333333333333276, 0.0, -0.33333333333333337, 2.6666666666666665, 0.0, -2.666666666666667 } } } };

FLOPS:  0
// 3 points x 6 dofs. Read in the quadrature loop as [0][0][iq][i], paired with
// the fw0/fw1 weights and the t0 intermediate.
static const double FE8_C0_D10_Q48e[1][1][3][6] =
    { { { { -1.6666666666666667, -0.3333333333333333, 0.0, 0.6666666666666666, -0.6666666666666663, 2.0 },
          { 0.3333333333333327, -0.33333333333333315, 0.0, 2.666666666666667, -2.6666666666666665, 0.0 },
          { 0.33333333333333276, 1.666666666666667, 0.0, 0.6666666666666662, -0.666666666666667, -1.9999999999999996 } } } };

FLOPS:  0
// Quadrature loop independent computations for quadrature rule 48e

FLOPS:  5
// Each J_c* is a three-term dot product of coordinate_dofs entries (stride 3)
// with one of the single-point FE4 tables: 3 multiplies + 2 adds = 5 flops.
// coordinate_dofs is declared outside this listing.
// NOTE(review): the names suggest these are the entries of a 2x2 Jacobian and
// that coordinate_dofs stores 3 components per vertex -- confirm.
// With the table { -1, 1, 0 } this evaluates to coordinate_dofs[3] - coordinate_dofs[0].
const double J_c0 = coordinate_dofs[0] * FE4_C0_D10_Q48e[0][0][0][0] + coordinate_dofs[3] * FE4_C0_D10_Q48e[0][0][0][1] + coordinate_dofs[6] * FE4_C0_D10_Q48e[0][0][0][2];

FLOPS:  5
// With the table { -1, 0, 1 } this evaluates to coordinate_dofs[7] - coordinate_dofs[1].
const double J_c3 = coordinate_dofs[1] * FE4_C1_D01_Q48e[0][0][0][0] + coordinate_dofs[4] * FE4_C1_D01_Q48e[0][0][0][1] + coordinate_dofs[7] * FE4_C1_D01_Q48e[0][0][0][2];

FLOPS:  5
// With the table { -1, 0, 1 } this evaluates to coordinate_dofs[6] - coordinate_dofs[0].
const double J_c1 = coordinate_dofs[0] * FE4_C1_D01_Q48e[0][0][0][0] + coordinate_dofs[3] * FE4_C1_D01_Q48e[0][0][0][1] + coordinate_dofs[6] * FE4_C1_D01_Q48e[0][0][0][2];

FLOPS:  5
// With the table { -1, 1, 0 } this evaluates to coordinate_dofs[4] - coordinate_dofs[1].
const double J_c2 = coordinate_dofs[1] * FE4_C0_D10_Q48e[0][0][0][0] + coordinate_dofs[4] * FE4_C0_D10_Q48e[0][0][0][1] + coordinate_dofs[7] * FE4_C0_D10_Q48e[0][0][0][2];

FLOPS:  0
// Scratch array of 20 scalars that do not depend on the quadrature point.
// Only entries 17, 18 and 19 are read by the quadrature loop.
// Reading the J_c* values as the matrix J = [[J_c0, J_c1], [J_c2, J_c3]],
// entry 2 is det(J) and entries 3, 4, 8, 9 are the entries of K = inverse(J).
// NOTE(review): that matrix layout is inferred from the determinant formula -- confirm.
double sp_48e[20];

FLOPS:  1
sp_48e[0] = J_c0 * J_c3; // first product of the 2x2 determinant

FLOPS:  1
sp_48e[1] = J_c1 * J_c2; // second product of the 2x2 determinant

FLOPS:  2
sp_48e[2] = sp_48e[0] + -1 * sp_48e[1]; // det = J_c0*J_c3 - J_c1*J_c2; the multiply by -1 is counted as a flop

FLOPS:  1
sp_48e[3] = J_c0 / sp_48e[2]; // K[1][1] = J_c0 / det

FLOPS:  2
sp_48e[4] = (-1 * J_c1) / sp_48e[2]; // K[0][1] = -J_c1 / det

FLOPS:  1
sp_48e[5] = sp_48e[3] * sp_48e[3]; // K[1][1]^2

FLOPS:  1
sp_48e[6] = sp_48e[3] * sp_48e[4]; // K[1][1] * K[0][1]

FLOPS:  1
sp_48e[7] = sp_48e[4] * sp_48e[4]; // K[0][1]^2

FLOPS:  1
sp_48e[8] = J_c3 / sp_48e[2]; // K[0][0] = J_c3 / det

FLOPS:  2
sp_48e[9] = (-1 * J_c2) / sp_48e[2]; // K[1][0] = -J_c2 / det

FLOPS:  1
sp_48e[10] = sp_48e[9] * sp_48e[9]; // K[1][0]^2

FLOPS:  1
sp_48e[11] = sp_48e[8] * sp_48e[9]; // K[0][0] * K[1][0]

FLOPS:  1
sp_48e[12] = sp_48e[8] * sp_48e[8]; // K[0][0]^2

FLOPS:  1
sp_48e[13] = sp_48e[5] + sp_48e[10]; // K[1][1]^2 + K[1][0]^2

FLOPS:  1
sp_48e[14] = sp_48e[6] + sp_48e[11]; // K[1][1]*K[0][1] + K[0][0]*K[1][0]

FLOPS:  1
sp_48e[15] = sp_48e[12] + sp_48e[7]; // K[0][0]^2 + K[0][1]^2

FLOPS:  1
sp_48e[16] = fabs(sp_48e[2]); // |det|; the fabs call is counted as one flop

FLOPS:  1
sp_48e[17] = sp_48e[13] * sp_48e[16]; // scaled by |det|; multiplies the D01*D01 term in the loop (via fw2)

FLOPS:  1
sp_48e[18] = sp_48e[14] * sp_48e[16]; // scaled by |det|; multiplies the mixed D10*D01 terms in the loop (via fw1)

FLOPS:  1
sp_48e[19] = sp_48e[15] * sp_48e[16]; // scaled by |det|; multiplies the D10*D10 term in the loop (via fw0)

FLOPS:  3 * ((3 + 6 * 6) + 6 * (6 * 4))
// Quadrature loop over the 3 points of rule 48e. For each point it adds to
// A[6 * i + j] the value
//   D10[j] * (fw0 * D10[i] + fw1 * D01[i]) + D01[j] * (fw1 * D10[i] + fw2 * D01[i])
// where D10/D01 are the FE8 table rows at point iq. A is declared outside this
// listing and is only accumulated into (+=), never reset, here.
// Flop structure per quadrature point: 3 multiplies for fw0..fw2, 6 iterations
// of 6 flops for t0/t1, and 6 x 6 iterations of 4 flops for the accumulation.
for (int iq = 0; iq < 3; ++iq)
{
    // Point-independent factors scaled by the weight of this quadrature point.
    const double fw0 = sp_48e[19] * weights_48e[iq];
    const double fw1 = sp_48e[18] * weights_48e[iq];
    const double fw2 = sp_48e[17] * weights_48e[iq];
    // Per-row intermediates, computed once per i so that the inner j loop
    // does not recompute them.
    double t0[6];
    double t1[6];
    for (int i = 0; i < 6; ++i)
    {
        // 2 multiplies + 1 add each, i.e. 6 flops per i.
        t0[i] = fw0 * FE8_C0_D10_Q48e[0][0][iq][i] + fw1 * FE8_C0_D01_Q48e[0][0][iq][i];
        t1[i] = fw1 * FE8_C0_D10_Q48e[0][0][iq][i] + fw2 * FE8_C0_D01_Q48e[0][0][iq][i];
    }
    // A is addressed as a flat 6 x 6 array, row i and column j.
    for (int i = 0; i < 6; ++i)
        for (int j = 0; j < 6; ++j)
            // 2 multiplies + 1 add + 1 accumulate = 4 flops per (i, j).
            A[6 * i + j] += FE8_C0_D10_Q48e[0][0][iq][j] * t0[i] + FE8_C0_D01_Q48e[0][0][iq][j] * t1[i];
}

TOTAL: 592

@IgorBaratta IgorBaratta merged commit df22e51 into main Nov 26, 2021
@IgorBaratta IgorBaratta deleted the igor/count-flops branch November 26, 2021 16:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants