pygmalion.cpp/examples/pyggy at main · AlpinDale/pygmalion.cpp · GitHub

 struct ggml_tensor * inpFF = cur;

 // feed-forward network
 // this is independent of the self-attention result, so it could be done in parallel to the self-attention
 {
     // note here we pass inpSA instead of cur
     cur = ggml_mul_mat(ctx0,
             ggml_transpose(ctx0, model.layers[il].c_mlp_fc_w),
             inpSA);

     cur = ggml_add(ctx0,
             ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur),
             cur);

     // GELU activation
     cur = ggml_gelu(ctx0, cur);

     // projection
     // cur = proj_w*cur + proj_b
     cur = ggml_mul_mat(ctx0,
             model.layers[il].c_mlp_proj_w_trans,
             cur);

     cur = ggml_add(ctx0,
             ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur),
Name	Last commit message	Last commit date
parent directory ..
CMakeLists.txt
README.md
download.sh
pyg.cpp
quantize.cpp
	struct ggml_tensor * inpFF = cur;

	// feed-forward network
	// this is independent of the self-attention result, so it could be done in parallel to the self-attention
	{
	// note here we pass inpSA instead of cur
	cur = ggml_mul_mat(ctx0,
	ggml_transpose(ctx0, model.layers[il].c_mlp_fc_w),
	inpSA);

	cur = ggml_add(ctx0,
	ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur),
	cur);

	// GELU activation
	cur = ggml_gelu(ctx0, cur);

	// projection
	// cur = proj_w*cur + proj_b
	cur = ggml_mul_mat(ctx0,
	model.layers[il].c_mlp_proj_w_trans,
	cur);

	cur = ggml_add(ctx0,
	ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur),