@@ -598,6 +598,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
598598    return  t -> data  !=  NULL  ||  ggml_gallocr_hash_get (galloc , t )-> allocated ;
599599}
600600
601+ // free the extra space at the end if the new tensor is smaller 
602+ static  void  ggml_gallocr_free_extra_space (ggml_gallocr_t  galloc , struct  ggml_tensor  *  node , struct  ggml_tensor  *  parent ) {
603+     struct  hash_node  *  hn  =  ggml_gallocr_hash_get (galloc , node );
604+     struct  hash_node  *  p_hn  =  ggml_gallocr_hash_get (galloc , parent );
605+ 
606+     size_t  parent_size  =  ggml_backend_buft_get_alloc_size (galloc -> bufts [p_hn -> buffer_id ], parent );
607+     size_t  node_size  =  ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
608+ 
609+     GGML_ASSERT (parent_size  >= node_size );
610+ 
611+     if  (parent_size  >  node_size ) {
612+         struct  ggml_dyn_tallocr  *  p_alloc  =  galloc -> buf_tallocs [p_hn -> buffer_id ];
613+         struct  buffer_address  p_addr  =  p_hn -> addr ;
614+         p_addr .offset  +=  node_size ;
615+         size_t  extra_size  =  parent_size  -  node_size ;
616+         AT_PRINTF ("freeing extra %zu bytes from parent %s for %s\n" , extra_size , parent -> name , node -> name );
617+         ggml_dyn_tallocr_free_tensor (p_alloc , p_addr , extra_size , parent );
618+     }
619+ }
620+ 
601621static  void  ggml_gallocr_allocate_node (ggml_gallocr_t  galloc , struct  ggml_tensor  *  node , int  buffer_id ) {
602622    GGML_ASSERT (buffer_id  >= 0 );
603623    struct  hash_node  *  hn  =  ggml_gallocr_hash_get (galloc , node );
@@ -643,13 +663,15 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
643663                            hn -> addr  =  p_hn -> addr ;
644664                            p_hn -> allocated  =  false; // avoid freeing the parent 
645665                            view_src_hn -> allocated  =  false;
666+                             ggml_gallocr_free_extra_space (galloc , node , view_src );
646667                            return ;
647668                        }
648669                    } else  {
649670                        AT_PRINTF ("reusing parent %s for %s\n" , parent -> name , node -> name );
650671                        hn -> buffer_id  =  p_hn -> buffer_id ;
651672                        hn -> addr  =  p_hn -> addr ;
652673                        p_hn -> allocated  =  false; // avoid freeing the parent 
674+                         ggml_gallocr_free_extra_space (galloc , node , parent );
653675                        return ;
654676                    }
655677                }
0 commit comments