# NB08 - Investigate what's wrong with the current SCCA model

As of now, it seems that the model incorrectly, it seems that the chat still experiences issues with certain kinds of questions, with the main issue being that it references the wrong documents and then says that it does not have the information available (when we know that it does). A few potential issues could be:

 - Incorrect implementation of torch.roll and the overall shifting method
 - Incorrect reshaping back to original tensor dimensions
 - Misalignment of shifted embeddings

## Existing SCCA clas description and generate_json_entry_scca() function, as well as generate_json_entry_without_scca()

In [1]:
import torch
import torch.nn as nn

class ShiftedCrossChunkAttention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, shift_size=1):
        super(ShiftedCrossChunkAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)
        self.shift_size = shift_size

    def shift_key_value(self, embeddings, shift_size):
        # embeddings: Tensor of shape (chunk_size, num_chunks, embed_dim)
        #chunk_size, num_chunks, embed_dim = embeddings.size()
        
        # Shifting keys and values
        shifted_embeddings = torch.roll(embeddings, shifts=shift_size, dims=1)
        return shifted_embeddings

    def forward(self, chunk_embeddings):
        # chunk_embeddings: Tensor of shape (num_chunks, chunk_size, embed_dim)
        #num_chunks, chunk_size, embed_dim = chunk_embeddings.size()
        
        # Reshape for multihead attention
        chunk_embeddings = chunk_embeddings.permute(1, 0, 2)  # (chunk_size, num_chunks, embed_dim)
        
        # Shift keys and values
        shifted_embeddings = self.shift_key_value(chunk_embeddings, self.shift_size)
        
        # Apply attention mechanism across shifted chunks
        attn_output, _ = self.attention(chunk_embeddings, shifted_embeddings, shifted_embeddings)
        
        # Reshape back to original format
        attn_output = attn_output.permute(1, 0, 2)  # (num_chunks, chunk_size, embed_dim)
        
        return attn_output


In [30]:
import os
import re
import hashlib
from PyPDF2 import PdfReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import json
from datetime import datetime


from chatlse.embeddings import compute_text_embedding_sync

import torch



#### Environment variables & Constants ####

# Default is 512 for GTE-large
EMBED_CHUNK_SIZE = os.getenv("EMBED_CHUNK_SIZE")
#  Default is 128 as experimented
EMBED_OVERLAP_SIZE = os.getenv("EMBED_OVERLAP_SIZE")
# Get embedding model
EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL")
# Get embedding dimension 
EMBED_DIM = os.getenv("EMBED_DIM")

if not EMBED_MODEL:
    # Use default model if not provided
    EMBED_MODEL = "thenlper/gte-large"

MODEL_INSTANCE = HuggingFaceEmbedding(EMBED_MODEL)

# Initialize the cross-chunk attention mechanism
CROSS_CHUNK_ATTENTION_INSTANCE = ShiftedCrossChunkAttention(EMBED_DIM if EMBED_DIM else 1024) # Set default embed_dim to 1024

def generate_json_entry_scca(text, type, url, title, date_scraped, doc_id):
    """
    This function takes the metadata returned by the `file_downloader`, chunks and embeds
    the files and returns a json entry for input into postgres database. 

    Output: 
        - doc_id: hashed chunk content 
        - chunk_id: id of the file chunk 
        - type: type of the file 
        - url: url of the file 
        - title: title of the file 
        - content: chunked content of the file 
        - date_scraped: datetime of when the data is scraped 
        - embedding: embedded chunk 
    """

    # Chunking the document
    splitter = SentenceSplitter(
        chunk_size=EMBED_CHUNK_SIZE if EMBED_CHUNK_SIZE else 512,
        chunk_overlap=0
    )

    sentence_chunks = splitter.split_text(text)
    chunk_embeddings = []

    # Compute initial embeddings for each chunk
    for chunk_text in sentence_chunks:
        embedding = compute_text_embedding_sync(chunk_text, model_instance=MODEL_INSTANCE)
        chunk_embeddings.append(embedding)

    # Convert to tensor and reshape for the attention mechanism
    chunk_embeddings = torch.tensor(chunk_embeddings)
    num_chunks, embed_dim = chunk_embeddings.size()
    chunk_embeddings = chunk_embeddings.view(num_chunks, 1, embed_dim)  # Reshape to (num_chunks, chunk_size=1, embed_dim)

    # Apply cross-chunk attention
    attended_embeddings = CROSS_CHUNK_ATTENTION_INSTANCE(chunk_embeddings)
    attended_embeddings = attended_embeddings.view(num_chunks, embed_dim).tolist()  # Reshape back to (num_chunks, embed_dim)

    # Generate output list with attended embeddings
    output_list = []
    for chunk_id, attended_embedding in enumerate(attended_embeddings):
        id = f"{doc_id}_{chunk_id}"
        output_list.append([
            id, 
            doc_id,
            chunk_id,
            type,
            url,
            title,
            sentence_chunks[chunk_id],
            date_scraped,
            attended_embedding
        ])

    return output_list

def generate_json_entry_without_scca(text, type, url, title, date_scraped, doc_id):
    """
    This function takes the metadata returned by the `file_downloader`, chunks and embeds
    the files and returns a json entry for input into postgres database. 

    Output: 
        - doc_id: hashed chunk content 
        - chunk_id: id of the file chunk 
        - type: type of the file 
        - url: url of the file 
        - title: title of the file 
        - content: chunked content of the file 
        - date_scraped: datetime of when the data is scraped 
        - embedding: embedded chunk 
    """

    # Chunking and embedding chunks
    splitter = SentenceSplitter(
        chunk_size=EMBED_CHUNK_SIZE if EMBED_CHUNK_SIZE else 512,
        chunk_overlap=EMBED_OVERLAP_SIZE if EMBED_OVERLAP_SIZE else 128
    )

    sentence_chunks = splitter.split_text(text)
    output_list = []
    for chunk_id, chunk_text in enumerate(sentence_chunks):
        id = f"{doc_id}_{chunk_id}"
        embedding = compute_text_embedding_sync(chunk_text, model_instance=MODEL_INSTANCE)
        output_list.append([
            id,
            doc_id,
            chunk_id,
            type,
            url,
            title,
            chunk_text,
            date_scraped,
            embedding
        ])

    return output_list

### Testing 

In [31]:
json_entry = generate_json_entry_scca(text='Those who believe in a more open, liberal approach to immigration often frame their argument as being on the side of “good economics” versus “bad politics”. Alan Manning explains why the arguments presented as “good economics” are often unconvincing; those on this side of the argument really need to up their game. Post-pandemic net migration has risen in many countries. Although there are good reasons to think much of this is temporary, many countries, including the UK, have responded by making their immigration policies more restrictive. Those who believe in a more open, liberal approach to immigration argue that this is “good economics” but closer scrutiny shows these arguments are often not as strong as they think or could be. To illustrate this I consider three articles from respected sources. I apologise to the authors for singling them out; I could have chosen many others who put forward similar arguments. I chose them because I think they are representative, not because they are especially bad. They are an Economist editorial (“How to detoxify the politics of migration”), a Financial Times op-ed “Immigration crackdowns are good politics but bad economics” and a Guardian article “Why Home Office visa plans will be ‘nail in the coffin’ for UK hospitality ”. The Economist editorial points out that “the share of the world’s people who live outside their country of birth is just 3.6 per cent; it has barely changed since 1960, when it was 3.1 per cent”, implying immigration anxiety is a fuss about nothing. This is highly misleading; it is in the high-income countries where concerns about immigration are focused and there the share of migrants has doubled in the last 30 years and continues to rise. Can this really be described as “barely changed”? In most of these countries, the share of migrants is at historical highs. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist goes on to outline some economic benefits from immigration: immigrants are four times likelier to win a Nobel science prize than the native-born. True, but this is 45 people since 2000 so the chance of an immigrant winning a Nobel is so small that it can hardly be used to argue for a generally open immigration policy (though perhaps more open for some small selective groups of high-flying scientists). The Economist goes on: “Immigrants in America are nearly twice as likely to start a company as the native-born”. They could have given other less striking sources, for example a 2011 OECD report also found that “migrants are more likely to start a new business in most OECD countries” but “the survival rate of those businesses is lower than that for new businesses started by native-born entrepreneurs” so that “on average, across OECD countries, the percentage of migrant entrepreneurs differs only slightly from that of natives“. In addition “On average, a foreign-born self-employed who owns a small or medium firm creates slightly fewer [additional jobs] than their native-born counterparts” suggesting migrant businesses seem less successful. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist claim comes, I think, from this study but is an inaccurate summary of a misleading paper. Misleading because many companies have both local and migrant founders and the study’s headline figure is based on counting these as migrant firms. And then it compares the share of migrant firms with the share of migrants in the total population. Migrants are more likely to found firms in large part because they are more likely to be working-age; few companies are founded by children and pensioners. Adjust for this and you arrive at a conclusion similar to the OECD. Migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. You might think it is me who is missing the point, that we need working-age immigrants to deal with the challenge of an ageing population. The Financial Times op-ed argues “the US would need to let in nearly 4 million migrants a year, every year, to prevent its population growth turning negative in the coming decades”. I think the intention is to argue that high immigration is necessary to address ageing but really it makes the point that even immigration at very high levels (implying one per cent population growth per year permanently and ultimately a population over 50 per cent migrant) can only delay population decline by a few years. As a 2019 EU report put it, “Higher fertility or more immigration are not enough to cope with the challenges of population ageing”. The reason is simple; migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. All serious demographic work of which I am aware comes to a similar conclusion yet proponents of immigration as a solution to ageing rarely cite this work or give any impression they have ever read it. Then there is the argument that we need migrants to deal with problems of labour shortages. I have written about why this is misleading elsewhere. Most of the shortages we hear about are not the result of too few people in a country able to do the job (a skills shortage); they are the result of too few wanting the job because of poor pay and conditions. These sectors want a ring-fenced supply of migrants so they do not have to offer competitive salaries. The Financial Times op-ed argues immigration “reduced upward pressure on wages and inflation” while the Guardian article recounts an employer who “when he advertised for a head chef in Birmingham, on a very competitive rate of £37,000, he woke up the next morning to find rival restaurants outbidding him for the same staff at £40,000”. I understand why business owners dislike this bidding war for workers but perhaps the chef might think it a good thing. Both of these articles seem to imply that wage growth would have been higher if immigration had been lower thus conceding a common criticism of immigration (that it reduces wages) while seeming to imagine they are making a case for more liberal immigration. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. Please do not think those on the other side of the immigration debate are better; they are equally, probably more, guilty of using cherry-picked studies, misleading statistics and arguments that do not withstand much scrutiny. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. If, as I do, you think we can have a more liberal, open (though with limits), humane immigration policy, please be a bit less critical of the other side and more critical of yourselves. Otherwise don’t be surprised if your arguments fail to prevail',
    type="website",
    url="https://blogs.lse.ac.uk/politicsandpolicy/the-discussion-of-immigration-needs-to-improve/",
    title="The discussion on immigration needs to improve",
    date_scraped=datetime.now(),
    doc_id="1")

json_entry_2 = generate_json_entry_without_scca(text='Those who believe in a more open, liberal approach to immigration often frame their argument as being on the side of “good economics” versus “bad politics”. Alan Manning explains why the arguments presented as “good economics” are often unconvincing; those on this side of the argument really need to up their game. Post-pandemic net migration has risen in many countries. Although there are good reasons to think much of this is temporary, many countries, including the UK, have responded by making their immigration policies more restrictive. Those who believe in a more open, liberal approach to immigration argue that this is “good economics” but closer scrutiny shows these arguments are often not as strong as they think or could be. To illustrate this I consider three articles from respected sources. I apologise to the authors for singling them out; I could have chosen many others who put forward similar arguments. I chose them because I think they are representative, not because they are especially bad. They are an Economist editorial (“How to detoxify the politics of migration”), a Financial Times op-ed “Immigration crackdowns are good politics but bad economics” and a Guardian article “Why Home Office visa plans will be ‘nail in the coffin’ for UK hospitality ”. The Economist editorial points out that “the share of the world’s people who live outside their country of birth is just 3.6 per cent; it has barely changed since 1960, when it was 3.1 per cent”, implying immigration anxiety is a fuss about nothing. This is highly misleading; it is in the high-income countries where concerns about immigration are focused and there the share of migrants has doubled in the last 30 years and continues to rise. Can this really be described as “barely changed”? In most of these countries, the share of migrants is at historical highs. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist goes on to outline some economic benefits from immigration: immigrants are four times likelier to win a Nobel science prize than the native-born. True, but this is 45 people since 2000 so the chance of an immigrant winning a Nobel is so small that it can hardly be used to argue for a generally open immigration policy (though perhaps more open for some small selective groups of high-flying scientists). The Economist goes on: “Immigrants in America are nearly twice as likely to start a company as the native-born”. They could have given other less striking sources, for example a 2011 OECD report also found that “migrants are more likely to start a new business in most OECD countries” but “the survival rate of those businesses is lower than that for new businesses started by native-born entrepreneurs” so that “on average, across OECD countries, the percentage of migrant entrepreneurs differs only slightly from that of natives“. In addition “On average, a foreign-born self-employed who owns a small or medium firm creates slightly fewer [additional jobs] than their native-born counterparts” suggesting migrant businesses seem less successful. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist claim comes, I think, from this study but is an inaccurate summary of a misleading paper. Misleading because many companies have both local and migrant founders and the study’s headline figure is based on counting these as migrant firms. And then it compares the share of migrant firms with the share of migrants in the total population. Migrants are more likely to found firms in large part because they are more likely to be working-age; few companies are founded by children and pensioners. Adjust for this and you arrive at a conclusion similar to the OECD. Migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. You might think it is me who is missing the point, that we need working-age immigrants to deal with the challenge of an ageing population. The Financial Times op-ed argues “the US would need to let in nearly 4 million migrants a year, every year, to prevent its population growth turning negative in the coming decades”. I think the intention is to argue that high immigration is necessary to address ageing but really it makes the point that even immigration at very high levels (implying one per cent population growth per year permanently and ultimately a population over 50 per cent migrant) can only delay population decline by a few years. As a 2019 EU report put it, “Higher fertility or more immigration are not enough to cope with the challenges of population ageing”. The reason is simple; migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. All serious demographic work of which I am aware comes to a similar conclusion yet proponents of immigration as a solution to ageing rarely cite this work or give any impression they have ever read it. Then there is the argument that we need migrants to deal with problems of labour shortages. I have written about why this is misleading elsewhere. Most of the shortages we hear about are not the result of too few people in a country able to do the job (a skills shortage); they are the result of too few wanting the job because of poor pay and conditions. These sectors want a ring-fenced supply of migrants so they do not have to offer competitive salaries. The Financial Times op-ed argues immigration “reduced upward pressure on wages and inflation” while the Guardian article recounts an employer who “when he advertised for a head chef in Birmingham, on a very competitive rate of £37,000, he woke up the next morning to find rival restaurants outbidding him for the same staff at £40,000”. I understand why business owners dislike this bidding war for workers but perhaps the chef might think it a good thing. Both of these articles seem to imply that wage growth would have been higher if immigration had been lower thus conceding a common criticism of immigration (that it reduces wages) while seeming to imagine they are making a case for more liberal immigration. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. Please do not think those on the other side of the immigration debate are better; they are equally, probably more, guilty of using cherry-picked studies, misleading statistics and arguments that do not withstand much scrutiny. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. If, as I do, you think we can have a more liberal, open (though with limits), humane immigration policy, please be a bit less critical of the other side and more critical of yourselves. Otherwise don’t be surprised if your arguments fail to prevail',
    type="website",
    url="https://blogs.lse.ac.uk/politicsandpolicy/the-discussion-of-immigration-needs-to-improve/",
    title="The discussion on immigration needs to improve",
    date_scraped=datetime.now(),
    doc_id="1")

In [32]:
from pprint import pprint
pprint(f"Chunk 1 embedding with attention: {json_entry[0][-1]}")
pprint(f"Chunk 2 embedding with attention: {json_entry[1][-1]}")
pprint(f"Chunk 3 embedding with attention: {json_entry[2][-1]}")

('Chunk 1 embedding with attention: [0.0016721525462344289, '
 '-0.015388729982078075, 0.011133555322885513, -0.00095429114298895, '
 '-0.0024166384246200323, 0.02761598490178585, -0.01279676053673029, '
 '-0.014689227566123009, -0.017902497202157974, -0.0010053578298538923, '
 '0.010331042110919952, -0.024289697408676147, 0.004641219973564148, '
 '0.0027018000837415457, 0.005588550120592117, 0.00980026088654995, '
 '0.005101038608700037, -0.0012931920355185866, -0.026455536484718323, '
 '0.0004549261066131294, -0.027863463386893272, 0.009428746998310089, '
 '0.007896462455391884, -0.0038433170411735773, -0.010457388125360012, '
 '0.0174561720341444, -0.013882909901440144, -0.013637252151966095, '
 '0.009693284519016743, 0.0026804357767105103, -0.0045809317380189896, '
 '0.023669565096497536, -0.002020570682361722, -0.015370476990938187, '
 '0.01603194698691368, -0.005510391667485237, 0.021488724276423454, '
 '-0.004789944272488356, -0.0001987361756619066, 0.005584363359957933, '
 '-0.

In [33]:
pprint(f"Chunk 1 embedding without attention: {json_entry_2[0][-1]}")
pprint(f"Chunk 2 embedding without attention: {json_entry_2[1][-1]}")
pprint(f"Chunk 3 embedding without attention: {json_entry_2[2][-1]}")

('Chunk 1 embedding without attention: [0.002360329730436206, '
 '0.023222019895911217, -0.002290709177032113, -0.0242925975471735, '
 '-0.011311972513794899, -0.0198212880641222, 0.0009374780347570777, '
 '0.03984905779361725, -0.0014118346152827144, 0.03539763018488884, '
 '0.041316140443086624, 0.012940448708832264, -0.006051282864063978, '
 '-0.01541432086378336, -0.025221440941095352, -0.010274909436702728, '
 '-0.007863504812121391, -0.0240637194365263, -0.012484882958233356, '
 '0.013493623584508896, -0.014475763775408268, 0.009798510931432247, '
 '-0.05072183161973953, -0.015307178720831871, -0.014557438902556896, '
 '0.04302104562520981, 0.05946977436542511, 0.008090896531939507, '
 '0.08089026808738708, 0.07207117974758148, -0.01581844873726368, '
 '-0.020859692245721817, 0.03981094807386398, -0.04355306550860405, '
 '-0.03008013591170311, -0.03229402005672455, 0.0235392265021801, '
 '-0.04830694943666458, -0.009857905097305775, -0.037128619849681854, '
 '-0.01473384629935026

## 1. Testing if reshaping is working fine

In [36]:
import torch
import torch.nn as nn

class ShiftedCrossChunkAttention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, shift_size=1):
        super(ShiftedCrossChunkAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)
        self.shift_size = shift_size

    def shift_key_value(self, embeddings, shift_size):
        # embeddings: Tensor of shape (chunk_size, num_chunks, embed_dim)
        print("Original embeddings shape:", embeddings.shape)
        shifted_embeddings = torch.roll(embeddings, shifts=shift_size, dims=1)
        print("Shifted embeddings shape:", shifted_embeddings.shape)
        return shifted_embeddings

    def forward(self, chunk_embeddings):
        # chunk_embeddings: Tensor of shape (num_chunks, chunk_size, embed_dim)
        print("Original chunk_embeddings shape:", chunk_embeddings.shape)
        chunk_embeddings = chunk_embeddings.permute(1, 0, 2)  # (chunk_size, num_chunks, embed_dim)
        print("Permuted chunk_embeddings shape:", chunk_embeddings.shape)
        
        # Shift keys and values
        shifted_embeddings = self.shift_key_value(chunk_embeddings, self.shift_size)
        
        # Apply attention mechanism across shifted chunks
        attn_output, _ = self.attention(chunk_embeddings, shifted_embeddings, shifted_embeddings)
        print("Attention output shape:", attn_output.shape)
        
        # Reshape back to original format
        attn_output = attn_output.permute(1, 0, 2)  # (num_chunks, chunk_size, embed_dim)
        print("Reshaped attention output shape:", attn_output.shape)
        
        return attn_output

def generate_json_entry_test_reshaping(text, type, url, title, date_scraped, doc_id, cross_chunk_attention_instance):
    splitter = SentenceSplitter(
        chunk_size=EMBED_CHUNK_SIZE if EMBED_CHUNK_SIZE else 512,
        chunk_overlap=0
    )

    sentence_chunks = splitter.split_text(text)
    chunk_embeddings = []

    # Compute initial embeddings for each chunk
    for chunk_text in sentence_chunks:
        embedding = compute_text_embedding_sync(chunk_text, model_instance=MODEL_INSTANCE)
        chunk_embeddings.append(embedding)

    # Convert to tensor and reshape for the attention mechanism
    chunk_embeddings = torch.tensor(chunk_embeddings)
    num_chunks, embed_dim = chunk_embeddings.size()
    chunk_embeddings = chunk_embeddings.view(num_chunks, 1, embed_dim)  # Reshape to (num_chunks, chunk_size=1, embed_dim)
    print("Initial chunk_embeddings shape:", chunk_embeddings.shape)

    # Apply cross-chunk attention
    attended_embeddings = cross_chunk_attention_instance(chunk_embeddings)
    print("Attended embeddings shape:", attended_embeddings.shape)
    attended_embeddings = attended_embeddings.view(num_chunks, embed_dim).tolist()  # Reshape back to (num_chunks, embed_dim)

    # Generate output list with attended embeddings
    output_list = []
    for chunk_id, attended_embedding in enumerate(attended_embeddings):
        id = f"{doc_id}_{chunk_id}"
        output_list.append([
            id, 
            doc_id,
            chunk_id,
            type,
            url,
            title,
            sentence_chunks[chunk_id],
            date_scraped,
            attended_embedding
        ])

    return output_list

CROSS_CHUNK_ATTENTION_INSTANCE = ShiftedCrossChunkAttention(EMBED_DIM if EMBED_DIM else 1024) # Set default embed_dim to 1024


json_entry_test_shift = generate_json_entry_test_reshaping(text='Those who believe in a more open, liberal approach to immigration often frame their argument as being on the side of “good economics” versus “bad politics”. Alan Manning explains why the arguments presented as “good economics” are often unconvincing; those on this side of the argument really need to up their game. Post-pandemic net migration has risen in many countries. Although there are good reasons to think much of this is temporary, many countries, including the UK, have responded by making their immigration policies more restrictive. Those who believe in a more open, liberal approach to immigration argue that this is “good economics” but closer scrutiny shows these arguments are often not as strong as they think or could be. To illustrate this I consider three articles from respected sources. I apologise to the authors for singling them out; I could have chosen many others who put forward similar arguments. I chose them because I think they are representative, not because they are especially bad. They are an Economist editorial (“How to detoxify the politics of migration”), a Financial Times op-ed “Immigration crackdowns are good politics but bad economics” and a Guardian article “Why Home Office visa plans will be ‘nail in the coffin’ for UK hospitality ”. The Economist editorial points out that “the share of the world’s people who live outside their country of birth is just 3.6 per cent; it has barely changed since 1960, when it was 3.1 per cent”, implying immigration anxiety is a fuss about nothing. This is highly misleading; it is in the high-income countries where concerns about immigration are focused and there the share of migrants has doubled in the last 30 years and continues to rise. Can this really be described as “barely changed”? In most of these countries, the share of migrants is at historical highs. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist goes on to outline some economic benefits from immigration: immigrants are four times likelier to win a Nobel science prize than the native-born. True, but this is 45 people since 2000 so the chance of an immigrant winning a Nobel is so small that it can hardly be used to argue for a generally open immigration policy (though perhaps more open for some small selective groups of high-flying scientists). The Economist goes on: “Immigrants in America are nearly twice as likely to start a company as the native-born”. They could have given other less striking sources, for example a 2011 OECD report also found that “migrants are more likely to start a new business in most OECD countries” but “the survival rate of those businesses is lower than that for new businesses started by native-born entrepreneurs” so that “on average, across OECD countries, the percentage of migrant entrepreneurs differs only slightly from that of natives“. In addition “On average, a foreign-born self-employed who owns a small or medium firm creates slightly fewer [additional jobs] than their native-born counterparts” suggesting migrant businesses seem less successful. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist claim comes, I think, from this study but is an inaccurate summary of a misleading paper. Misleading because many companies have both local and migrant founders and the study’s headline figure is based on counting these as migrant firms. And then it compares the share of migrant firms with the share of migrants in the total population. Migrants are more likely to found firms in large part because they are more likely to be working-age; few companies are founded by children and pensioners. Adjust for this and you arrive at a conclusion similar to the OECD. Migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. You might think it is me who is missing the point, that we need working-age immigrants to deal with the challenge of an ageing population. The Financial Times op-ed argues “the US would need to let in nearly 4 million migrants a year, every year, to prevent its population growth turning negative in the coming decades”. I think the intention is to argue that high immigration is necessary to address ageing but really it makes the point that even immigration at very high levels (implying one per cent population growth per year permanently and ultimately a population over 50 per cent migrant) can only delay population decline by a few years. As a 2019 EU report put it, “Higher fertility or more immigration are not enough to cope with the challenges of population ageing”. The reason is simple; migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. All serious demographic work of which I am aware comes to a similar conclusion yet proponents of immigration as a solution to ageing rarely cite this work or give any impression they have ever read it. Then there is the argument that we need migrants to deal with problems of labour shortages. I have written about why this is misleading elsewhere. Most of the shortages we hear about are not the result of too few people in a country able to do the job (a skills shortage); they are the result of too few wanting the job because of poor pay and conditions. These sectors want a ring-fenced supply of migrants so they do not have to offer competitive salaries. The Financial Times op-ed argues immigration “reduced upward pressure on wages and inflation” while the Guardian article recounts an employer who “when he advertised for a head chef in Birmingham, on a very competitive rate of £37,000, he woke up the next morning to find rival restaurants outbidding him for the same staff at £40,000”. I understand why business owners dislike this bidding war for workers but perhaps the chef might think it a good thing. Both of these articles seem to imply that wage growth would have been higher if immigration had been lower thus conceding a common criticism of immigration (that it reduces wages) while seeming to imagine they are making a case for more liberal immigration. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. Please do not think those on the other side of the immigration debate are better; they are equally, probably more, guilty of using cherry-picked studies, misleading statistics and arguments that do not withstand much scrutiny. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. If, as I do, you think we can have a more liberal, open (though with limits), humane immigration policy, please be a bit less critical of the other side and more critical of yourselves. Otherwise don’t be surprised if your arguments fail to prevail',
    type="website",
    url="https://blogs.lse.ac.uk/politicsandpolicy/the-discussion-of-immigration-needs-to-improve/",
    title="The discussion on immigration needs to improve",
    date_scraped=datetime.now(),
    doc_id="1",
    cross_chunk_attention_instance= CROSS_CHUNK_ATTENTION_INSTANCE)


Initial chunk_embeddings shape: torch.Size([3, 1, 1024])
Original chunk_embeddings shape: torch.Size([3, 1, 1024])
Permuted chunk_embeddings shape: torch.Size([1, 3, 1024])
Original embeddings shape: torch.Size([1, 3, 1024])
Shifted embeddings shape: torch.Size([1, 3, 1024])
Attention output shape: torch.Size([1, 3, 1024])
Reshaped attention output shape: torch.Size([3, 1, 1024])
Attended embeddings shape: torch.Size([3, 1, 1024])


### Reshaping seems to be done fine... onto the next one!

## 2. Testing if shifting is working fine (torch.roll functionality)

In [38]:
import torch
import torch.nn as nn

class ShiftedCrossChunkAttention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, shift_size=1):
        super(ShiftedCrossChunkAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)
        self.shift_size = shift_size

    def shift_key_value(self, embeddings, shift_size):
        # embeddings: Tensor of shape (chunk_size, num_chunks, embed_dim)
        print("Original embeddings:\n", embeddings)
        shifted_embeddings = torch.roll(embeddings, shifts=shift_size, dims=1)
        print("Shifted embeddings:\n", shifted_embeddings)
        return shifted_embeddings

    def forward(self, chunk_embeddings):
        # chunk_embeddings: Tensor of shape (num_chunks, chunk_size, embed_dim)
        print("Original chunk_embeddings:\n", chunk_embeddings)
        chunk_embeddings = chunk_embeddings.permute(1, 0, 2)  # (chunk_size, num_chunks, embed_dim)
        print("Permuted chunk_embeddings:\n", chunk_embeddings)
        
        # Shift keys and values
        shifted_embeddings = self.shift_key_value(chunk_embeddings, self.shift_size)
        
        # Apply attention mechanism across shifted chunks
        attn_output, _ = self.attention(chunk_embeddings, shifted_embeddings, shifted_embeddings)
        print("Attention output:\n", attn_output)
        
        # Reshape back to original format
        attn_output = attn_output.permute(1, 0, 2)  # (num_chunks, chunk_size, embed_dim)
        print("Reshaped attention output:\n", attn_output)
        
        return attn_output

def generate_json_entry_test_shifting(text, type, url, title, date_scraped, doc_id, cross_chunk_attention_instance):
    splitter = SentenceSplitter(
        chunk_size=EMBED_CHUNK_SIZE if EMBED_CHUNK_SIZE else 512,
        chunk_overlap=0
    )

    sentence_chunks = splitter.split_text(text)
    chunk_embeddings = []

    # Compute initial embeddings for each chunk
    for chunk_text in sentence_chunks:
        embedding = compute_text_embedding_sync(chunk_text, model_instance=MODEL_INSTANCE)
        chunk_embeddings.append(embedding)

    # Convert to tensor and reshape for the attention mechanism
    chunk_embeddings = torch.tensor(chunk_embeddings)
    num_chunks, embed_dim = chunk_embeddings.size()
    chunk_embeddings = chunk_embeddings.view(num_chunks, 1, embed_dim)  # Reshape to (num_chunks, chunk_size=1, embed_dim)
    print("Initial chunk_embeddings:\n", chunk_embeddings)

    # Apply cross-chunk attention
    attended_embeddings = cross_chunk_attention_instance(chunk_embeddings)
    print("Attended embeddings:\n", attended_embeddings)
    attended_embeddings = attended_embeddings.view(num_chunks, embed_dim).tolist()  # Reshape back to (num_chunks, embed_dim)

    # Generate output list with attended embeddings
    output_list = []
    for chunk_id, attended_embedding in enumerate(attended_embeddings):
        id = f"{doc_id}_{chunk_id}"
        output_list.append([
            id, 
            doc_id,
            chunk_id,
            type,
            url,
            title,
            sentence_chunks[chunk_id],
            date_scraped,
            attended_embedding
        ])

    return output_list

CROSS_CHUNK_ATTENTION_INSTANCE = ShiftedCrossChunkAttention(EMBED_DIM if EMBED_DIM else 1024) # Set default embed_dim to 1024


json_entry_test_shift = generate_json_entry_test_shifting(text='Those who believe in a more open, liberal approach to immigration often frame their argument as being on the side of “good economics” versus “bad politics”. Alan Manning explains why the arguments presented as “good economics” are often unconvincing; those on this side of the argument really need to up their game. Post-pandemic net migration has risen in many countries. Although there are good reasons to think much of this is temporary, many countries, including the UK, have responded by making their immigration policies more restrictive. Those who believe in a more open, liberal approach to immigration argue that this is “good economics” but closer scrutiny shows these arguments are often not as strong as they think or could be. To illustrate this I consider three articles from respected sources. I apologise to the authors for singling them out; I could have chosen many others who put forward similar arguments. I chose them because I think they are representative, not because they are especially bad. They are an Economist editorial (“How to detoxify the politics of migration”), a Financial Times op-ed “Immigration crackdowns are good politics but bad economics” and a Guardian article “Why Home Office visa plans will be ‘nail in the coffin’ for UK hospitality ”. The Economist editorial points out that “the share of the world’s people who live outside their country of birth is just 3.6 per cent; it has barely changed since 1960, when it was 3.1 per cent”, implying immigration anxiety is a fuss about nothing. This is highly misleading; it is in the high-income countries where concerns about immigration are focused and there the share of migrants has doubled in the last 30 years and continues to rise. Can this really be described as “barely changed”? In most of these countries, the share of migrants is at historical highs. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist goes on to outline some economic benefits from immigration: immigrants are four times likelier to win a Nobel science prize than the native-born. True, but this is 45 people since 2000 so the chance of an immigrant winning a Nobel is so small that it can hardly be used to argue for a generally open immigration policy (though perhaps more open for some small selective groups of high-flying scientists). The Economist goes on: “Immigrants in America are nearly twice as likely to start a company as the native-born”. They could have given other less striking sources, for example a 2011 OECD report also found that “migrants are more likely to start a new business in most OECD countries” but “the survival rate of those businesses is lower than that for new businesses started by native-born entrepreneurs” so that “on average, across OECD countries, the percentage of migrant entrepreneurs differs only slightly from that of natives“. In addition “On average, a foreign-born self-employed who owns a small or medium firm creates slightly fewer [additional jobs] than their native-born counterparts” suggesting migrant businesses seem less successful. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist claim comes, I think, from this study but is an inaccurate summary of a misleading paper. Misleading because many companies have both local and migrant founders and the study’s headline figure is based on counting these as migrant firms. And then it compares the share of migrant firms with the share of migrants in the total population. Migrants are more likely to found firms in large part because they are more likely to be working-age; few companies are founded by children and pensioners. Adjust for this and you arrive at a conclusion similar to the OECD. Migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. You might think it is me who is missing the point, that we need working-age immigrants to deal with the challenge of an ageing population. The Financial Times op-ed argues “the US would need to let in nearly 4 million migrants a year, every year, to prevent its population growth turning negative in the coming decades”. I think the intention is to argue that high immigration is necessary to address ageing but really it makes the point that even immigration at very high levels (implying one per cent population growth per year permanently and ultimately a population over 50 per cent migrant) can only delay population decline by a few years. As a 2019 EU report put it, “Higher fertility or more immigration are not enough to cope with the challenges of population ageing”. The reason is simple; migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. All serious demographic work of which I am aware comes to a similar conclusion yet proponents of immigration as a solution to ageing rarely cite this work or give any impression they have ever read it. Then there is the argument that we need migrants to deal with problems of labour shortages. I have written about why this is misleading elsewhere. Most of the shortages we hear about are not the result of too few people in a country able to do the job (a skills shortage); they are the result of too few wanting the job because of poor pay and conditions. These sectors want a ring-fenced supply of migrants so they do not have to offer competitive salaries. The Financial Times op-ed argues immigration “reduced upward pressure on wages and inflation” while the Guardian article recounts an employer who “when he advertised for a head chef in Birmingham, on a very competitive rate of £37,000, he woke up the next morning to find rival restaurants outbidding him for the same staff at £40,000”. I understand why business owners dislike this bidding war for workers but perhaps the chef might think it a good thing. Both of these articles seem to imply that wage growth would have been higher if immigration had been lower thus conceding a common criticism of immigration (that it reduces wages) while seeming to imagine they are making a case for more liberal immigration. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. Please do not think those on the other side of the immigration debate are better; they are equally, probably more, guilty of using cherry-picked studies, misleading statistics and arguments that do not withstand much scrutiny. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. If, as I do, you think we can have a more liberal, open (though with limits), humane immigration policy, please be a bit less critical of the other side and more critical of yourselves. Otherwise don’t be surprised if your arguments fail to prevail',
    type="website",
    url="https://blogs.lse.ac.uk/politicsandpolicy/the-discussion-of-immigration-needs-to-improve/",
    title="The discussion on immigration needs to improve",
    date_scraped=datetime.now(),
    doc_id="1",
    cross_chunk_attention_instance= CROSS_CHUNK_ATTENTION_INSTANCE)

Initial chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015]],

        [[ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019]],

        [[ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Original chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015]],

        [[ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019]],

        [[ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Permuted chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015],
         [ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019],
         [ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Original embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015],
         [ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019],
         [ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Shifted embeddings:
 tensor

### Seems to work fine... shifted as expected

## 3. Checking for misalignment of calculations

In [41]:
import torch
import torch.nn as nn

class ShiftedCrossChunkAttention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, shift_size=1):
        super(ShiftedCrossChunkAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)
        self.shift_size = shift_size

    def shift_key_value(self, embeddings, shift_size):
        # embeddings: Tensor of shape (chunk_size, num_chunks, embed_dim)
        print("Original embeddings shape:", embeddings.shape)
        print("Original embeddings:\n", embeddings)
        shifted_embeddings = torch.roll(embeddings, shifts=shift_size, dims=1)
        print("Shifted embeddings shape:", shifted_embeddings.shape)
        print("Shifted embeddings:\n", shifted_embeddings)

        # Verify alignment
        self.verify_alignment(embeddings, shifted_embeddings, shift_size)
        
        return shifted_embeddings

    def verify_alignment(self, original, shifted, shift_size):
        num_chunks = original.size(1)
        for i in range(num_chunks):
            original_index = i
            shifted_index = (i + shift_size) % num_chunks
            print(f"Original chunk index: {original_index}, Shifted chunk index: {shifted_index}")
            print(f"Original embedding:\n{original[:, original_index, :]}")
            print(f"Shifted embedding:\n{shifted[:, shifted_index, :]}")
            assert torch.equal(original[:, original_index, :], shifted[:, shifted_index, :]), "Misalignment detected!"
    
    def forward(self, chunk_embeddings):
        # chunk_embeddings: Tensor of shape (num_chunks, chunk_size, embed_dim)
        print("Original chunk_embeddings shape:", chunk_embeddings.shape)
        print("Original chunk_embeddings:\n", chunk_embeddings)
        chunk_embeddings = chunk_embeddings.permute(1, 0, 2)  # (chunk_size, num_chunks, embed_dim)
        print("Permuted chunk_embeddings shape:", chunk_embeddings.shape)
        print("Permuted chunk_embeddings:\n", chunk_embeddings)
        
        # Shift keys and values
        shifted_embeddings = self.shift_key_value(chunk_embeddings, self.shift_size)
        
        # Apply attention mechanism across shifted chunks
        attn_output, _ = self.attention(chunk_embeddings, shifted_embeddings, shifted_embeddings)
        print("Attention output shape:", attn_output.shape)
        print("Attention output:\n", attn_output)
        
        # Reshape back to original format
        attn_output = attn_output.permute(1, 0, 2)  # (num_chunks, chunk_size, embed_dim)
        print("Reshaped attention output shape:", attn_output.shape)
        print("Reshaped attention output:\n", attn_output)
        
        return attn_output
def generate_json_entry_test_alignment(text, type, url, title, date_scraped, doc_id, cross_chunk_attention_instance):
    splitter = SentenceSplitter(
        chunk_size=EMBED_CHUNK_SIZE if EMBED_CHUNK_SIZE else 512,
        chunk_overlap=0
    )

    sentence_chunks = splitter.split_text(text)
    chunk_embeddings = []

    # Compute initial embeddings for each chunk
    for chunk_text in sentence_chunks:
        embedding = compute_text_embedding_sync(chunk_text, model_instance=MODEL_INSTANCE)
        chunk_embeddings.append(embedding)

    # Convert to tensor and reshape for the attention mechanism
    chunk_embeddings = torch.tensor(chunk_embeddings)
    num_chunks, embed_dim = chunk_embeddings.size()
    chunk_embeddings = chunk_embeddings.view(num_chunks, 1, embed_dim)  # Reshape to (num_chunks, chunk_size=1, embed_dim)
    print("Initial chunk_embeddings shape:", chunk_embeddings.shape)
    print("Initial chunk_embeddings:\n", chunk_embeddings)

    # Apply cross-chunk attention
    attended_embeddings = cross_chunk_attention_instance(chunk_embeddings)
    print("Attended embeddings shape:", attended_embeddings.shape)
    print("Attended embeddings:\n", attended_embeddings)
    attended_embeddings = attended_embeddings.view(num_chunks, embed_dim).tolist()  # Reshape back to (num_chunks, embed_dim)

    # Generate output list with attended embeddings
    output_list = []
    for chunk_id, attended_embedding in enumerate(attended_embeddings):
        id = f"{doc_id}_{chunk_id}"
        output_list.append([
            id, 
            doc_id,
            chunk_id,
            type,
            url,
            title,
            sentence_chunks[chunk_id],
            date_scraped,
            attended_embedding
        ])

    return output_list


json_entry_test_alignment = generate_json_entry_test_alignment(text='Those who believe in a more open, liberal approach to immigration often frame their argument as being on the side of “good economics” versus “bad politics”. Alan Manning explains why the arguments presented as “good economics” are often unconvincing; those on this side of the argument really need to up their game. Post-pandemic net migration has risen in many countries. Although there are good reasons to think much of this is temporary, many countries, including the UK, have responded by making their immigration policies more restrictive. Those who believe in a more open, liberal approach to immigration argue that this is “good economics” but closer scrutiny shows these arguments are often not as strong as they think or could be. To illustrate this I consider three articles from respected sources. I apologise to the authors for singling them out; I could have chosen many others who put forward similar arguments. I chose them because I think they are representative, not because they are especially bad. They are an Economist editorial (“How to detoxify the politics of migration”), a Financial Times op-ed “Immigration crackdowns are good politics but bad economics” and a Guardian article “Why Home Office visa plans will be ‘nail in the coffin’ for UK hospitality ”. The Economist editorial points out that “the share of the world’s people who live outside their country of birth is just 3.6 per cent; it has barely changed since 1960, when it was 3.1 per cent”, implying immigration anxiety is a fuss about nothing. This is highly misleading; it is in the high-income countries where concerns about immigration are focused and there the share of migrants has doubled in the last 30 years and continues to rise. Can this really be described as “barely changed”? In most of these countries, the share of migrants is at historical highs. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist goes on to outline some economic benefits from immigration: immigrants are four times likelier to win a Nobel science prize than the native-born. True, but this is 45 people since 2000 so the chance of an immigrant winning a Nobel is so small that it can hardly be used to argue for a generally open immigration policy (though perhaps more open for some small selective groups of high-flying scientists). The Economist goes on: “Immigrants in America are nearly twice as likely to start a company as the native-born”. They could have given other less striking sources, for example a 2011 OECD report also found that “migrants are more likely to start a new business in most OECD countries” but “the survival rate of those businesses is lower than that for new businesses started by native-born entrepreneurs” so that “on average, across OECD countries, the percentage of migrant entrepreneurs differs only slightly from that of natives“. In addition “On average, a foreign-born self-employed who owns a small or medium firm creates slightly fewer [additional jobs] than their native-born counterparts” suggesting migrant businesses seem less successful. Migrants are over-represented as founders of unicorn businesses but, like the Nobel Laureates, this is a tiny number of people. The Economist claim comes, I think, from this study but is an inaccurate summary of a misleading paper. Misleading because many companies have both local and migrant founders and the study’s headline figure is based on counting these as migrant firms. And then it compares the share of migrant firms with the share of migrants in the total population. Migrants are more likely to found firms in large part because they are more likely to be working-age; few companies are founded by children and pensioners. Adjust for this and you arrive at a conclusion similar to the OECD. Migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. You might think it is me who is missing the point, that we need working-age immigrants to deal with the challenge of an ageing population. The Financial Times op-ed argues “the US would need to let in nearly 4 million migrants a year, every year, to prevent its population growth turning negative in the coming decades”. I think the intention is to argue that high immigration is necessary to address ageing but really it makes the point that even immigration at very high levels (implying one per cent population growth per year permanently and ultimately a population over 50 per cent migrant) can only delay population decline by a few years. As a 2019 EU report put it, “Higher fertility or more immigration are not enough to cope with the challenges of population ageing”. The reason is simple; migrants age at the same rate as everyone else. They may be young when they arrive but don’t stay that way. All serious demographic work of which I am aware comes to a similar conclusion yet proponents of immigration as a solution to ageing rarely cite this work or give any impression they have ever read it. Then there is the argument that we need migrants to deal with problems of labour shortages. I have written about why this is misleading elsewhere. Most of the shortages we hear about are not the result of too few people in a country able to do the job (a skills shortage); they are the result of too few wanting the job because of poor pay and conditions. These sectors want a ring-fenced supply of migrants so they do not have to offer competitive salaries. The Financial Times op-ed argues immigration “reduced upward pressure on wages and inflation” while the Guardian article recounts an employer who “when he advertised for a head chef in Birmingham, on a very competitive rate of £37,000, he woke up the next morning to find rival restaurants outbidding him for the same staff at £40,000”. I understand why business owners dislike this bidding war for workers but perhaps the chef might think it a good thing. Both of these articles seem to imply that wage growth would have been higher if immigration had been lower thus conceding a common criticism of immigration (that it reduces wages) while seeming to imagine they are making a case for more liberal immigration. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. Please do not think those on the other side of the immigration debate are better; they are equally, probably more, guilty of using cherry-picked studies, misleading statistics and arguments that do not withstand much scrutiny. There is an unfortunate tendency for discussions of migration policy to lionise or demonise migrants when they are only human. If, as I do, you think we can have a more liberal, open (though with limits), humane immigration policy, please be a bit less critical of the other side and more critical of yourselves. Otherwise don’t be surprised if your arguments fail to prevail',
    type="website",
    url="https://blogs.lse.ac.uk/politicsandpolicy/the-discussion-of-immigration-needs-to-improve/",
    title="The discussion on immigration needs to improve",
    date_scraped=datetime.now(),
    doc_id="1",
    cross_chunk_attention_instance= CROSS_CHUNK_ATTENTION_INSTANCE)

Initial chunk_embeddings shape: torch.Size([3, 1, 1024])
Initial chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015]],

        [[ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019]],

        [[ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Original chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015]],

        [[ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019]],

        [[ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Permuted chunk_embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015],
         [ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019],
         [ 0.0123,  0.0028, -0.0301,  ..., -0.0177, -0.0106,  0.0009]]])
Original embeddings:
 tensor([[[ 0.0024,  0.0232, -0.0023,  ..., -0.0003, -0.0147, -0.0015],
         [ 0.0064,  0.0079, -0.0259,  ..., -0.0217, -0.0080,  0.0019],
         [ 0.0123,  0.0028, -0.0301,  ..., 