In [1]:
import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
def add_noise(user_embedding, noise_strength=0.01, rng=None):
    """Return ``user_embedding`` perturbed by zero-mean Gaussian noise.

    Parameters
    ----------
    user_embedding : array_like
        Embedding to perturb. Anything ``np.shape`` understands is accepted
        (ndarray, list, tuple, scalar); the input itself is not modified.
    noise_strength : float, default 0.01
        Standard deviation of the Gaussian noise added to every component.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so existing callers behave exactly as before. Pass a seeded
        generator to make the perturbation reproducible.

    Returns
    -------
    numpy.ndarray
        ``user_embedding + noise`` with the same shape as the input.
    """
    # Both np.random and a Generator expose normal(loc, scale, size).
    sampler = np.random if rng is None else rng
    # np.shape works for plain sequences too, unlike the `.shape` attribute.
    noise = sampler.normal(0, noise_strength, np.shape(user_embedding))
    return user_embedding + noise

In [40]:
def initialize_deal(publisher_list, round_per_iter, agents, adv_embeddings,
                    publisher_emb_path='publisher_embedding/data/embeddings_to_pick/sites_embeddings_10dim.pkl'):
    """Compute per-round cosine similarities between agents and publishers.

    For every publisher, ``round_per_iter`` noisy copies of its embedding are
    generated with ``add_noise`` (each copy simulates a different user), and
    each copy is compared with the advertiser embedding of every agent.

    Parameters
    ----------
    publisher_list : iterable of str
        Publisher keys to look up in the pickled publisher embeddings.
    round_per_iter : int
        Number of noisy samples to draw per publisher.
    agents : iterable of dict
        Each dict must provide ``'name'`` (key of the result) and
        ``'adv_name'`` (key into ``adv_embeddings``).
    adv_embeddings : mapping
        Maps advertiser name to its embedding array.
    publisher_emb_path : str, optional
        Location of the pickled publisher embeddings. Defaults to the path the
        function previously hard-coded.

    Returns
    -------
    dict
        ``result[agent_name][publisher]`` is the array returned by
        ``cosine_similarity`` for that pair: one row per noisy sample and a
        single column.

    Raises
    ------
    KeyError
        If a publisher or an agent's ``adv_name`` has no embedding.
    """
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at trusted files. The context manager closes the handle deterministically.
    with open(publisher_emb_path, 'rb') as emb_file:
        publisher_emb = pickle.load(emb_file)
    chosen_publisher_emb = {publisher: publisher_emb[publisher] for publisher in publisher_list}

    # For each publisher, add noise to the embedding round_per_iter times to
    # simulate different users.
    noisy_publisher_emb = {
        publisher: np.array([
            add_noise(chosen_publisher_emb[publisher])
            for _ in range(round_per_iter)
        ])
        for publisher in publisher_list
    }

    # Compare every noisy publisher sample with every agent's ad embedding.
    agents_publishers_similarity = {}
    for agent in agents:
        # cosine_similarity needs 2-D input; reshape once per agent instead of
        # once per (agent, publisher) pair.
        agent_embedding = adv_embeddings[agent['adv_name']].reshape(1, -1)
        agents_publishers_similarity[agent['name']] = {
            publisher: cosine_similarity(noisy_publisher_emb[publisher], agent_embedding)
            for publisher in publisher_list
        }
    return agents_publishers_similarity

In [8]:
# Publishers whose embeddings are used in the simulation.
publisher_list = ['repubblica.it', 'ilfattoquotidiano.it', 'corriere.it', 'lastampa.it', 'ilsole24ore.com']
# Each agent has a display name and the advertiser whose embedding it uses.
agents = [
    {
        'name': 'agent1',
        'adv_name': 'Racer 1000'
    },
    {
        'name': 'agent2',
        'adv_name': 'Reflex Pro'
    }
]
# Load the advertiser embeddings; the context manager closes the file handle.
# NOTE: pickle can execute arbitrary code while loading; only use trusted files.
with open('publisher_embedding/data/embeddings_to_pick/ad_embeddings_10dim.pkl', 'rb') as adv_emb_file:
    adv_embeddings = pickle.load(adv_emb_file)

In [12]:
# Number of noisy samples (simulated rounds) drawn per publisher.
round_per_iter: int = 10

In [13]:
# Step-by-step version of the embedding preparation done inside initialize_deal,
# kept at cell level so the intermediate values can be inspected.
# NOTE: pickle can execute arbitrary code while loading; only use trusted files.
with open('publisher_embedding/data/embeddings_to_pick/sites_embeddings_10dim.pkl', 'rb') as emb_file:
    publisher_emb = pickle.load(emb_file)
chosen_publisher_emb = {publisher: publisher_emb[publisher] for publisher in publisher_list}
# For each publisher, add noise to the embedding round_per_iter times to
# simulate different users (one row per simulated user).
noisy_publisher_emb = {
    publisher: np.array([
        add_noise(chosen_publisher_emb[publisher])
        for _ in range(round_per_iter)
    ])
    for publisher in publisher_list
}

In [14]:
# Inspect the noisy samples generated for 'repubblica.it' (one row per simulated round).
noisy_publisher_emb['repubblica.it']

array([[ 4.40166322e-01,  1.01663863e-01,  1.67248904e-01,
         5.14380222e-02,  3.54608968e-02,  1.22950198e-02,
        -6.82915685e-02, -2.44192066e-02, -1.11601488e-02,
        -3.89383178e-03],
       [ 4.65482262e-01,  1.03009698e-01,  1.76115524e-01,
         8.20578294e-02,  4.09425000e-02, -2.26394842e-04,
        -4.96406599e-02, -2.19545114e-02,  1.09332297e-02,
        -8.14898994e-03],
       [ 4.61851025e-01,  8.01479624e-02,  1.55169054e-01,
         8.37390718e-02,  1.57718437e-02,  2.08718830e-02,
        -8.99843646e-02, -9.00611723e-03,  2.82247839e-03,
        -1.03272111e-03],
       [ 4.44346154e-01,  9.39257521e-02,  1.77304488e-01,
         4.57871949e-02,  3.15393508e-02,  2.81934326e-04,
        -5.91211327e-02, -2.79829860e-02, -1.00514015e-02,
         5.89371156e-05],
       [ 4.56070120e-01,  9.12196299e-02,  1.42146305e-01,
         6.32444602e-02,  4.27025743e-02,  7.65617071e-03,
        -6.85382369e-02, -2.82851625e-02,  3.92757468e-03,
         1.

In [45]:
# First noisy sample for 'repubblica.it'.
noisy_publisher_emb['repubblica.it'][0]

array([ 0.44016632,  0.10166386,  0.1672489 ,  0.05143802,  0.0354609 ,
        0.01229502, -0.06829157, -0.02441921, -0.01116015, -0.00389383])

In [15]:
# Embedding stored for the 'Racer 1000' advertiser (the adv_name used by agent1).
adv_embeddings['Racer 1000']

array([ 0.3140404 , -0.17245695,  0.07879242, -0.11443274,  0.28469235,
        0.05925324,  0.15899682, -0.14622322,  0.03026602, -0.02893178],
      dtype=float32)

In [18]:
# Manual check: similarity of every noisy 'repubblica.it' sample against the
# 'Racer 1000' embedding, which is lifted to a single-row 2-D array first.
cosine_similarity(
    noisy_publisher_emb['repubblica.it'],
    adv_embeddings['Racer 1000'][np.newaxis, :],
)

array([[0.50434156],
       [0.51029911],
       [0.46937277],
       [0.51640276],
       [0.51889865],
       [0.49212789],
       [0.53591857],
       [0.51716585],
       [0.4887576 ],
       [0.52854021]])

In [41]:
# Use the shared round_per_iter constant instead of repeating the literal 10,
# so this call stays in sync with the exploratory cells.
agents_publishers_similarity = initialize_deal(publisher_list, round_per_iter, agents, adv_embeddings)

In [42]:
# Per-round similarities between agent1 and 'repubblica.it'. initialize_deal draws
# fresh noise, so the values differ from the manual cosine_similarity check.
agents_publishers_similarity['agent1']['repubblica.it']

array([[0.49988949],
       [0.51340985],
       [0.54589478],
       [0.52977638],
       [0.52073111],
       [0.54818871],
       [0.50911809],
       [0.52180341],
       [0.49972175],
       [0.51612423]])

In [47]:
# Scalar similarity for the first round (row 0 of the single-column result).
agents_publishers_similarity['agent1']['repubblica.it'][0, 0]

0.49988948507923375

In [48]:
# Seeded generator used by the sampling experiments in the following cells.
rng = np.random.default_rng(seed=0)

In [67]:
# Ten log-normal draws; the underlying normal has mean -3 and sigma 0.1.
rng.lognormal(mean=-3, sigma=0.1, size=10)

array([0.04877915, 0.04485653, 0.05293515, 0.04879961, 0.04765885,
       0.05244366, 0.04746997, 0.05720568, 0.05156797, 0.04748064])

In [58]:
# Ten normal draws with mean 0 and std 0.01, the same scale as add_noise's
# default noise_strength.
rng.normal(loc=0, scale=0.01, size=10)

array([-0.00287388,  0.01574408, -0.00432786, -0.00735483,  0.00249785,
        0.01031453,  0.0016101 , -0.00585529, -0.0134122 , -0.0140152 ])