## Controller File to generate the Cosine Similarity Values
This is the main file: it computes the cosine similarity values between every ad and the dictionary, and stores the results as CSV, JSON, and Excel files.

## 0: Imports

In [1]:
import pandas as pd
import sys
sys.path.append('..')

In [2]:
from Scripts import adSimilarity 
import pickle

## 1: Defining environment variables

In [3]:
# Base name (no extension) of the pickled ad embeddings; read from adEmbeddings/<name>.pkl.
AD_EMBEDDINGS_FILE = "serializedAdEmbeddings_distilRoberta"
# Name of the dictionary embeddings; handed unchanged to the adSimilarity helpers.
DICTIONARY_EMBEDDINGS_FILE = "dictEmbeddings_v3"
# Base name (no extension) of the result files written under similarityValues/ (.csv, .json, .xlsx).
OUT_FILE = "vals"

## 2: Generating & Saving Similarity Values

In [4]:
# Load the serialized ad embeddings. Only the load itself needs the file
# handle, so the file is closed before the similarity computation starts.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only run this against embedding files produced by a trusted source.
with open(f"adEmbeddings/{AD_EMBEDDINGS_FILE}.pkl", 'rb') as fp:
    deserializedAdEmbeddings = pickle.load(fp)

# Column labels of the output table. The loop below fills column_names[1:]
# positionally with similarity values, so column_names[0] is presumably
# "Identifier" -- confirm against adSimilarity.getDictionaryColumns.
column_names = adSimilarity.getDictionaryColumns(DICTIONARY_EMBEDDINGS_FILE)

# Collect one dict per ad and build the DataFrame once at the end. Growing a
# DataFrame row by row with `data.loc[len(data)] = ...` re-allocates the
# frame on every iteration, which is quadratic in the number of ads.
rows = []
for identifier in deserializedAdEmbeddings:
    mean_pooled_value = deserializedAdEmbeddings[identifier]
    cosine_similarity_val = adSimilarity.getCosineSimilarity(mean_pooled_value, DICTIONARY_EMBEDDINGS_FILE)
    new_row = {"Identifier": identifier}
    # Value k (0-based) belongs to column k + 1. Indexing column_names
    # (rather than zipping) keeps the IndexError if more values than
    # columns are returned, instead of silently dropping the surplus.
    for position, value in enumerate(cosine_similarity_val, start=1):
        new_row[column_names[position]] = value
    rows.append(new_row)

# `columns=` fixes the column order and yields an empty table with the right
# header when there are no ads; keys missing from a row become NaN.
data = pd.DataFrame(rows, columns=column_names)

In [5]:
# Persist the similarity table in three formats under similarityValues/,
# all sharing the OUT_FILE base name. The row index is left out of every file.
csv_path = f"similarityValues/{OUT_FILE}.csv"
json_path = f"similarityValues/{OUT_FILE}.json"
xlsx_path = f"similarityValues/{OUT_FILE}.xlsx"

data.to_csv(csv_path, index=False)
# orient="records" emits a JSON list with one object per row.
data.to_json(json_path, orient="records", indent=4)
data.to_excel(xlsx_path, index=False)