In [1]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the four precomputed embedding tables, one CSV per language model.
# The paths are read in the order listed and bound to the names on the left
# in that same order.
bert_embeded, albert_embeded, roberta_embeded, xlmroberta_embeded = map(
    pd.read_csv,
    (
        'data/BERT_embeded.csv',
        "data/ALBERT_embeded.csv",
        "data/ROBERTA_embeded.csv",
        "data/XLMROBERTA_embeded.csv",
    ),
)

In [6]:
def apply_svd(bert_df, num_features_svd, model_name, random_state=42):
    """Reduce an embedding table with truncated SVD and save the result as CSV.

    Every column of ``bert_df`` except ``"id"`` is passed to
    ``TruncatedSVD.fit_transform``. The reduced matrix is wrapped in a new
    DataFrame, the ids from ``bert_df`` are attached, and the frame is written
    to ``data/{model_name}_svd{num_features_svd}.csv`` (without the index).

    Parameters
    ----------
    bert_df : pandas.DataFrame
        Input table; must contain an ``"id"`` column. All other columns are
        used as features for the decomposition.
    num_features_svd : int
        Number of SVD components to keep (``n_components``).
    model_name : str
        Used only to build the output file name.
    random_state : int or None, default 42
        Seed forwarded to ``TruncatedSVD`` so repeated runs give the same
        components. Pass ``None`` for the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns ``bert_svd_0`` … ``bert_svd_{num_features_svd - 1}`` followed
        by ``id``.

    Raises
    ------
    KeyError
        If ``bert_df`` has no ``"id"`` column.
    """
    embeddings = bert_df.drop("id", axis=1)

    transformer = TruncatedSVD(
        n_components=num_features_svd,
        random_state=random_state,
    )
    matrix = transformer.fit_transform(embeddings)

    # NOTE: the column prefix is "bert_svd_" regardless of ``model_name``;
    # kept unchanged so existing readers of these CSVs keep working.
    columns = [f"bert_svd_{dim}" for dim in range(num_features_svd)]
    feature_df = pd.DataFrame(matrix, columns=columns)

    # Carry over the ids from the input instead of renumbering 0..n-1, so each
    # reduced row stays tied to its source row. ``to_numpy()`` assigns by
    # position and avoids index alignment against ``bert_df``'s index.
    feature_df["id"] = bert_df["id"].to_numpy()

    feature_df.to_csv(f"data/{model_name}_svd{num_features_svd}.csv", index=False)
    return feature_df

In [7]:
# Pair each loaded embedding table with the label used in its output file name.
model_names = list(
    zip(
        (bert_embeded, albert_embeded, roberta_embeded, xlmroberta_embeded),
        ("BERT", "ALBERT", "ROBERTA", "XLMROBERTA"),
    )
)

# Run the 256-component reduction for every model in turn. apply_svd writes
# one CSV per call; `bert_decomposition` ends up holding only the result of
# the last iteration.
for model, name in model_names:
    bert_decomposition = apply_svd(model, num_features_svd=256, model_name=name)