# Installing required libraries

In [None]:
!pip install tensorflow tensorflow-hub
!pip install pandas
!pip install torch

# Loading Model for Likes Prediction

In [None]:
# Load the saved Keras model (HDF5 file) that is used below to predict likes.
# NOTE(review): the path points into /kaggle/working, so the file presumably has
# to be produced by an earlier run — confirm it exists before running this cell.
from tensorflow.keras.models import load_model
model = load_model('/kaggle/working/likes_prediction.h5')

# Text and Image Embeddings

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

def text_embedding_function(text_column):
    """Embed a collection of strings with the Universal Sentence Encoder (v4).

    The TF-Hub module is loaded on the first call only and cached on the
    function object; this notebook calls the function once per text column,
    and reloading the encoder each time is pure overhead.

    Args:
        text_column: The strings to embed (the notebook passes DataFrame
            columns).

    Returns:
        The encoder output converted with ``.numpy()``: one embedding row per
        input string.
    """
    encoder = getattr(text_embedding_function, "_encoder", None)
    if encoder is None:
        encoder = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
        # Cache on the function so later calls reuse the loaded module.
        text_embedding_function._encoder = encoder

    embeddings = encoder(text_column)
    return embeddings.numpy()

In [None]:
import requests
from io import BytesIO

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
import numpy as np

def image_embedding_function(image_link):
    """Download an image and return its average-pooled MobileNetV2 features.

    The MobileNetV2 backbone is built on the first call only and cached on the
    function object, instead of being re-created for every image.

    Args:
        image_link: URL of the image to download.

    Returns:
        numpy.ndarray of shape ``(1, channels)``: the backbone's feature map
        averaged over its two spatial axes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the download does not respond within 30 seconds.
    """
    # Download the image; fail loudly on HTTP errors rather than passing an
    # error page to the image decoder.
    response = requests.get(image_link, timeout=30)
    response.raise_for_status()
    img = image.load_img(BytesIO(response.content), target_size=(224, 224))

    # Preprocess: to array, add the batch axis, apply MobileNetV2 scaling.
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)

    # Build the feature extractor once and reuse it on later calls.
    base_model = getattr(image_embedding_function, "_base_model", None)
    if base_model is None:
        base_model = MobileNetV2(weights='imagenet', include_top=False)
        image_embedding_function._base_model = base_model

    # Extract features and apply global average pooling over height/width.
    img_features = base_model.predict(img_array)
    img_features = np.mean(img_features, axis=(1, 2))

    return img_features


In [None]:
# Embed four text columns separately and join the per-column embeddings side by
# side (axis=1).
# NOTE(review): no cell above this one assigns `df`; it is first assigned in the
# test-dataset section further down (pd.read_excel), so this cell only works
# after out-of-order execution — confirm which frame is intended here.
text_embeddings = np.concatenate([text_embedding_function(df['date']),
                                  text_embedding_function(df['username']),
                                  text_embedding_function(df['inferred company']),
                                  text_embedding_function(df['content'])], axis=1)

In [None]:
import re

def extract_links(text):
    """Return the first single-quoted http(s) URL found in ``text``.

    Args:
        text: String to search, e.g. the textual form of a media entry such as
            ``"[Photo(previewUrl='https://...', ...)]"``.

    Returns:
        The first URL that appears between single quotes, or ``None`` when
        ``text`` contains no such URL (the previous implementation raised
        ``IndexError`` in that case).
    """
    url_pattern = r"'(https?://\S+)'"
    # Only the first hit is needed, so stop at it instead of collecting all.
    first_match = re.search(url_pattern, text)
    if first_match is None:
        return None
    return first_match.group(1)

# Sanity check: run extract_links on a sample media string and print the result.
# Despite the plural name, `links` holds the single value returned by
# extract_links, not a list.
text_with_links = "[Video(thumbnailUrl='https://pbs.twimg.com/amplify_video_thumb/1227938886921457664/img/BLtuv_1o8Rq_Noq4.jpg', variants=[VideoVariant(contentType='video/mp4', url='https://video.twimg.com/amplify_video/1227938886921457664/vid/1280x720/SFx1sCgkIbUnZeKo.mp4?tag=13', bitrate=2176000), VideoVariant(contentType='video/mp4', url='https://video.twimg.com/amplify_video/1227938886921457664/vid/480x270/3ooUCNaXys5kdswT.mp4?tag=13', bitrate=288000), VideoVariant(contentType='video/mp4', url='https://video.twimg.com/amplify_video/1227938886921457664/vid/640x360/t7WbD3E2QlEc6jin.mp4?tag=13', bitrate=832000), VideoVariant(contentType='application/x-mpegURL', url='https://video.twimg.com/amplify_video/1227938886921457664/pl/fUfDdm_8MfySsDlF.m3u8?tag=13', bitrate=None)], duration=6.0, views=19829)]"
links = extract_links(text_with_links)

print(links)


https://pbs.twimg.com/amplify_video_thumb/1227938886921457664/img/BLtuv_1o8Rq_Noq4.jpg


In [None]:
import numpy as np

In [None]:
# Load precomputed image embeddings from the .npz archive; the array is read
# from the archive entry named 'embeddings'.
loaded_data = np.load('/kaggle/working/image_embeddings_test.npz')
image_embeddings = loaded_data['embeddings']

In [None]:
# Initialise the containers for the reduced image and text embeddings as empty
# lists.
reshaped_image_embedding=[]
reshaped_text_embedding=[]

In [None]:
def input_format(text_embeddings):
    """Reduce every embedding row to a single value by summing its entries.

    Fixes over the previous version:
      * it assigned to ``reshaped_text_embedding`` inside the function, which
        made that name local and caused ``UnboundLocalError`` on the first
        ``append``;
      * the ``return`` sat inside the loop, so at most one row was processed;
      * the row count was hard-coded to 10000 instead of taken from the input;
      * results no longer accumulate in shared module-level state, so the
        function can be called repeatedly (once per dataset).

    Args:
        text_embeddings: 2-D array-like of shape ``(n_rows, n_features)``.
            Higher-rank input is summed over axis 1, matching a per-row
            element loop.

    Returns:
        numpy.ndarray with one summed value per input row (shape ``(n_rows,)``
        for 2-D input). An empty input yields an empty array.

    Raises:
        ValueError: If a non-empty input has fewer than two dimensions.
    """
    rows = np.asarray(text_embeddings)
    if rows.ndim < 2:
        if rows.size == 0:
            # Nothing to reduce (e.g. an empty list): return an empty result.
            return np.zeros(0, dtype=rows.dtype)
        raise ValueError(
            "input_format expects a 2-D array of embeddings, got shape "
            f"{rows.shape}"
        )
    # Summing over axis 1 equals adding up the entries of each row.
    return rows.sum(axis=1)

In [None]:
# Collapse every image embedding to one value by summing the entries of
# image_embeddings[i][0], for all rows (previously a hard-coded 3105).
# The result is rebuilt from scratch, so re-running the cell is idempotent, and
# stored as an (n_images, 1) NumPy array so that `.shape` works downstream.
# Assumes each image_embeddings[i][0] is a 1-D feature vector — TODO confirm.
reshaped_image_embedding = np.array(
    [np.sum(embedding[0], axis=0) for embedding in image_embeddings]
).reshape(-1, 1)

In [None]:
# Convert the reduced text embeddings to a NumPy array.
# NOTE(review): the image-side conversion on the next line is commented out, so
# `reshaped_image_embedding` is not converted by this cell.
#reshaped_image_embedding=np.array(reshaped_image_embedding)
reshaped_text_embedding=np.array(reshaped_text_embedding)

In [None]:
# Turn the reduced text embeddings into a single-column 2-D array.
# NOTE(review): the row count 10000 is hard-coded; reshape raises ValueError
# unless the array holds exactly 10000 values.
#reshaped_image_embedding=reshaped_image_embedding.reshape((10000,1))
reshaped_text_embedding=reshaped_text_embedding.reshape((10000,1))

In [None]:
# NOTE(review): `.shape` needs a NumPy array, but the np.array(...) conversion
# of this variable is commented out above — verify it is an array at this point.
reshaped_image_embedding.shape

(3105, 1)

In [None]:
# `+` on a NumPy array is an element-wise (broadcast) sum, not a concatenation.
# NOTE(review): confirm both operands have the same number of rows, and that
# adding the two values (rather than stacking them as features) is intended.
combined_embeddings = reshaped_image_embedding+reshaped_text_embedding

In [None]:
combined_embeddings.shape

(3105, 1)

In [None]:
import numpy as np
# Write `image_embeddings` to a new .npz archive under the entry 'embeddings'.
# NOTE(review): in the cells shown, `image_embeddings` is the array loaded
# earlier and is not modified before this point, so this re-saves the same data
# under a different file name — confirm that is intended.
output_file = '/kaggle/working/image_embeddings_cluster_18.npz'
np.savez(output_file, embeddings=image_embeddings)


# Predicting likes for test dataset

In [None]:
# pandas is not imported by any earlier cell, so `pd` would be undefined here.
import pandas as pd

# Read the time-split test set from Excel, write it out as CSV and read the CSV
# back. NOTE(review): the round trip presumably exists so values come back as
# pandas parses them from text (e.g. dates as strings) — confirm.
df = pd.read_excel('/kaggle/input/adobe-test-dataset/behaviour_simulation_test_time.xlsx')
df.to_csv('behaviour_simulation_test_time.csv', index=False)
test_data=pd.read_csv('behaviour_simulation_test_time.csv')

test_df = pd.DataFrame(test_data)

In [None]:
test_df

Unnamed: 0,id,date,content,username,media,inferred company
0,1,2021-11-08 00:40:49,"Andres, a Pharmacy Manager at Store 4669 in Pi...",WalmartWorld,[Photo(previewUrl='https://pbs.twimg.com/media...,walmart
1,2,2021-05-09 07:51:21,Happy #EuropeDay! We are always stronger by wo...,SimonHarrisTD,[Photo(previewUrl='https://pbs.twimg.com/media...,td
2,3,2022-11-22 05:30:05,"Election Results, BVAS Report Established Over...",IndependentNGR,[Photo(previewUrl='https://pbs.twimg.com/media...,independent
3,4,2021-09-22 21:52:08,American Olympic swimmer Elizabeth Beisel will...,CBCOlympics,[Photo(previewUrl='https://pbs.twimg.com/media...,cbc
4,5,2022-04-15 13:00:27,No need to hunt ‘em or hope to find ‘em in a b...,TimHortonsUS,[Photo(previewUrl='https://pbs.twimg.com/media...,tim hortons
...,...,...,...,...,...,...
9995,9996,2021-04-27 11:28:28,𝐊𝐚𝐢𝐳𝐞𝐧 - The essence of continuous improvement...,TGR_WEC,[Video(thumbnailUrl='https://pbs.twimg.com/ext...,toyota
9996,9997,2021-10-10 10:08:29,JACK'S BACK! 🤩\n\nBest of luck to our Reserve ...,WilliamsRacing,[Photo(previewUrl='https://pbs.twimg.com/media...,williams
9997,9998,2022-10-15 05:13:00,[Special Saturday Hearing in #SupremeCourt]\n\...,barandbench,[Photo(previewUrl='https://pbs.twimg.com/media...,bar
9998,9999,2022-05-26 00:42:30,Obama has crossed the line. Don’t make this re...,w_terrence,[Photo(previewUrl='https://pbs.twimg.com/media...,williams


In [None]:
# Embed each text column of the time-split test set on its own, then place the
# per-column embeddings next to each other (axis=1).
text_embeddings_time = np.concatenate(
    [
        text_embedding_function(test_df[column])
        for column in ('date', 'username', 'inferred company', 'content')
    ],
    axis=1,
)

In [None]:
# Build the model input for the time-split test set by passing its text
# embeddings through input_format.
X_test_time = input_format(text_embeddings_time)

In [None]:
# Run the loaded model on the time-split test features.
y_pred = model.predict(X_test_time)



In [None]:
# Flatten the predictions into a 1-D array.
y_pred=y_pred.reshape(-1)

In [None]:
y_pred

array([0.7145476 , 0.5932815 , 0.3918623 , ..., 0.4416125 , 0.6270305 ,
       0.60417193], dtype=float32)

In [None]:
# mse=0
# idx=0
# for i in y_test:
#     mse+=(i-y_pred[idx])*(i-y_pred[idx])
#     idx+=1
# mse=mse/len(y_test)

In [None]:
# NOTE(review): `mse` is only assigned in the commented-out cell above, which
# also needs a `y_test` that no visible cell defines — as written this cell
# raises NameError on a fresh run.
mse

In [None]:
# pandas is not imported by any earlier cell in the original notebook, so make
# sure `pd` is defined before it is used here.
import pandas as pd

# Read the company-split test set from Excel, write it out as CSV and read the
# CSV back. Note that this overwrites the `df` and `test_data` variables
# assigned for the time-split test set.
df = pd.read_excel('/kaggle/input/adobe-test-dataset/behaviour_simulation_test_company.xlsx')
df.to_csv('behaviour_simulation_test_company.csv', index=False)
test_data=pd.read_csv('behaviour_simulation_test_company.csv')

test_df2 = pd.DataFrame(test_data)

In [None]:
test_df2

Unnamed: 0,id,date,content,username,media,inferred company
0,1,2019-01-08 18:03:10,the Heathrow drone right now <hyperlink>,bbcthree,[Photo(previewUrl='https://pbs.twimg.com/media...,bbc
1,2,2018-01-29 10:51:17,A deal at £60m?\n\nThe latest on Pierre-Emeric...,BBCSport,[Photo(previewUrl='https://pbs.twimg.com/media...,bbc
2,3,2019-09-05 07:25:14,"""Dealing with Boris Johnson is like someone de...",BBCPolitics,[Video(thumbnailUrl='https://pbs.twimg.com/med...,bbc
3,4,2018-03-28 18:15:00,Doesn't unlimited food &amp; drinks all night ...,BrdgstoneArena,[Photo(previewUrl='https://pbs.twimg.com/media...,bridgestone
4,5,2019-09-11 21:15:44,Apple Watch Series 5.\n\nYou’ve never seen a w...,BestBuy,[Video(thumbnailUrl='https://pbs.twimg.com/ext...,best buy
...,...,...,...,...,...,...
9995,9996,2020-01-25 01:45:07,"""So what do all these defenses mean? What they...",HouseIntelDems,[Video(thumbnailUrl='https://pbs.twimg.com/med...,house
9996,9997,2019-06-26 12:51:27,"In this week’s #MacroMemo, we explore last wee...",rbcgamnews,[Photo(previewUrl='https://pbs.twimg.com/media...,rbc
9997,9998,2019-05-06 10:37:03,When it's bank holiday Monday and someone ment...,bbceastenders,[Gif(thumbnailUrl='https://pbs.twimg.com/tweet...,bbc
9998,9999,2019-05-19 12:00:51,England boss Gareth Southgate says he is conce...,BBCSport,[Photo(previewUrl='https://pbs.twimg.com/media...,bbc


In [None]:
# Embed each text column of the company-split test set on its own, then place
# the per-column embeddings next to each other (axis=1).
text_embeddings_company = np.concatenate(
    [
        text_embedding_function(test_df2[column])
        for column in ('date', 'username', 'inferred company', 'content')
    ],
    axis=1,
)

In [None]:
# Build the model input for the company-split test set by passing its text
# embeddings through input_format.
X_test_company = input_format(text_embeddings_company)

In [None]:
# Run the loaded model on the company-split test features. This reuses the name
# `y_pred`, replacing the time-split predictions stored under it earlier.
y_pred = model.predict(X_test_company)



In [None]:
# Flatten the predictions into a 1-D array.
y_pred=y_pred.reshape(-1)

In [None]:
y_pred

array([0.41054586, 0.47821495, 0.3749732 , ..., 0.37888548, 0.7872715 ,
       1.139032  ], dtype=float32)

In [None]:
# mse=0
# idx=0
# for i in y_test:
#     mse+=(i-y_pred[idx])*(i-y_pred[idx])
#     idx+=1
# mse=mse/len(y_test)

In [None]:
# NOTE(review): `mse` is only assigned in commented-out code (which also needs
# a `y_test` that no visible cell defines) — as written this cell raises
# NameError on a fresh run.
mse