https://www.kaggle.com/code/pankul/image-classification-w-vgg16-weights

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torchvision.io import read_image, ImageReadMode
import torchvision
from torchvision.models import vgg16, VGG16_Weights

In [2]:
model_VGG = vgg16(pretrained=True)
model_VGG.eval()



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [3]:
weights = VGG16_Weights.DEFAULT
img = read_image('download.png',ImageReadMode.RGB)
preprocess = weights.transforms()
batch = preprocess(img).unsqueeze(0)



In [4]:
prediction = model_VGG(batch).squeeze(0).softmax(0)
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = weights.meta["categories"][class_id]
print(f"{category_name}: {100 * score:.1f}%")

envelope: 52.3%


In [5]:
print(prediction)

tensor([1.3940e-08, 3.3728e-08, 1.6699e-08, 2.0886e-08, 6.8706e-08, 3.5107e-07,
        5.7571e-09, 3.0424e-08, 3.1428e-08, 4.8228e-07, 1.2860e-08, 4.1380e-08,
        1.5684e-08, 7.3109e-09, 3.3472e-08, 6.5863e-08, 1.0455e-07, 1.7187e-08,
        1.4225e-07, 6.0205e-09, 1.1687e-09, 5.6427e-07, 9.3066e-07, 2.5272e-07,
        2.0217e-07, 5.2212e-08, 9.6702e-08, 1.4840e-07, 3.3169e-08, 1.1520e-08,
        1.0130e-09, 2.5459e-09, 1.6265e-09, 2.4641e-08, 5.1403e-08, 1.0902e-09,
        2.8889e-08, 2.2076e-09, 6.0049e-08, 3.8904e-08, 9.9589e-09, 1.1972e-08,
        1.6583e-08, 1.5290e-08, 4.3518e-08, 6.8465e-08, 3.6682e-08, 2.1322e-08,
        1.3351e-09, 2.5328e-08, 6.1616e-07, 2.1692e-08, 4.2519e-08, 1.4186e-07,
        3.9425e-08, 9.0397e-09, 5.8402e-09, 1.6297e-09, 3.4780e-08, 2.6792e-07,
        2.5594e-08, 1.5809e-09, 5.5677e-09, 2.4054e-07, 1.3917e-07, 8.7529e-07,
        1.4706e-07, 4.4170e-08, 9.9791e-08, 4.4673e-07, 1.9516e-08, 9.3228e-07,
        5.9271e-08, 1.7783e-07, 9.1817e-

In [6]:
import os

files_in_directory = os.listdir('./PLOTS')
# read all jpg
jpg_files = [file for file in files_in_directory if file.endswith('.png')]
print(jpg_files)

weight_list = []

for image in jpg_files:
    img = read_image(f'./PLOTS/{image}',ImageReadMode.RGB)
    preprocess = weights.transforms()
    batch = preprocess(img).unsqueeze(0)
    prediction = model_VGG(batch).squeeze(0).softmax(0)
    weight_list.append(prediction.tolist())

['download.png', 'GPTJbalance.png', 'GPTJcore.png', 'GPTJIO.png', 'GPTJmem.png', 'GPTJqu.png', 'GPTJseq.png', 'GPTJsingle.png', 'GPTNeobalanced.png', 'GPTNeocore.png', 'GPTNeoIO.png', 'GPTNeomem.png', 'GPTNeoQu.png', 'GPTNeoseq.png', 'GPTNeoXbalanced.png', 'GPTNeoXcore.png', 'GPTNeoXIO.png', 'GPTNeoXmem.png', 'GPTNeoXqa.png', 'GPTNeoXseq.png', 'GPTNeoXsingle.png']


In [7]:
print(weight_list)

[[2.5410240596102085e-06, 4.7421832277905196e-05, 1.96806104213465e-05, 1.237179822055623e-05, 8.611718658357859e-05, 0.00018417416140437126, 3.700147499330342e-05, 6.35657852399163e-05, 2.7218437026022002e-05, 0.00041013394366018474, 9.223531378665939e-05, 0.00019705606973730028, 0.00025263187126256526, 8.604092727182433e-05, 0.0004493066808208823, 0.00042356664198450744, 0.0007780922460369766, 0.00030936297844164073, 0.0006220421055331826, 7.981471571838483e-05, 1.0844933967746329e-05, 0.002569075208157301, 0.002589096315205097, 0.0011869266163557768, 0.0005136763793416321, 3.6452107451623306e-05, 0.0001895176392281428, 0.00015915387484710664, 5.165033871890046e-05, 1.2316116226429585e-05, 3.471833679213887e-06, 5.712230176868616e-06, 1.952979118868825e-06, 1.0118157661054283e-05, 3.086624565185048e-05, 1.7263832887692843e-06, 3.079324233112857e-05, 2.150224418073776e-06, 4.669254849432036e-05, 0.00025645000278018415, 0.00022581787197850645, 0.00013295756070874631, 0.0001613937638467

In [8]:
import pandas as pd

df = pd.read_csv('Dataset_Complete_new.csv')


In [9]:
df = df.drop(df.columns[0], axis=1)
df

Unnamed: 0,Username,Followers,Followees,Verification,Business,Business Category,Posts,Caption Token 1,Caption Token 2,Caption Token 3,...,990,991,992,993,994,995,996,997,998,999
0,231,464630,110,1,0,0,453,32,27737,1110,...,4.790062e-05,5.874800e-06,3.164175e-05,3.962273e-06,1.073073e-06,1.518422e-05,8.285926e-06,1.085005e-06,1.705110e-03,2.033460e-04
1,231,464630,110,1,0,0,453,5211,345,1975,...,1.755963e-06,2.359930e-06,4.036049e-06,3.162234e-05,2.632791e-05,1.711393e-05,4.834904e-05,1.479381e-05,9.874469e-05,1.768440e-04
2,231,464630,110,1,0,0,453,43730,428,11580,...,1.334257e-07,3.685503e-07,5.709466e-07,2.522005e-06,1.224240e-06,5.823150e-06,7.481035e-07,2.643675e-07,1.062817e-06,3.312291e-06
3,231,464630,110,1,0,0,453,7738,12,8940,...,1.145234e-05,3.199559e-07,1.102531e-07,1.546725e-06,2.860360e-07,2.342150e-06,1.597221e-06,2.772259e-07,8.442921e-06,3.435930e-04
4,231,464630,110,1,0,0,453,8582,99,241,...,4.905597e-08,1.226727e-08,5.824077e-09,1.467996e-07,1.411268e-07,9.342696e-07,6.143584e-07,5.742364e-09,5.474894e-07,3.829028e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2446,13,30980252,101,1,0,0,738,44,33103,13,...,1.540743e-08,5.848895e-08,5.764909e-08,8.989597e-08,2.967782e-07,1.279557e-07,2.918121e-07,7.234205e-09,1.324046e-06,1.510216e-03
2447,13,30980252,101,1,0,0,738,31,85,5119,...,2.215644e-08,2.976236e-08,4.801437e-09,4.831751e-08,5.136811e-07,3.722031e-08,1.971586e-08,1.011932e-08,2.002270e-07,2.401269e-07
2448,13,30980252,101,1,0,0,738,38,3247,494,...,7.198442e-08,6.232727e-08,9.155664e-08,3.690384e-07,4.337970e-06,6.601301e-07,2.282336e-06,3.176082e-08,1.150381e-06,2.318747e-05
2449,13,30980252,101,1,0,0,738,26486,101,9576,...,1.244301e-07,4.659513e-08,6.310743e-07,1.079178e-07,7.582918e-06,3.400634e-06,1.740985e-06,3.382646e-07,2.276448e-03,2.955427e-06


In [10]:
# List to store the results
X = []
y_like = []
y_comment = []
# Iterate over each row in the DataFrame
for _, row in df.iterrows():
    row_list = []
    for item in row[:-2]:
        if isinstance(item, list):
            # If the item is a list, extend the row_list with its elements
            row_list.extend(item)
        else:
            # Otherwise, just append the item
            row_list.append(item)
    # Append the processed row to the result list
    X.append(row_list)

y_like = df["Likes"].values.tolist()
y_comment = df["Comments"].values.tolist()

In [11]:
y_like

[95281,
 74311,
 70293,
 69970,
 69289,
 68087,
 68256,
 67066,
 64406,
 64532,
 12546857,
 11224004,
 10922610,
 9656027,
 9414379,
 9229569,
 8844203,
 8522291,
 8476311,
 8446248,
 466965,
 394665,
 374312,
 365585,
 352924,
 345612,
 345369,
 336107,
 326490,
 265795,
 42287,
 40677,
 40812,
 40214,
 36206,
 34666,
 33994,
 31918,
 29213,
 27627,
 30666,
 23322,
 22859,
 22683,
 21721,
 16165,
 15790,
 15605,
 15585,
 15495,
 94449,
 86047,
 75955,
 72788,
 65373,
 61471,
 61012,
 61018,
 55949,
 55200,
 59507,
 49162,
 47447,
 47268,
 46514,
 37778,
 36303,
 30689,
 29474,
 27297,
 51082,
 51321,
 50715,
 50364,
 41102,
 39716,
 38012,
 37252,
 35055,
 34306,
 305954,
 257227,
 233146,
 216333,
 177932,
 149437,
 148876,
 145477,
 144275,
 144139,
 21725,
 18199,
 18119,
 17698,
 16974,
 16212,
 15591,
 14836,
 14703,
 14375,
 155396,
 141672,
 139637,
 116964,
 99151,
 97657,
 96529,
 95544,
 93303,
 83158,
 127384,
 122301,
 116360,
 116294,
 111924,
 104212,
 99862,
 97163,
 92

In [12]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report

In [13]:
model_like = XGBRegressor(n_estimators=200, max_depth=5)
model_comment =XGBRegressor(n_estimators=200, max_depth=5)

In [15]:
X_train_comment, X_test_comment, y_train_comment, y_test_comment = train_test_split(X, y_comment, test_size=0.1, shuffle = True)
X_train_like, X_test_like, y_train_like, y_test_like = train_test_split(X, y_like, test_size=0.1, shuffle = True)

In [20]:
#like_label = np.random.randint(50, size=len(weight_list))
#comment_label = np.random.randint(50, size=len(weight_list))
#print(len(weight_list))

weight_list = np.array(X)
like_label = np.array(y_like)
comment_label = np.array(y_comment) 
#print(weight_list)
#print(like_label)

model_like.fit(X_train_like,y_train_like)
model_comment.fit(X_train_comment,y_train_comment)

In [25]:
cross_val_score(model_like , X_test_like, y_test_like)

array([0.99779946, 0.99271129, 0.98574396, 0.99379954, 0.98990843])

In [17]:
scores = cross_val_score(model_like , weight_list, like_label)

In [18]:
print(scores)

[0.94210552 0.97165113 0.94562273 0.99950489 0.98917148]


In [19]:
model_like.fit(X,y_like)
model_comment.fit(X,y_comment)
weight_list = np.array(X)
like_label = np.array(y_like)
comment_label = np.array(y_comment) 

In [118]:
import cv2

def predict_result(image,user_info,model_like,model_comment):
    img = read_image(f'./{image}',ImageReadMode.RGB)
    preprocess = weights.transforms()
    batch = preprocess(img).unsqueeze(0)
    prediction = model_VGG(batch).squeeze(0).softmax(0)

    img_info = np.array(user_info+prediction.tolist())
    print(img_info)
    result_like = model_like.predict([img_info])
    result_comment = model_comment.predict([img_info])
    
    like = result_like.tolist()[0] if result_like.tolist()[0] > 0 else 0
    comment = result_comment.tolist()[0] if result_comment.tolist()[0] > 0 else 0
    
    print(f"The input image will receive {int(like)} likes and {int(comment)} comments")
    
    return result_like.tolist()[0],result_comment.tolist()[0]

    

In [119]:
user_profile = df.loc[2, :].values.flatten().tolist()[:64]
print(user_profile)
predict_result('DALLE4.jpg',user_profile,model_like,model_comment)

[231.0, 464630.0, 110.0, 1.0, 0.0, 0.0, 453.0, 43730.0, 428.0, 11580.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 50256.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 1.0]
[2.31000000e+02 4.64630000e+05 1.10000000e+02 ... 1.63546310e-09
 3.71415172e-06 7.20353199e-08]
The input image will receive 2240 likes and 185 comments


(2240.752685546875, 185.2084197998047)

In [120]:
predict_result('weneedcurve.png',user_profile,model_like,model_comment)

[2.31000000e+02 4.64630000e+05 1.10000000e+02 ... 4.29417479e-09
 8.53115054e-08 3.50947602e-07]
The input image will receive 3997 likes and 0 comments


(3997.763427734375, -38.78290939331055)

In [121]:
predict_result('weneedcurve.png',df.loc[1000, :].values.flatten().tolist()[:64],model_like,model_comment)

[3.80000000e+01 9.23197000e+05 2.50000000e+01 ... 4.29417479e-09
 8.53115054e-08 3.50947602e-07]
The input image will receive 2734 likes and 0 comments


(2734.829345703125, -93.84390258789062)

In [122]:
predict_result('DALLE4.jpg',df.loc[1000, :].values.flatten().tolist()[:64],model_like,model_comment)

[3.80000000e+01 9.23197000e+05 2.50000000e+01 ... 1.63546310e-09
 3.71415172e-06 7.20353199e-08]
The input image will receive 2559 likes and 140 comments


(2559.231689453125, 140.7702178955078)