In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the overlap table; the first CSV column becomes the row index.
file_path = "data/overlap_df.csv"
df = pd.read_csv(filepath_or_buffer=file_path, index_col=0)

# The row labels are the keyword names referenced by the cells below.
keywords = df.index.to_list()

In [3]:
# Sanity check: how many keywords were loaded.
keyword_count = len(keywords)
print(keyword_count)

29


In [4]:
# Tunable constants for the scoring cells below:
#   LAMBDA scales the dependency factor subtracted from the raw score,
#   ALPHA is the offset and Beta the multiplier applied to the normalized score.
LAMBDA, ALPHA, Beta = 0.7, 1.0, 4.0

# NumPy array of the overlap table, used for the array arithmetic that follows.
overlap_matrix = df.to_numpy()
print(overlap_matrix)

[[ 0 21  9 20 24  5 13  0  2  2  5  7  3  4  4  2 10  6  6  1  1  3  3  1
   5 12  7  1 16]
 [21  0  7 17 21  3 11  1  3  2  3  6  3  4  4  2  7  7  5  1  1  2  2  1
   5  9  5  2 12]
 [ 9  7  0 10 11  3 13  1  2  4  2  6  3  4  5  1 25  8  3  2  3  6  2  2
   1  8  6  4 12]
 [20 17 10  0 22  7 16  4  4  3  7 10  8  4  9  4 13 10  7  3  3  4  3  3
   7 15 12  4 21]
 [24 21 11 22  0  7 18  2  4  5  6  9  4  5  6  4 12  9  7  4  3  6  5  4
   5 13 10  3 17]
 [ 5  3  3  7  7  0  7  4  8  4 16 28 19 24 29 17  5 13  7  2  7  2  5  5
  11 22 23  4 10]
 [13 11 13 16 18  7  0  4  3  4  6 12  7  8  8  4 14 10  5  2  5  6  3  3
   4 16 13  4 17]
 [ 0  1  1  4  2  4  4  0  5  5  0  3  3  3  5  3  3  2  5  4  6  4  5  5
   1  3  5 10  4]
 [ 2  3  2  4  4  8  3  5  0 21  2  6  3  4  8  8  2 11 27 16 17 15 19 19
   4  7 11  1  4]
 [ 2  2  4  3  5  4  4  5 21  0  2  3  1  2  5  2  4  4 17 26 18 22 20 25
   2  4  4  1  4]
 [ 5  3  2  7  6 16  6  0  2  2  0 22 15 18 20 12  3  7  4  0  3  0  1  0
  13 1

In [5]:
# Raw score per keyword: the row-wise total of the overlap matrix.
S = overlap_matrix.sum(axis=1)

In [6]:
# Dependency factor per keyword: the mean over all N keywords of
# (overlap with keyword j) * (raw score of keyword j).
# This is a matrix-vector product, so it is computed in one vectorized step
# instead of a Python loop over rows. Multiplying by (1 / N) rather than
# dividing by N keeps the floating-point results identical to the loop form.
N = len(keywords)
# With no keywords there is nothing to average; keep the empty float array
# the loop version produced instead of evaluating 1 / 0.
D = (overlap_matrix @ S) * (1 / N) if N else np.zeros(0)

In [7]:
# Adjusted score: raw score minus the LAMBDA-weighted dependency factor.
dependency_penalty = LAMBDA * D
A = S - dependency_penalty

In [8]:
# Normalize Scores
# Min-max scale the adjusted scores so the smallest maps to 0 and the largest to 1.
min_A, max_A = np.min(A), np.max(A)
range_A = max_A - min_A
if range_A == 0:
    # Every keyword has the same adjusted score: dividing by the zero range
    # would yield NaN (and garbage after the later integer cast), so place
    # all keywords at the bottom of the scale instead.
    N_i = np.zeros_like(A, dtype=float)
else:
    N_i = (A - min_A) / range_A

In [9]:
# Map the normalized score linearly onto [ALPHA, ALPHA + Beta], then round to whole weights.
scaled_weight = ALPHA + Beta * N_i
W = np.round(scaled_weight).astype(int)

In [10]:
# Collect every intermediate quantity into one table, one row per keyword.
# Float columns are rounded to two decimals for display only; the unrounded
# arrays (D, A, N_i) remain available under their own names.
result_columns = {
    "Keyword": keywords,
    "Raw Score (S)": S,
    "Dependency Factor (D)": np.round(D, 2),
    "Adjusted Score (A)": np.round(A, 2),
    "Normalized Score (N_i)": np.round(N_i, 2),
    "Final Search Weight (W)": W,
}
df_result = pd.DataFrame(result_columns)

In [11]:
# Rank keywords from the highest to the lowest final weight.
df_result = df_result.sort_values("Final Search Weight (W)", ascending=False)

In [12]:
# Import the rich-display helper; the override cell further down calls display() as well.
from IPython.display import display
# Render the ranked weight table.
display(df_result)



Unnamed: 0,Keyword,Raw Score (S),Dependency Factor (D),Adjusted Score (A),Normalized Score (N_i),Final Search Weight (W)
27,همراه پلاس ملت,84,667.34,-383.14,1.0,5
7,دانلود مستقیم بانک پلاس,100,785.52,-449.86,0.95,5
2,بانک ملت,163,1324.03,-763.82,0.7,4
19,مگابانک,186,1410.21,-801.14,0.67,4
21,مگابانک ملت,197,1486.45,-843.51,0.64,4
24,نصب همراه بانک ملت بدون مراجعه به شعبه,160,1391.83,-814.28,0.66,4
1,اینترنتی ملت بانک,167,1374.34,-795.04,0.68,4
0,اینترنت بانک ملت,193,1591.83,-921.28,0.58,3
23,مگابانک همراه,202,1541.07,-876.75,0.62,3
18,مگا بانک ملت نسخه وب,254,1983.17,-1134.22,0.42,3


In [13]:
# Manual override: pin one keyword to a fixed weight and cap the weight of
# every keyword that overlaps heavily with it.
fixed_keyword = "دانلود همراه بانک ملت با لینک مستقیم"
fixed_W = 5  # Desired fixed search weight
# Overlap above which another keyword counts as highly related. A percentile
# of the fixed keyword's overlap row (e.g. the 75th) is a possible alternative.
threshold = 19
# Ceiling applied to highly related keywords; never lower than 1.
related_W = max(1, fixed_W - 3)
weight_col = "Final Search Weight (W)"

if fixed_keyword in df_result["Keyword"].values:
    # Set fixed W value for the specific keyword
    df_result.loc[df_result["Keyword"] == fixed_keyword, weight_col] = fixed_W

    # Overlap values between the fixed keyword and every keyword
    keyword_index = keywords.index(fixed_keyword)
    related_scores = overlap_matrix[keyword_index]

    # Keywords whose overlap exceeds the threshold, excluding the fixed keyword itself
    is_related = related_scores > threshold
    is_related[keyword_index] = False
    related_keywords = [kw for kw, hit in zip(keywords, is_related) if hit]

    # Cap rather than overwrite: a related keyword whose weight is already
    # below the ceiling keeps its weight, so this step only ever reduces W.
    related_rows = df_result["Keyword"].isin(related_keywords)
    df_result.loc[related_rows, weight_col] = (
        df_result.loc[related_rows, weight_col].clip(upper=related_W)
    )
else:
    # Make a misspelled or missing keyword visible instead of silently doing nothing.
    print(f"Keyword {fixed_keyword!r} not found; no override applied.")

# Ensure W values remain as integers
df_result[weight_col] = df_result[weight_col].astype(int)

# Sort by Final Search Weight (Descending)
df_result = df_result.sort_values(by=weight_col, ascending=False)

# Display the updated results
display(df_result)

Unnamed: 0,Keyword,Raw Score (S),Dependency Factor (D),Adjusted Score (A),Normalized Score (N_i),Final Search Weight (W)
27,همراه پلاس ملت,84,667.34,-383.14,1.0,5
7,دانلود مستقیم بانک پلاس,100,785.52,-449.86,0.95,5
14,دانلود همراه بانک ملت با لینک مستقیم,324,2784.59,-1625.21,0.03,5
19,مگابانک,186,1410.21,-801.14,0.67,4
2,بانک ملت,163,1324.03,-763.82,0.7,4
24,نصب همراه بانک ملت بدون مراجعه به شعبه,160,1391.83,-814.28,0.66,4
1,اینترنتی ملت بانک,167,1374.34,-795.04,0.68,4
21,مگابانک ملت,197,1486.45,-843.51,0.64,4
20,مگابانک آخرین نسخه,217,1720.24,-987.17,0.53,3
3,بانک ملت اینترنتی جدید,250,2026.9,-1168.83,0.39,3
