In [7]:
import pandas as pd

# Build the table that pairs movies with users (one row per movie_id/user_id pair).
data = dict(
    movie_id=[1, 2, 3, 1, 2, 3],
    user_id=[101, 102, 101, 101, 104, 104],
)
df = pd.DataFrame(data)

In [8]:
# Show the movie/user table as the cell's output (last expression of the cell).
df

Unnamed: 0,movie_id,user_id
0,1,101
1,2,102
2,3,101
3,1,101
4,2,104
5,3,104


In [9]:
import pandas as pd
from itertools import product
# Collect each movie's set of users once, up front, instead of re-filtering
# the whole frame twice for every pair inside the loop.
users_by_movie = {
    movie: set(users) for movie, users in df.groupby('movie_id')['user_id']
}

# Every ordered pair of movies (including a movie paired with itself),
# in order of first appearance in the table.
movie_pairs = list(product(df['movie_id'].unique(), repeat=2))
similarities = []

for pair in movie_pairs:
    # A movie_id that groupby dropped (e.g. NaN) simply has no users.
    movie1_users = users_by_movie.get(pair[0], set())
    movie2_users = users_by_movie.get(pair[1], set())
    union_size = len(movie1_users | movie2_users)
    # Jaccard similarity = |A ∩ B| / |A ∪ B|; report 0.0 when both sets are
    # empty rather than failing with ZeroDivisionError.
    jaccard_similarity = (
        len(movie1_users & movie2_users) / union_size if union_size else 0.0
    )
    similarities.append((pair[0], pair[1], jaccard_similarity))

# Assemble the pairwise similarities into a data frame
similarity_df = pd.DataFrame(similarities, columns=['movie_id', 'other_movie_id', 'similarity'])
print(similarity_df)

   movie_id  other_movie_id  similarity
0         1               1    1.000000
1         1               2    0.000000
2         1               3    0.500000
3         2               1    0.000000
4         2               2    1.000000
5         2               3    0.333333
6         3               1    0.500000
7         3               2    0.333333
8         3               3    1.000000


In [None]:
import numpy as np
from pymoo.model.problem import Problem
from pymoo.algorithms.nsga2 import NSGA2
from pymoo.factory import get_sampling, get_crossover, get_mutation
from pymoo.optimize import minimize

class MyProblem(Problem):
    """Single-objective problem with one decision variable per entry of ``Q``.

    The objective being minimised is::

        f(x) = sum_i x_i * Q_i  -  sum_{i,j} S_ij * (x_i - x_j) * Q_i

    Parameters
    ----------
    Q : array-like, shape (n_var,)
        Per-variable coefficients Q_i.
    S : array-like, shape (n_var, n_var)
        Pairwise coefficients S_ij.
    xl, xu : scalar or array-like, optional
        Lower / upper bounds of the variables (default range [0, 10]).

    Raises
    ------
    ValueError
        If ``S`` is not a square matrix matching the length of ``Q``.
    """

    def __init__(self, Q, S, xl=0, xu=10):
        self.Q = np.asarray(Q, dtype=float)
        self.S = np.asarray(S, dtype=float)
        n_var = len(self.Q)
        if self.S.shape != (n_var, n_var):
            raise ValueError(
                f"S must have shape ({n_var}, {n_var}) to match Q, got {self.S.shape}"
            )
        super().__init__(n_var=n_var, n_obj=1, xl=xl, xu=xu)

    def _evaluate(self, x, out, *args, **kwargs):
        """Write the objective value of every candidate in ``x`` to ``out["F"]``.

        pymoo's vectorised ``Problem`` hands over the whole population as a
        2-D array (one row per individual), so the objective is computed row
        by row. A single 1-D vector is accepted as well.
        """
        x = np.asarray(x, dtype=float)
        X = np.atleast_2d(x)  # (n_individuals, n_var)

        linear = X @ self.Q  # sum_i x_i * Q_i, one value per individual

        # diff[k, i, j] = x_i - x_j for individual k
        diff = X[:, :, None] - X[:, None, :]
        # weights[i, j] = S_ij * Q_i
        weights = self.S * self.Q[:, None]
        coupling = np.sum(weights * diff, axis=(1, 2))

        F = linear - coupling
        # One column per objective for a population; a length-1 array for a single vector.
        out["F"] = F.reshape(-1, 1) if x.ndim > 1 else F

# Example inputs: Q holds the given Q_i values, S the S_ij matrix.
Q = np.array([2, 3, 1, 4, 2])
S = np.array([
    [1, 0.5, 2, 1, 1],
    [0.5, 1, 1, 0.8, 0.6],
    [2, 1, 3, 2, 2],
    [1, 0.8, 2, 2.5, 1.5],
    [1, 0.6, 2, 1.5, 2],
])

problem = MyProblem(Q, S)

# Pick the algorithm and configure it
algorithm = NSGA2(pop_size=100)

# Run the optimisation for 100 generations with a fixed seed
res = minimize(problem, algorithm, ('n_gen', 100), seed=1, verbose=True)

# Print the Pareto-optimal solutions
print("Pareto 最优解:")
print(res.X, res.F)

In [None]:
import numpy as np
import pandas as pd
from pymoo.model.problem import Problem
from pymoo.algorithms.nsga2 import NSGA2
from pymoo.optimize import minimize

# Constants held in a pandas DataFrame, one row per variable
# (this example assumes three variables).
constants_df = pd.DataFrame(
    dict(
        P_i_upper_limit=[10, 10, 10],
        P_i_lower_limit=[0, 0, 0],
        Q_i=[2, 3, 1],
        S_ij_1=[1, 0.5, 2],
        S_ij_2=[0.5, 1, 1],
        S_ij_3=[2, 1, 3],
    )
)

# Define the optimisation problem
class MyProblem(Problem):
    """Problem whose constants come from a DataFrame with one row per variable.

    ``constants`` must provide the columns ``P_i_lower_limit``,
    ``P_i_upper_limit`` and ``Q_i``, plus the columns whose names contain
    ``S_ij``; taken together (in column order) those form the square matrix
    S, with row i of the frame supplying row i of S.

    The objective being minimised is::

        f(x) = sum_i x_i * Q_i  -  sum_{i,j} S_ij * (x_i - x_j) * Q_i

    Raises
    ------
    ValueError
        If the ``S_ij`` columns do not form an (n_var, n_var) matrix.
    """

    def __init__(self, constants):
        self.constants = constants
        # One decision variable per row; this keeps n_var consistent with the
        # per-row bounds passed as xl / xu below.
        n_var = len(constants)
        s_shape = constants.filter(like='S_ij').shape
        if s_shape != (n_var, n_var):
            raise ValueError(
                f"S_ij columns must form a ({n_var}, {n_var}) matrix, got {s_shape}"
            )
        super().__init__(n_var=n_var, n_obj=1, xl=constants['P_i_lower_limit'].values, xu=constants['P_i_upper_limit'].values)

    def _evaluate(self, x, out, *args, **kwargs):
        """Write the objective value of every candidate in ``x`` to ``out["F"]``.

        pymoo's vectorised ``Problem`` hands over the whole population as a
        2-D array (one row per individual), so the objective is computed row
        by row. A single 1-D vector is accepted as well.
        """
        Q = np.asarray(self.constants['Q_i'].values, dtype=float)
        S = np.asarray(self.constants.filter(like='S_ij').values, dtype=float)

        x = np.asarray(x, dtype=float)
        X = np.atleast_2d(x)  # (n_individuals, n_var)

        linear = X @ Q  # sum_i x_i * Q_i, one value per individual

        # diff[k, i, j] = x_i - x_j for individual k
        diff = X[:, :, None] - X[:, None, :]
        # weights[i, j] = S_ij * Q_i
        weights = S * Q[:, None]
        coupling = np.sum(weights * diff, axis=(1, 2))

        F = linear - coupling
        # One column per objective for a population; a length-1 array for a single vector.
        out["F"] = F.reshape(-1, 1) if x.ndim > 1 else F

# Instantiate the problem and run the optimiser
problem = MyProblem(constants_df)
algorithm = NSGA2(pop_size=100)
res = minimize(
    problem,
    algorithm,
    ('n_gen', 100),
    seed=1,
    verbose=True,
)

# Print the Pareto-optimal solutions
print("Pareto 最优解:")
print(res.X, res.F)


In [10]:
import pandas as pd

# Build the example sales table
data = dict(
    movie_id=[1, 2, 3, 1, 2],
    movie_sales=[5000, 3000, 6000, 2000, 4000],
    sales_time=['2022-01-15', '2022-01-10', '2022-01-20', '2022-01-12', '2022-01-18'],
)
df = pd.DataFrame(data)

# Order the rows by sales_time (ascending), then gather each movie's
# movie_sales values into a list, one row per movie_id.
movie_sales_list = (
    df.sort_values('sales_time')
    .groupby('movie_id')['movie_sales']
    .agg(list)
    .reset_index()
)

print(movie_sales_list)


   movie_id   movie_sales
0         1  [2000, 5000]
1         2  [3000, 4000]
2         3        [6000]


In [None]:
# Cast every element of each per-movie sales list to float
movie_sales_list['movie_sales'] = movie_sales_list['movie_sales'].apply(
    lambda sales: list(map(float, sales))
)

# Store the data as a CSV file
# movie_sales_list.to_csv('movie_sales_list.csv', index=False)
