In [1]:
import random
import pandas as pd
import numpy as np

# Define the function to generate student personality traits
def generate_student_personality():
    """Draw a random personality profile for one student.

    A primary trait is picked at random from the eight known traits. If that
    trait belongs to a "likely together" pair, its partner is added with
    probability 0.4. When no partner was added, a second trait is drawn with
    probability 0.6 from the traits still available, i.e. everything except
    the primary trait and any trait it is mutually exclusive with.

    Returns:
        list[str]: one or two entries, each formatted as
        "<trait name>,<trait description>".
    """
    descriptions = {
        "Studious": "Constantly engrossed in books and academic work, often found in the library.",
        "Social Butterfly": "Always on the move, attending various social events, with a large group of friends.",
        "Independent Learner": "Preferring to work alone, not interested in group projects, focused on personal growth.",
        "Competitive Spirit": "Always striving for the top, may sometimes be perceived as aggressive towards peers.",
        "Casual Student": "Easygoing, not easily stressed by exams or assignments, takes things as they come.",
        "Perfectionist": "Demanding the best, can be tough on themselves and others when expectations are not met.",
        "Procrastinator": "Often delays tasks until the last minute, struggles with meeting deadlines.",
        "Gifted Individual": "Naturally talented in one or more areas, often achieving high levels of success with minimal effort."
    }

    # Traits that must never be combined with each other.
    conflicts = [
        ("Studious", "Casual Student"),
        ("Independent Learner", "Social Butterfly"),
        ("Perfectionist", "Procrastinator"),
    ]
    # Traits that tend to show up together.
    companions = [
        ("Studious", "Perfectionist"),
        ("Independent Learner", "Gifted Individual"),
        ("Social Butterfly", "Casual Student"),
    ]

    def partner_of(trait, pair):
        """Return the other member of ``pair``, or None if ``trait`` is not part of it."""
        first, second = pair
        if trait == first:
            return second
        if trait == second:
            return first
        return None

    # The primary trait is taken out of the pool so it cannot be drawn twice.
    primary = random.choice(list(descriptions.keys()))
    profile = [primary + ',' + descriptions.pop(primary)]

    # Drop whatever conflicts with the primary trait from the pool.
    for pair in conflicts:
        rival = partner_of(primary, pair)
        if rival is not None:
            descriptions.pop(rival, None)

    # Give the companion trait a 40% chance, provided it is still in the pool.
    for pair in companions:
        buddy = partner_of(primary, pair)
        if buddy is None or buddy not in descriptions:
            continue
        if random.random() < 0.4:
            profile.append(buddy + ',' + descriptions[buddy])

    # Without a companion, add an arbitrary remaining trait 60% of the time.
    if len(profile) == 1 and random.random() < 0.6:
        extra = random.choice(list(descriptions))
        profile.append(extra + ',' + descriptions[extra])

    return profile

In [2]:
import pandas as pd
import random

# Location of the roster CSV that supplies the student names
file_path = 'user_1000.csv'

# Parse the roster, then keep the values of its second column as a plain
# Python list (entries are removed from this list as names are handed out)
roster_df = pd.read_csv(file_path)
names_list = roster_df[roster_df.columns[1]].tolist()

# Function to get a random name without replacement
def get_random_name(names_list):
    """Remove and return a randomly chosen entry of ``names_list``.

    The list is modified in place, so the same entry is never handed out
    twice by successive calls on the same list.

    Args:
        names_list: mutable list of candidate names.

    Returns:
        The removed entry, or None when ``names_list`` is empty.
    """
    if names_list:
        # randrange(n) yields an index in [0, n - 1], like randint(0, n - 1).
        position = random.randrange(len(names_list))
        return names_list.pop(position)
    return None

In [3]:
# get_random_name(names_list)

In [4]:
def generate_student_record(id):
    """Build one synthetic student record.

    The academic status is drawn at random from nine levels (four
    undergraduate years, three master's years, two PhD years). The base age is
    18 plus the position of that status in the list; with probability 0.3 it is
    shifted by one year, up or down with equal chance. The name is taken from
    the module-level ``names_list`` via ``get_random_name`` and the personality
    from ``generate_student_personality``.

    Args:
        id: identifier stored unchanged in the record.

    Returns:
        dict with the keys "id", "name", "gender", "age", "traits", "status",
        "interest" and "feature"; "traits" and "interest" are empty-string
        placeholders.
    """
    grades = [
        "Year 1 Undergraduate",
        "Year 2 Undergraduate",
        "Year 3 Undergraduate",
        "Year 4 Undergraduate",
        "Year 1 Master's Student",
        "Year 2 Master's Student",
        "Year 3 Master's Student",
        "Year 1 PhD Student",
        "Year 2 PhD Student",
    ]

    status = random.choice(grades)

    # Later statuses map to older students: index 0 -> 18, index 8 -> 26.
    age = 18 + grades.index(status)
    if random.random() < 0.3:
        # Occasionally a student is a year older or younger than the norm.
        age += 1 if random.random() < 0.5 else -1

    # Drawn in this order: name, then gender, then personality.
    name = get_random_name(names_list)
    gender = random.choice(["male", "female"])
    personality = generate_student_personality()

    return {
        "id": id,
        "name": name,
        "gender": gender,
        "age": age,
        "traits": "",
        "status": status,
        "interest": "",  # Placeholder for interests
        "feature": personality,
    }

In [5]:
# generate_student_record(1)

In [7]:
# Number of synthetic students to generate; also embedded in the output file names
user_num = 100
student_data = [generate_student_record(student_id) for student_id in range(user_num)]

# Convert the generated records into a DataFrame
df_students = pd.DataFrame(student_data)

# Save the table to a CSV file
csv_file_path = f'user_{user_num}.csv'
df_students.to_csv(csv_file_path, index=False)

# Also dump every record as the text form of its dict, one per paragraph.
# The encoding is given explicitly so that non-ASCII names are written the
# same way on every platform instead of depending on the locale default.
file_path = f'user_{user_num}.txt'
with open(file_path, 'w', encoding='utf-8') as file:
    for record in student_data:
        record_str = str(record)  # Convert the dictionary to a string
        file.write(record_str + '\n\n')

In [10]:
# Closeness distribution (mean and standard deviation) for each relationship type
relationship_types = {
    'acquaintance': {'mean': 0.247, 'std': 0.146},
    'friend': {'mean': 0.750, 'std': 0.145}
}

# Re-read the student data from the CSV written earlier so the ids come from disk
student_data_path = csv_file_path
student_df = pd.read_csv(student_data_path)

# Candidate ids, extracted once instead of on every draw
student_ids = student_df['id'].values
if len(student_ids) < 2:
    # Two distinct ids are required per relationship; fail fast with a clear message.
    raise ValueError("at least two students are required to generate relationships")

# Generate 2000 relationship rows
num_relationships = 2000
relationships = []

# Type names come from the table above so the two cannot drift apart
relationship_names = list(relationship_types)

for _ in range(num_relationships):
    # Pick the relationship type at random
    relationship_type = np.random.choice(relationship_names)

    # Sample the closeness from the distribution of that type
    closeness = np.random.normal(
        loc=relationship_types[relationship_type]['mean'],
        scale=relationship_types[relationship_type]['std']
    )

    # Keep the closeness within [0, 1]
    closeness = max(0, min(closeness, 1))

    # Draw two different student ids in a single call; sampling without
    # replacement guarantees user_1 != user_2 without a retry loop.
    # NOTE(review): the same pair (in either order) can still be drawn in
    # several iterations, possibly with different types - confirm this is intended.
    user_1, user_2 = np.random.choice(student_ids, size=2, replace=False)

    # Append the row to the relationship list
    relationships.append([user_1, user_2, relationship_type, closeness])

# Convert to a DataFrame
relationships_df = pd.DataFrame(relationships, columns=['user_1', 'user_2', 'relationship', 'closeness'])

# Save to a CSV file
relationships_csv_path = f'relationship_{user_num}.csv'
relationships_df.to_csv(relationships_csv_path, index=False)