# Contacts
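This notebook generates fake alumni contact records and analyzes them: it plots distribution charts, finds people who share attributes, and builds a PDF report per person.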

In [1]:
import pandas as pd
import numpy as np
import os
import random
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties


# Seed every RNG used for data generation so the fake data is repeatable.
# NumPy drives most columns; the stdlib `random` module is used by
# `random.choices` for the community column, so it must be seeded as well.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

def generate_phone_number():
    """Return a random 11-character phone number string that starts with "1".

    The ten trailing digits are drawn with ``np.random.choice``, so the
    result depends on NumPy's global random state.
    """
    digit_pool = list("0123456789")
    trailing_digits = np.random.choice(digit_pool, 10)
    return "1" + "".join(trailing_digits)

# Number of fake survey responses to generate
num_responses = 100

communities = ["Downtown", "Inner Loop", "I-610 to Beltway 8", "Energy Corridor", "Katy", "The Woodlands", "Sugar Land", "Pearland", "Cypress", "Spring", "League City", "Baytown", "Pasadena", "Clear Lake"]

last_names = ["张", "王", "李", "刘", "赵", "孙", "周", "杨", "林", "陈"]
first_names = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
# Every surname / given-name combination: 10 x 10 = 100 distinct nicknames
nick_names = [f"{last_name}{first_name}" for last_name in last_names for first_name in first_names]
# Create a DataFrame with the specified columns and random data.
# The dict entries are evaluated top to bottom, so reordering the columns
# changes which random draws each column receives.
df = pd.DataFrame({
    "Timestamp": pd.date_range(start="2024-01-01", periods=num_responses, freq="D"),
    "Email Address": [f"user{i}@example.com" for i in range(num_responses)],
    "中文姓名": [f"中文名字{i}" for i in range(num_responses)],
    "English Name": [f"EnglishName{i}" for i in range(num_responses)],
    # Cycle through the nickname pool so this column always has exactly
    # num_responses entries, whatever num_responses is set to.
    "昵称": [nick_names[i % len(nick_names)] for i in range(num_responses)],
    "性别": np.random.choice(["Male", "Female"], num_responses),
    "入学年份": np.random.choice(range(2000, 2025), num_responses),
    "学院": np.random.choice(["Engineering", "Business", "Arts", "Science"], num_responses),
    "学位": np.random.choice(["Bachelor", "Master", "PhD"], num_responses),
    "现居住地邮编": [f"{np.random.randint(10000, 99999)}" for _ in range(num_responses)],
    "所属社区": random.choices(communities, k=num_responses),
    "微信": [f"WeChatID{i}" for i in range(num_responses)],
    "手机号": [generate_phone_number() for _ in range(num_responses)],
    "Instagram": [f"Insta{i}" for i in range(num_responses)],
    "愿意通过哪个方式被联系到？": np.random.choice(["Email", "WeChat", "Phone", "Instagram"], num_responses),
    "现居住地址": [f"Address{i}" for i in range(num_responses)],
    "是否单身？": np.random.choice(["Yes", "No"], num_responses),
    "就职公司名称": [f"Company{i}" for i in range(num_responses)],
    "所属行业": np.random.choice(["Tech", "Education", "Healthcare", "Finance"], num_responses),
    "在行业内的年限": np.random.choice(range(1, 21), num_responses),
    "是否愿意推荐校友去自己的公司？": np.random.choice(["Yes", "No"], num_responses),
    "是否愿意向校友推广自己的公司？": np.random.choice(["Yes", "No"], num_responses),
    "公司名称": [f"Company{i}" for i in range(num_responses)],
    "联系方式": [generate_phone_number() for _ in range(num_responses)],
    "地址": [f"CompanyAddress{i}" for i in range(num_responses)],
    "邮编": [f"{np.random.randint(10000, 99999)}" for _ in range(num_responses)],
    "业务范围": np.random.choice(["Product", "Service", "Consulting"], num_responses),
    "公司图标": [f"Logo{i}.png" for i in range(num_responses)],
    # Filled with random 5-digit strings, like the postcode columns
    "匹配社区": [f"{np.random.randint(10000, 99999)}" for _ in range(num_responses)],
    "社交爱好": np.random.choice(["Photography", "Reading", "Travel", "Painting"], num_responses),
    "体育运动类爱好": np.random.choice(["Running", "Swimming", "Basketball", "Yoga"], num_responses),
    "关注的热点话题": np.random.choice(["Health", "Politics", "Finance", "Technology"], num_responses),
})
# Persist the generated data (the DataFrame index is written as the first CSV column)
df.to_csv("test.csv")
df.head()  # Display the first few rows of the DataFrame


Unnamed: 0,Timestamp,Email Address,中文姓名,English Name,昵称,性别,入学年份,学院,学位,现居住地邮编,...,公司名称,联系方式,地址,邮编,业务范围,公司图标,匹配社区,社交爱好,体育运动类爱好,关注的热点话题
0,2024-01-01,user0@example.com,中文名字0,EnglishName0,张一,Male,2017,Science,Master,42635,...,Company0,11818671653,CompanyAddress0,71709,Product,Logo0.png,34995,Photography,Basketball,Finance
1,2024-01-02,user1@example.com,中文名字1,EnglishName1,张二,Female,2011,Science,Bachelor,73208,...,Company1,16319671406,CompanyAddress1,79785,Consulting,Logo1.png,26977,Photography,Yoga,Politics
2,2024-01-03,user2@example.com,中文名字2,EnglishName2,张三,Male,2001,Business,PhD,43828,...,Company2,17545106699,CompanyAddress2,44518,Service,Logo2.png,44542,Reading,Swimming,Finance
3,2024-01-04,user3@example.com,中文名字3,EnglishName3,张四,Male,2009,Business,PhD,28711,...,Company3,11109770783,CompanyAddress3,63909,Service,Logo3.png,20651,Photography,Yoga,Finance
4,2024-01-05,user4@example.com,中文名字4,EnglishName4,张五,Male,2003,Arts,PhD,13420,...,Company4,10024489115,CompanyAddress4,28351,Service,Logo4.png,88931,Reading,Swimming,Health


# Create pie and bar charts

In [2]:
# Directory that receives every generated chart image
IMG_DIR = "images"
os.makedirs(IMG_DIR, exist_ok=True)
# Set matplotlib's global sans-serif font list so Chinese titles and labels render.
# 'SimHei' is tried first; the remaining entries are fallbacks for systems
# where SimHei is not installed (otherwise Chinese text shows as empty boxes).
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
]
plt.rcParams['axes.unicode_minus'] = False  # display the minus sign correctly

# DataFrame columns to chart, paired position-by-position with the English
# slugs used in the output file names.
targets_chinese = ["所属社区", '学院', '所属行业', '社交爱好', '体育运动类爱好', '关注的热点话题']
targets_english = ["community", "college", "industry", "hobby", "sports", "hot"]
for target_chinese, target_english in zip(targets_chinese, targets_english):
    target_counts = df[target_chinese].value_counts()

    # Pie chart of the share held by each category
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(target_counts, labels=target_counts.index, autopct='%1.1f%%')
    ax.set_title(f'校友在{target_chinese}的分布')
    fig.savefig(f'{IMG_DIR}/{target_english}_distribution_pie.png')
    plt.close(fig)  # release the figure; many are created in this loop

    # Bar chart of the raw counts per category
    fig, ax = plt.subplots(figsize=(12, 8))
    target_counts.plot(kind='bar', ax=ax)
    ax.set_title(f'校友在{target_chinese}的分布')
    ax.set_xlabel(f'{target_chinese}')
    ax.set_ylabel('校友数')
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    fig.savefig(f"{IMG_DIR}/{target_english}_distribution_hist.png")
    plt.close(fig)

# Heat map

In [11]:
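# NOTE(review): this cell is disabled. `area_counts` (used below to build `heat_data`)
# is not defined in the visible notebook; presumably it should hold the per-community
# counts, e.g. df["所属社区"].value_counts() — confirm before re-enabling.
# import folium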
# from folium.plugins import HeatMap
#
# locations = {
#     "Downtown": [29.756954817074494, -95.36034777819523],
#     "Inner Loop": [29.76448961582833, -95.45081705842782],
#                 "I-610 to Beltway 8": [29.691940442442785, -95.32770384076193],
#                 "Energy Corridor": [29.781303209000473, -95.63629434412921],
#                 "Katy": [29.785875525326624, -95.82584461805949],
#                 "The Woodlands": [30.161819582336765, -95.46694781136829],
#                 "Sugar Land": [29.59840969080056, -95.6238495746984],
#                 "Pearland": [29.561231189817843, -95.28508703898648],
#                 "Cypress": [29.971107675680898, -95.69749878968416],
#                 "Spring": [30.0804293862743, -95.41570339335017],
#                 "League City": [29.50744016209295, -95.0954181692523],
#                 "Baytown": [29.735659030914295, -94.97644605010763],
#                 "Pasadena": [29.690529444311803, -95.18710323438891],
#     "Clear Lake": [29.57696847562151, -95.12253624511492]
# }
#
# # 创建休斯顿地图的中心点
# map_houston = folium.Map(location=[29.7604, -95.3698], zoom_start=10)
#
# heat_data = [(loc[0], loc[1], float(count)) for area, loc in locations.items() for count in [area_counts.get(area, 0)]]
#
# # 创建热力图并添加到地图
# HeatMap(heat_data).add_to(map_houston)
#
# # 保存地图为HTML文件
# map_houston.save("houston_heatmap.html")

# Find people with shared interests

In [3]:
# Display name for every row: the nickname, or the Chinese name when the
# nickname is missing or empty. Computed once instead of once per lookup.
has_nickname = df['昵称'].notna() & (df['昵称'] != '')
display_names = df['昵称'].where(has_nickname, df['中文姓名'])

# Maps each email address to
#   {"name": display name,
#    "common_<target_english>": [(display name, row index), ...]}
# with one "common_*" list per entry of targets_chinese / targets_english.
personal_details = {}

for index, row in df.iterrows():
    email = row['Email Address']
    details = {"name": display_names.at[index]}

    # Rows belonging to other people (identity is decided by email address)
    not_self = df['Email Address'] != email

    # For each target column, collect everyone else who has the same value
    for target_chinese, target_english in zip(targets_chinese, targets_english):
        shares_value = df[target_chinese] == row[target_chinese]
        details[f"common_{target_english}"] = [
            (display_names.at[idx], idx)
            for idx in df.index[shares_value & not_self]
        ]

    personal_details[email] = details


In [7]:
from fpdf import FPDF
import matplotlib.pyplot as plt
from PyPDF2 import PdfReader, PdfWriter

# Folder that receives the generated per-person PDF files; created if missing.
PDF_DIR = "pdf"
os.makedirs(name=PDF_DIR, exist_ok=True)

class PDF(FPDF):
    """FPDF document with a Chinese-capable TrueType font registered as 'ChineseFont'.

    Provides two helpers, ``chapter_title`` and ``chapter_body``, that write
    text with that font at fixed sizes.
    """

    def __init__(self, font_path=r'C:\Windows\Fonts\simhei.ttf'):
        """Create the document and register the font.

        Args:
            font_path: Path to the TrueType font file to register as
                'ChineseFont'. Defaults to the SimHei font in the Windows
                fonts folder; pass another path on systems where that file
                does not exist.
        """
        super().__init__()
        # Raw-string default: the backslashes in the Windows path must not be
        # read as escape sequences.
        # NOTE(review): `uni=True` is kept as in the original call; some fpdf
        # releases may report it as deprecated — confirm against the installed version.
        self.add_font('ChineseFont', '', font_path, uni=True)


    # def header(self):
    #     self.set_font('ChineseFont', '', 12)
    #     self.cell(0, 10, '北京大学休斯敦校友会黄页', 0, 1, 'C')

    def chapter_title(self, title):
        """Write ``title`` as a left-aligned 12 pt line, followed by a 10-unit line break."""
        self.set_font('ChineseFont', '', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(10)

    def chapter_body(self, body):
        """Write ``body`` as wrapped 8 pt text using ``multi_cell``."""
        self.set_font('ChineseFont', '', 8)
        self.multi_cell(0, 10, body)

# Only the first few people get a PDF; raise this limit to generate more.
MAX_PDFS = 2

for person in list(personal_details)[:MAX_PDFS]:
    details = personal_details[person]
    pdf = PDF()

    # (bookmark title, zero-based page index) for every section. The start page
    # is recorded when the section begins, so bookmarks stay correct even if a
    # long section body flows onto additional pages.
    bookmarks = []

    pdf.add_page()
    bookmarks.append(('基本信息', pdf.page_no() - 1))
    pdf.chapter_title("基本信息")
    pdf.chapter_body(f"邮箱: {person}\n昵称或中文名: {details['name']}")

    for target_chinese, target_english in zip(targets_chinese, targets_english):
        common_people = details[f'common_{target_english}']

        pdf.add_page()
        bookmarks.append((f'共同的{target_chinese}', pdf.page_no() - 1))
        pdf.chapter_title(f"在休斯顿，共有{len(common_people)}人与您有共同的{target_chinese}，他们分别是：")

        common_people_str = ', '.join(f"{name} (ID: {idx})" for name, idx in common_people)
        # The pie chart is placed at a fixed position on the section's first page
        pdf.image(f'{IMG_DIR}/{target_english}_distribution_pie.png', x=10, y=100, w=180)

        pdf.chapter_body(common_people_str)

    pdf_path = f'{PDF_DIR}/{person}.pdf'
    pdf.output(pdf_path)

    # Use PyPDF2 to add bookmarks (outline items) to the PDF.
    # NOTE(review): the bookmarked file overwrites the draft at the same path;
    # this relies on PdfReader having loaded the draft before the file is
    # reopened for writing — confirm for the installed PyPDF2 version.
    reader = PdfReader(pdf_path)
    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)
    for title, page_index in bookmarks:
        writer.add_outline_item(title, page_index)

    # Save the final PDF
    with open(pdf_path, 'wb') as f_out:
        writer.write(f_out)

  self.add_font('ChineseFont', '', 'C:\Windows\Fonts\simhei.ttf', uni=True)
  self.cell(0, 10, title, 0, 1, 'L')
