## 整理資料


In [9]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the raw survey responses; every derived feature is built from df.
df = pd.read_excel('data.xlsx')

# Biological sex: carried over unchanged as the first column of the
# feature table X.
X = df['1. 生理性別'].to_frame()

# Academic track (groups 1 / 2 / 3), derived from keywords in the college
# name. Everyone starts in group 1; the assignments are applied in order, so
# a college matching both keyword lists (e.g. one containing '生命科學')
# ends up in group 3.
# NOTE(review): the single-character keyword '理' also matches names such as
# '管理' -- confirm that this is intended.
college = df['2. 學院']
X['類組'] = '第一類組'
in_group_two = college.str.contains('理|工|電|資|原子|半導體|科學')
X.loc[in_group_two, '類組'] = '第二類組'
in_group_three = college.str.contains('醫|生命科學|生物|藥物科學|護理')
X.loc[in_group_three, '類組'] = '第三類組'

# Year of study, bucketed around the third undergraduate year (which the
# author regards as an important turning point in life): below / exactly /
# above. Rows matching neither pattern keep the default '大三以下'.
# The column name '年紀' is kept as-is because column names are reused in the
# chart titles and file names generated further down.
year_of_study = df['3. 年級']
X['年紀'] = '大三以下'
is_third_year = year_of_study.str.contains('大三')
X.loc[is_third_year, '年紀'] = '大三'
is_beyond_third_year = year_of_study.str.contains('大四－大六|碩士|博士')
X.loc[is_beyond_third_year, '年紀'] = '大三以上'

# Perceived social norm about having children in Taiwan, collapsed to two
# levels: answers mentioning one, two or three children become '生'
# (have children); every other answer keeps the default '不生'.
norm_answers = df['4. 請問你認為關於生育，台灣目前的風氣為何？']
X['生育個數風氣'] = '不生'
expects_children = norm_answers.str.contains('生1個|生2個|生3個')
X.loc[expects_children, '生育個數風氣'] = '生'

# Division of household labour: who is seen as carrying more of the
# child-rearing and housework. The default is an even split; answers that
# mention the father or the mother override it. The mother rule is applied
# last, so it wins if an answer mentions both.
question = '5. 依你過往的觀察，你認為台灣家庭中誰負擔較多養育小孩及打理家務的責任？'
chore_answers = df[question]
X['家務分工'] = '家務平均負擔'
for keyword, label in (('父親', '父親負擔較多家務'), ('母親', '母親負擔較多家務')):
    X.loc[chore_answers.str.contains(keyword), '家務分工'] = label

# Self-rated understanding of Taiwan's low-birth-rate policies, bucketed by
# answer as 0 / 1-2 / 3-5. Rows matching neither pattern keep the default
# '不了解台灣政策'.
# The answers are cast to str and matched with regex character classes.
# A Python expression such as `'1' or '2'` evaluates to just `'1'`, so it
# must not be used as the pattern: it would only ever match the first digit
# of each bucket.
question = '10. 請問你認為你多了解台灣目前對應少子化的政策？（生育津貼、托育補助等）'
understanding = df[question].astype(str)
X['政策了解'] = '不了解台灣政策'
X.loc[understanding.str.contains('[12]'), '政策了解'] = '了解台灣政策'
X.loc[understanding.str.contains('[345]'), '政策了解'] = '非常了解台灣政策'


# Whether the government's low-birth-rate policies are judged sufficient,
# bucketed by answer as 0 / 1-2 / 3-5. Rows matching neither pattern keep
# the default '少子化的政策不足'.
# The answers are cast to str and matched with regex character classes.
# A Python expression such as `'1' or '2'` evaluates to just `'1'`, so it
# must not be used as the pattern: it would only ever match the first digit
# of each bucket.
question = '11. 請問你認為台灣政府目前對應少子化的政策是否足夠？'
sufficiency = df[question].astype(str)
X['政策足夠'] = '少子化的政策不足'
X.loc[sufficiency.str.contains('[12]'), '政策足夠'] = '少子化的政策尚可'
X.loc[sufficiency.str.contains('[345]'), '政策足夠'] = '少子化的政策足夠'

# Outcome column '生': the respondent's stated intention to have children,
# copied unchanged from the survey.
X['生'] = df['7. 請問你打算生小孩嗎？']


In [10]:
# Output folder for the single-attribute charts; created if it is missing.
save_folder = 'piechart/個別vs生小孩意願'
os.makedirs(save_folder, exist_ok=True)

# Font fallback list used for the chart text. This is global matplotlib
# state, so it is set once rather than on every figure.
plt.rcParams['font.sans-serif'] = ['STHeiti', 'SimHei', 'Microsoft YaHei', 'Noto Sans CJK SC', 'Arial']

# For every attribute column in X, draw one pie chart per category showing
# how the respondents in that category answered the outcome column '生'.
for column in X:
    if column == '生':
        # '生' is the outcome being plotted, not an attribute to group by.
        continue

    # Rows: categories of this attribute; columns: answers in '生';
    # values: number of respondents (0 where a combination never occurs).
    grouped = X.groupby(column)['生'].value_counts().unstack().fillna(0)

    # Turn each row of counts into percentages of that category's total.
    total_responses = grouped.sum(axis=1)
    percentage = grouped.div(total_responses, axis=0) * 100

    # One figure per category of the current attribute.
    for index, row in percentage.iterrows():
        plt.figure(figsize=(6, 6))

        # Each wedge label carries the answer, its count and its percentage.
        labels = [
            f'{response} ({int(count)})\n{share:.1f}%'
            for response, count, share in zip(row.index, grouped.loc[index], row)
        ]

        # autopct is left at its default (None): the labels already contain
        # the percentage, so no additional percentage text is drawn.
        plt.pie(row, labels=labels, startangle=140)
        plt.title(f'{index} 生小孩的比例')
        plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.

        # Save the chart, then close the figure so open figures do not pile up.
        plt.savefig(os.path.join(save_folder, f"{index}_生小孩的比例.png"))
        plt.close()


## 生理性別與其他屬性合併

In [11]:
# Output folder for the (sex + one other attribute) charts; created if it
# is missing.
save_folder = 'piechart/(性別+其他屬性)vs生小孩意願'
os.makedirs(save_folder, exist_ok=True)

# Font fallback list used for the chart text. This is global matplotlib
# state, so it is set once rather than on every figure.
plt.rcParams['font.sans-serif'] = ['STHeiti', 'SimHei', 'Microsoft YaHei', 'Noto Sans CJK SC', 'Arial']

# For every attribute other than sex, draw one pie chart per
# (sex, category) pair showing the distribution of the outcome column '生'.
for column in X:
    if column in ('生', '1. 生理性別'):
        # Skip the outcome itself and the column already used as the first
        # grouping key.
        continue

    # Rows: (sex, category) pairs; columns: answers in '生';
    # values: number of respondents (0 where a combination never occurs).
    grouped = X.groupby(['1. 生理性別', column])['生'].value_counts().unstack().fillna(0)

    # Turn each row of counts into percentages of that pair's total.
    total_responses = grouped.sum(axis=1)
    percentage = grouped.div(total_responses, axis=0) * 100

    # index is the (sex, category) tuple identifying the current row.
    for index, row in percentage.iterrows():
        plt.figure(figsize=(6, 6))

        # Labels show the answer and its count; autopct adds the percentage.
        labels = [
            f'{response} ({int(count)})'
            for response, count in zip(row.index, grouped.loc[index])
        ]
        plt.pie(row, labels=labels, autopct='%1.1f%%', startangle=140)
        plt.title(f"生理性別: {index[0]}, {column}: {index[1]}")
        plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.

        # Save the chart, then close the figure so open figures do not pile up.
        plt.savefig(os.path.join(save_folder, f"{index[0]}_{column}_{index[1]}.png"))
        plt.close()