## Import Package

In [386]:
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns               
import matplotlib.pyplot as plt     
import statsmodels.formula.api as smf
from statsmodels.miscmodels.ordinal_model import OrderedModel

## Load Data

In [387]:
df = pd.read_spss('/Users/wen/Desktop/論文/E10053_4/data.sav')

In [388]:
year1 = pd.read_spss('/Users/wen/Desktop/論文/E10053_1/data.sav')

In [389]:
mergedf = year1.loc[:, ['id', 'fedu', 'medu']]

In [390]:
df = pd.merge(df, mergedf, on = 'id')

In [391]:
df

Unnamed: 0,id,v01,v03,v10,v11,v12,v13,v14,a1a,a1b,...,fcareer8,mcareer5,mcareer8,fpartyid,mpartyid,fparty,mparty,w,fedu,medu
0,120101.0,國立臺灣大學,27.0,5.0,13.0,3.0,18.0,5.0,很少,很少,...,私部門管理階層及專業人員,高、中級白領,私部門管理階層及專業人員,無反應及其他政黨,中立及看情形,中立無反應,中立無反應,0.959659,大學及以上,大學及以上
1,120102.0,國立政治大學,14.0,4.0,22.0,3.0,11.0,50.0,很少,很少,...,軍公教人員,其他,家管,中立及看情形,中立及看情形,中立無反應,中立無反應,0.959659,大學及以上,高中、職
2,120103.0,國立政治大學,13.0,4.0,14.0,2.0,11.0,6.0,有時,時常,...,私部門勞工,其他,家管,中立及看情形,中立及看情形,中立無反應,中立無反應,0.959659,高中、職,高中、職
3,120107.0,國立政治大學,25.0,4.0,14.0,2.0,11.0,5.0,很少,很少,...,軍公教人員,高、中級白領,軍公教人員,無反應及其他政黨,國民黨,中立無反應,普通支持國民黨,1.087015,大學及以上,大學及以上
4,120108.0,國立政治大學,20.0,5.0,5.0,2.0,15.0,26.0,時常,很少,...,私部門管理階層及專業人員,藍領,私部門勞工,民進黨,民進黨,普通支持民進黨,普通支持民進黨,0.959659,專科,高中、職
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1089,2560308.0,國立高雄師範大學,19.0,3.0,26.0,4.0,8.0,40.0,有時,很少,...,私部門勞工,其他,家管,民進黨,民進黨,普通支持民進黨,偏民進黨,0.897055,大學及以上,高中、職
1090,2560309.0,國立高雄師範大學,18.0,3.0,26.0,4.0,12.0,7.0,有時,很少,...,軍公教人員,高、中級白領,私部門管理階層及專業人員,國民黨,民進黨,普通支持國民黨,偏民進黨,1.016103,專科,高中、職
1091,2560310.0,國立高雄師範大學,32.0,3.0,26.0,4.0,12.0,6.0,有時,很少,...,軍公教人員,其他,家管,國民黨,國民黨,普通支持國民黨,非常支持國民黨,1.016103,專科,專科
1092,2560311.0,國立高雄師範大學,15.0,3.0,26.0,4.0,12.0,7.0,有時,有時,...,私部門勞工,藍領,私部門勞工,無反應及其他政黨,無反應及其他政黨,中立無反應,中立無反應,1.016103,高中、職,高中、職


## Data Cleaning

* 家庭
* 教育 - v01
* 政治參與

### 學校屬性

* 受訪者學校

In [392]:
df['strata5'].value_counts()

私立技職    404
私立一般    359
公立一般    210
公立技職     83
師範       38
Name: strata5, dtype: int64

In [393]:
def schooltype(strata5):
    if strata5 == '私立技職':
        return '私立技職'
    elif strata5 == '私立一般':
        return '私立一般'
    elif strata5 == '公立一般':
        return '公立一般'
    elif strata5 == '公立技職':
        return '公立技職'
    elif strata5 == '師範':
        return '公立一般'
    else:
        return float('NaN')

In [394]:
df['SchoolType'] = df['strata5'].apply(schooltype)

In [395]:
df['SchoolType'].value_counts()

私立技職    404
私立一般    359
公立一般    248
公立技職     83
Name: SchoolType, dtype: int64

In [396]:
df1 = df.groupby(["SchoolType"]).count().reset_index()

fig1 = px.bar(df1, height=500, width=600,
             y = df.groupby(["SchoolType"]).size(),
             x="SchoolType",
             color='SchoolType',
             text = df.groupby(["SchoolType"]).size(), 
             title = '學校屬性分佈')
fig1.show()

* SchoolType to num：數值越大教育資源越高(假設的)

In [397]:
def schooltype_num(SchoolType):
    if SchoolType == '公立一般':
        return float(4)
    elif SchoolType == '私立一般':
        return float(3)
    elif SchoolType == '公立技職':
        return float(2)
    else:
        return float(1)

In [398]:
df['SchoolType_num'] = df['SchoolType'].apply(schooltype_num)

* 受訪者科系

In [399]:
df['strata9'].value_counts()

商學管理    299
工科      296
人文藝術    178
理科      123
經社心理    105
醫科       38
教育       27
農科       19
法律        9
Name: strata9, dtype: int64

In [400]:
def major(strata9):
    if strata9 == '經社心理':
        return int(1)
    else:
        return int(0)

In [401]:
df['Major'] = df['strata9'].apply(major)

* 學校到立法院距離(最短行車距離) </br>
資料搜集日期：2022.08.18 14:23-14:45

In [402]:
def distance(v01):
    if v01 == '明志科技大學':
        return float(12.3)
    elif v01 == '東吳大學':
        return float(9.0)
    elif v01 == '明道大學':
        return float(202)
    elif v01 == '銘傳大學':
        return  float(7.1)
    elif v01 == '臺南應用科技大學':
        return float(299)
    elif v01 == '輔仁大學':
        return float(10.9)
    elif v01 == '高苑科技大學':
        return float(325)
    elif v01 == '黎明技術學院':
        return float(11.9)
    elif v01 == '國立中興大學':
        return float(166)
    elif v01 == '元培醫事科技大學(原:元培科技大學)':
        return float(83.9)
    elif v01 == '國立成功大學':
        return float(306)
    elif v01 == '大同技術學院':
        return float(251)
    elif v01 == '國立東華大學':
        return float(172)
    elif v01 == '逢甲大學':
        return float(155)
    elif v01 == '國立政治大學':
        return float(12.3)
    elif v01 == '元智大學':
        return float(36.3)
    elif v01 == '龍華科技大學':
        return float(15.5)
    elif v01 == '國立中央大學':
        return float(41.8)
    elif v01 == '國立屏東大學(屏商校區)(原:國立屏東商)':
        return float(365)
    elif v01 == '國立虎尾科技大學':
        return float(224)
    elif v01 == '國立高雄師範大學':
        return float(347)
    elif v01 == '國立高雄海洋科技大學':
        return float(337)
    elif v01 == '中臺科技大學':
        return float(159)
    elif v01 == '國立臺北教育大學':
        return float(4.6)
    elif v01 == '國立勤益科技大學':
        return float(161)
    elif v01 == '國立臺灣大學':
        return float(4.0)
    else:
        return float('NaN')

In [403]:
df['Distance'] = df['v01'].apply(distance)

* 2012平均最低入取分數

In [404]:
def Score(v01):
    if v01 == '明志科技大學':
        return float(12.3)
    elif v01 == '東吳大學':
        return float(9.0)
    elif v01 == '明道大學':
        return float(202)
    elif v01 == '銘傳大學':
        return  float(7.1)
    elif v01 == '臺南應用科技大學':
        return float(299)
    elif v01 == '輔仁大學':
        return float(10.9)
    elif v01 == '高苑科技大學':
        return float(325)
    elif v01 == '黎明技術學院':
        return float(11.9)
    elif v01 == '國立中興大學':
        return float(166)
    elif v01 == '元培醫事科技大學(原:元培科技大學)':
        return float(83.9)
    elif v01 == '國立成功大學':
        return float(306)
    elif v01 == '大同技術學院':
        return float(251)
    elif v01 == '國立東華大學':
        return float(172)
    elif v01 == '逢甲大學':
        return float(155)
    elif v01 == '國立政治大學':
        return float(12.3)
    elif v01 == '元智大學':
        return float(36.3)
    elif v01 == '龍華科技大學':
        return float(15.5)
    elif v01 == '國立中央大學':
        return float(41.8)
    elif v01 == '國立屏東大學(屏商校區)(原:國立屏東商)':
        return float(365)
    elif v01 == '國立虎尾科技大學':
        return float(224)
    elif v01 == '國立高雄師範大學':
        return float(347)
    elif v01 == '國立高雄海洋科技大學':
        return float(337)
    elif v01 == '中臺科技大學':
        return float(159)
    elif v01 == '國立臺北教育大學':
        return float(4.6)
    elif v01 == '國立勤益科技大學':
        return float(161)
    elif v01 == '國立臺灣大學':
        return float(4.0)
    else:
        return float('NaN')

### 政治參與 / 投票

* 是否參加太陽花學運

In [405]:
df['q17'].value_counts()

從不(跳答R1題)    772
有時           140
很少           138
時常            42
拒答             2
Name: q17, dtype: int64

In [406]:
def Funflower(q17):
    if q17 == '從不(跳答R1題)':
        return '沒有'
    elif q17 == '拒答':
        return float('NaN')
    else:
        return '有'

In [407]:
df['Funflower_movement'] = df['q17'].apply(Funflower)

In [408]:
df2 = df.groupby(["Funflower_movement"]).count().reset_index()

fig2 = px.bar(df2, height=500, width=600,
             y = df.groupby(["Funflower_movement"]).size(),
             x="Funflower_movement",
             color='Funflower_movement',
             text = df.groupby(["Funflower_movement"]).size(), 
             title = '是否參與太陽花學運')
fig2.show()

* Funflower_movement to dummy

In [409]:
def Funflower_num(q17):
    if q17 == '從不(跳答R1題)':
        return int(0)
    elif q17 == '拒答':
        return float('NaN')
    else:
        return int(1)

In [410]:
df['Funflower_movement_num'] = df['q17'].apply(Funflower_num)

* Vote

In [411]:
df['q08'].value_counts()

民進黨     347
沒有去投    337
國民黨     190
無黨籍     119
投廢票      45
沒領票      30
不知道      21
拒答        3
無投票權      2
Name: q08, dtype: int64

In [412]:
def Vote(q08):
    if q08 == '民進黨':
        return '有'
    elif q08 == '沒有去投':
        return '沒有'
    elif q08 == '國民黨':
        return '有'
    elif q08 == '無黨籍':
        return '有'
    elif q08 == '投廢票':
        return '有'
    elif q08 == '沒領票':
        return '沒有'
    else:
        return float('NaN')

In [413]:
df['Vote'] = df['q08'].apply(Vote)

In [414]:
df3 = df.groupby(["Vote"]).count().reset_index()

fig3 = px.bar(df3, height=500, width=600,
             y = df.groupby(["Vote"]).size(),
             x="Vote",
             color='Vote',
             text = df.groupby(["Vote"]).size(), 
             title = '2014縣市長選舉是否投票')
fig3.show()

* Vote to Dummy

In [415]:
def Vote_num(Vote):
    if Vote == '沒有':
        return int(0)
    elif Vote == '有':
        return int(1)
    else:
        return float('NaN')

In [416]:
df['Vote_num'] = df['Vote'].apply(Vote_num)

In [417]:
df['Vote_num'].value_counts()

1.0    701
0.0    367
Name: Vote_num, dtype: int64

### 家庭屬性

* 主觀階級

In [418]:
df['u04'].value_counts()

中等    659
中下    245
中上    133
下層     55
上層      2
Name: u04, dtype: int64

In [419]:
df4 = df.groupby(["u04"]).count().reset_index()

fig4 = px.bar(df4, height=500, width=600,
             y = df.groupby(["u04"]).size(),
             x="u04",
             color='u04',
             text = df.groupby(["u04"]).size(), 
             title = '原生家庭社經階級')
fig4.show()

In [420]:
def Class(u04):
    if u04 == '上層':
        return int(5)
    elif u04 == '中上':
        return int(4)
    elif u04 == '中等':
        return int(3)
    elif u04 == '中下':
        return int(2)
    elif u04 == '下層':
        return int(1)
    else:
        return float('NaN')

In [421]:
df['Fam_Class'] = df['u04'].apply(Class)

In [422]:
df['Fam_Class'].value_counts()

3    659
2    245
4    133
1     55
5      2
Name: Fam_Class, dtype: int64

* 父母親收入

In [423]:
df['u13'].value_counts()

50000-69999      203
30000-49999      188
70000-99999      175
不知道              174
100000-199999    125
15000-29999      108
沒有收入              40
1-14999           35
200000-300000     16
300000以上          11
拒答                10
看情形                8
漏填                 1
Name: u13, dtype: int64

In [424]:
def Income(u13):
    if u13 == '300000以上':
        return int(8)
    elif u13 == '200000-300000':
        return int(7)
    elif u13 == '100000-199999':
        return int(6)
    elif u13 == '70000-99999':
        return int(5)
    elif u13 == '50000-69999':
        return int(4)
    elif u13 == '30000-49999':
        return int(3)    
    elif u13 == '15000-29999':
        return int(2)    
    elif u13 == '1-14999':
        return int(1)      
    else:
        return float('NaN')

In [425]:
df['Fam_Income'] = df['u13'].apply(Income)

### 控制變數

* 性別

In [426]:
df['sex'].value_counts()

女性    548
男性    546
Name: sex, dtype: int64

In [427]:
def Sex(sex):
    if sex == '女性':
        return int(0)
    elif sex == '男性':
        return int(1)
    else:
        return float('NaN')

In [428]:
df['Sex'] = df['sex'].apply(Sex)

In [429]:
df['Sex'].value_counts()

0    548
1    546
Name: Sex, dtype: int64

* 年齡

In [430]:
df['age'].value_counts()

22.0    691
23.0    364
24.0     15
無反應      11
25.0      7
21.0      2
26.0      1
27.0      1
28.0      1
29.0      1
Name: age, dtype: int64

In [431]:
def Age(age):
    if age == 22.0:
        return int(22.0)
    elif age == 23.0:
        return int(23.0)
    elif age == 24.0:
        return int(24.0)
    elif age == 25.0:
        return int(25.0)
    elif age == 21.0:
        return int(21.0)
    elif age == 26.0:
        return int(26.0)
    elif age == 27.0:
        return int(27.0)
    elif age == 28.0:
        return int(28.0)
    elif age == 29.0:
        return int(29.0)
    else:
        return float('NaN')

In [432]:
df['Age'] = df['age'].apply(Age)

In [433]:
df['Age'].value_counts()

22.0    691
23.0    364
24.0     15
25.0      7
21.0      2
28.0      1
26.0      1
27.0      1
29.0      1
Name: Age, dtype: int64

* 政黨三分類

In [434]:
df['partyid3'].value_counts()

中立無傾向       546
泛綠          302
泛藍          226
無反應及其他政黨     20
Name: partyid3, dtype: int64

In [435]:
def party(partyid3):
    if partyid3 == '泛藍':
        return str('泛藍')
    elif partyid3 == '泛綠':
        return str('泛綠')
    elif partyid3 == '中立無傾向':
        return str('中立無傾向')
    else:
        return float('NaN')

In [436]:
df['Party3'] = df['partyid3'].apply(party)

* 統獨三分類

In [437]:
df['tondu3'].value_counts()

傾向獨立      556
傾向維持現狀    442
傾向統一       78
無反應        18
Name: tondu3, dtype: int64

In [438]:
def tondu(tondu3):
    if tondu3 == '傾向獨立':
        return str('傾向獨立')
    elif tondu3 == '傾向維持現狀':
        return str('傾向維持現狀')
    elif tondu3 == '傾向統一':
        return str('傾向統一')
    else:
        return float('NaN')

In [439]:
df['Tondu3'] = df['tondu3'].apply(tondu)

In [440]:
df['Tondu3'].value_counts()

傾向獨立      556
傾向維持現狀    442
傾向統一       78
Name: Tondu3, dtype: int64

* 台灣人/中國人認同

In [441]:
df['t_cidentity'].value_counts()

臺灣人    945
都是     130
中國人     13
無反應      6
Name: t_cidentity, dtype: int64

In [442]:
def TC_identity(t_cidentity):
    if t_cidentity == '臺灣人':
        return str('臺灣人')
    elif t_cidentity == '都是':
        return str('都是')
    elif t_cidentity == '中國人':
        return str('中國人')
    else:
        return float('NaN')

In [443]:
df['TC_identity'] = df['t_cidentity'].apply(TC_identity)

In [444]:
df['TC_identity'].value_counts()

臺灣人    945
都是     130
中國人     13
Name: TC_identity, dtype: int64

* 政治興趣

In [445]:
df['q01'].value_counts()

不太有興趣    551
有點興趣     333
非常沒興趣    166
非常有興趣     44
Name: q01, dtype: int64

In [446]:
def pp_interest(q01):
    if q01 == '不太有興趣':
        return int(0)
    elif q01 == '非常沒興趣':
        return int(0)
    elif q01 == '有點興趣':
        return int(1)
    elif q01 == '非常有興趣':
        return int(1)
    else:
        return float('NaN')

In [447]:
df['Political_Interest'] = df['q01'].apply(pp_interest)

In [448]:
df['Political_Interest'].value_counts()

0    717
1    377
Name: Political_Interest, dtype: int64

* 對中國態度

In [449]:
df['p01'].value_counts()

不友善      677
友善       247
非常不友善    155
非常友善      14
不知道        1
Name: p01, dtype: int64

In [450]:
def CN_attitude(p01):
    if p01 == '非常不友善':
        return int(1)
    elif p01 == '不友善':
        return int(2)
    elif p01 == '友善':
        return int(3)
    elif p01 == '非常友善':
        return int(4)
    else:
        return float('NaN')

In [451]:
df['CN_attitude'] = df['p01'].apply(CN_attitude)

In [452]:
df['CN_attitude'].value_counts()

2.0    677
3.0    247
1.0    155
4.0     14
Name: CN_attitude, dtype: int64

* 父親 / 母親職業 -> 是否受雇於政府部門有影響

In [453]:
df['fcareer8'].value_counts()

私部門管理階層及專業人員    312
私部門勞工           231
私部門職員           190
軍公教人員           177
其他              137
農林漁牧             33
家管               14
Name: fcareer8, dtype: int64

In [454]:
def career_dummy(fcareer8):
    if fcareer8 == '軍公教人員':
        return int(1)
    elif fcareer8 == '私部門管理階層及專業人員':
        return int(0)
    elif fcareer8 == '私部門勞工':
        return int(0)
    elif fcareer8 == '私部門職員':
        return int(0)
    elif fcareer8 == '其他':
        return int(0)
    elif fcareer8 == '農林漁牧':
        return int(0)   
    elif fcareer8 == '家管':
        return int(0) 
    else:
        return float('NaN')

In [455]:
df['Fcareer'] = df['fcareer8'].apply(career_dummy)

In [456]:
df['Fcareer'].value_counts()

0    917
1    177
Name: Fcareer, dtype: int64

In [457]:
df['mcareer8'].value_counts()

家管              351
私部門職員           239
私部門勞工           132
私部門管理階層及專業人員    129
軍公教人員           124
其他              104
農林漁牧             15
Name: mcareer8, dtype: int64

In [458]:
df['Mcareer'] = df['mcareer8'].apply(career_dummy)

In [459]:
df['Mcareer'].value_counts()

0    970
1    124
Name: Mcareer, dtype: int64

* 城鄉差距 - 高中居住縣市

In [460]:
df['u10'].value_counts()

臺北市    207
新北市    181
臺中市    128
高雄市    117
臺南市     87
彰化縣     62
桃園市     56
新竹市     41
桃園縣     36
宜蘭縣     30
嘉義市     24
新竹縣     18
苗栗縣     17
雲林縣     16
屏東縣     14
基隆市     12
嘉義縣     11
臺東縣     11
花蓮縣     11
南投縣     11
澎湖縣      2
越南       1
金門縣      1
Name: u10, dtype: int64

In [461]:
def living_city(u10):
    if u10 == '臺北市':
        return int(1)
    else:
        return int(0)

In [462]:
df['LivingCity'] = df['u10'].apply(living_city)

In [463]:
df['LivingCity'].value_counts()

0    887
1    207
Name: LivingCity, dtype: int64

* 父親 / 母親教育程度

In [464]:
df['fedu'].value_counts()

高中、職     406
大學及以上    239
專科       204
國、初中     153
小學及以下     57
無反應       35
Name: fedu, dtype: int64

In [465]:
def fa_edu(fedu):
    if fedu == '大學及以上':
        return int(1)
    else:
        return int(0)

In [466]:
df['Fa_Edu'] = df['fedu'].apply(fa_edu)

In [467]:
df['Fa_Edu'].value_counts()

0    855
1    239
Name: Fa_Edu, dtype: int64

In [468]:
df['medu'].value_counts()

高中、職     494
專科       166
大學及以上    165
國、初中     154
小學及以下     80
無反應       35
Name: medu, dtype: int64

In [469]:
df['Ma_Edu'] = df['medu'].apply(fa_edu)

In [470]:
df['Ma_Edu'].value_counts()

0    929
1    165
Name: Ma_Edu, dtype: int64

* 父親 / 母親省級

In [471]:
df['sengi'].value_counts()

本省閩南人     815
本省客家人     128
大陸各省市人     80
無反應及其他     55
原住民        16
Name: sengi, dtype: int64

In [472]:
def sengi_dummy(sengi):
    if sengi == '大陸各省市人':
        return int(1)
    else:
        return int(0)

In [473]:
df['Fa_Sengi'] = df['sengi'].apply(sengi_dummy)

In [474]:
df['Fa_Sengi'].value_counts()

0    1014
1      80
Name: Fa_Sengi, dtype: int64

In [475]:
df['msengi'].value_counts()

本省閩南人     830
本省客家人     120
大陸各省市人     66
無反應及其他     54
原住民        24
Name: msengi, dtype: int64

In [476]:
df['Ma_Sengi'] = df['msengi'].apply(sengi_dummy)

In [477]:
df['Ma_Sengi'].value_counts()

0    1028
1      66
Name: Ma_Sengi, dtype: int64

## 模型先驗假設定
Doc：
https://www.statsmodels.org/devel/regression.html </br>
OLS：
https://datatofish.com/statsmodels-linear-regression/ </br>
Logit：
https://www.geeksforgeeks.org/logistic-regression-using-statsmodels/ </br>
Ordered Logit：
https://www.statsmodels.org/dev/examples/notebooks/generated/ordinal_regression.html</br>
moderator–mediator：
https://pypi.org/project/PyProcessMacro/

#### 變數操作記錄
1. **Political_Interest** 放入後會稀釋家庭與學校效果(雖然仍然顯著) **0.000 / 0.059** ->  **0.012 / 0.098**
2. 最終模型 **class 0.059** -> **income 0.123**，但家庭對政治參與用 **income 0.010** (要確認)
3. **CN_attitude** 家庭對政治參與 fam_class 0.004 -> 0.003 最終模型 **0.000 / 0.059** -> **0.001 / 0.049** (但是已經有放統獨三分類)
4. **Age** 最終模型 **0.00 / 0.59** -> **0.00 / 0.45**

### Reshape Data

In [478]:
ml_data = df.loc[:, ['v01', 'Funflower_movement_num', 'Vote_num', 'Fam_Class', 'Fam_Income', 'SchoolType', 'SchoolType_num', 'Major', 'Distance', 'Sex', 'Party3', 'Tondu3', 
                'TC_identity', 'Fcareer', 'Mcareer', 'LivingCity', 'Fa_Edu', 'Ma_Edu', 'Fa_Sengi', 'Ma_Sengi']]
ml_data = ml_data.rename(columns = {'v01': 'School', 'Funflower_movement_num': 'Funflower_movement_num', 'Vote_num': 'Vote_num', 'Fam_Class': 'Fam_Class', 
                            'Fam_Income':'Fam_Income', 'SchoolType': 'SchoolType', 'SchoolType_num': 'SchoolType_num', 'Major':'Major', 'Distance': 'Distance', 
                            'Sex':'Sex', 'Party3': 'Party3', 'Tondu3':'Tondu3','TC_identity':'TC_identity', 'Fcareer':'Fcareer', 'Mcareer':'Mcareer', 
                            'LivingCity':'LivingCity', 'Fa_Edu':'Fa_Edu', 'Ma_Edu':'Ma_Edu', 'Fa_Sengi':'Fa_Sengi', 'Ma_Sengi':'Ma_Sengi'})
ml_data = ml_data.dropna()
ml_data = ml_data.astype({"Funflower_movement_num":'int64', "Vote_num":'int64', "Fam_Class":'int64', 'Fam_Income':'int64', "SchoolType_num":'int64'}) 
SchoolType_order = pd.CategoricalDtype(categories = ['公立一般', '私立一般', '公立技職', '私立技職'], ordered = True)
ml_data['SchoolType'] = ml_data['SchoolType'].astype(SchoolType_order)
ml_data

Unnamed: 0,School,Funflower_movement_num,Vote_num,Fam_Class,Fam_Income,SchoolType,SchoolType_num,Major,Distance,Sex,Party3,Tondu3,TC_identity,Fcareer,Mcareer,LivingCity,Fa_Edu,Ma_Edu,Fa_Sengi,Ma_Sengi
0,國立臺灣大學,1,1,3,5,公立一般,4,0,4.0,0,泛綠,傾向獨立,臺灣人,0,0,1,1,1,1,0
1,國立政治大學,1,0,3,4,公立一般,4,0,12.3,0,泛綠,傾向獨立,臺灣人,1,0,1,1,0,0,0
2,國立政治大學,1,1,3,4,公立一般,4,0,12.3,0,泛綠,傾向獨立,臺灣人,0,0,0,0,0,0,0
4,國立政治大學,1,1,3,5,公立一般,4,0,12.3,0,泛綠,傾向獨立,臺灣人,0,0,0,0,0,0,0
5,國立政治大學,1,1,3,4,公立一般,4,0,12.3,0,中立無傾向,傾向獨立,臺灣人,1,0,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1088,國立高雄師範大學,0,0,2,4,公立一般,4,0,347.0,0,中立無傾向,傾向維持現狀,臺灣人,0,0,0,1,0,0,0
1089,國立高雄師範大學,1,1,3,5,公立一般,4,0,347.0,0,泛綠,傾向獨立,臺灣人,0,0,0,1,0,0,0
1090,國立高雄師範大學,0,1,3,4,公立一般,4,0,347.0,1,泛綠,傾向獨立,臺灣人,1,0,0,0,0,0,0
1092,國立高雄師範大學,1,0,3,4,公立一般,4,0,347.0,1,中立無傾向,傾向獨立,臺灣人,0,0,0,0,0,0,0


### 最基本三變數模型

* 家庭對政治參與

In [479]:
fam_pp_log = smf.logit(formula = "Funflower_movement_num ~ Fam_Class + Distance + Sex + C(Party3) + C(Tondu3) + C(TC_identity)", data = ml_data).fit()
fam_pp_log.summary()

Optimization terminated successfully.
         Current function value: 0.563753
         Iterations 6


0,1,2,3
Dep. Variable:,Funflower_movement_num,No. Observations:,824.0
Model:,Logit,Df Residuals:,814.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 16 Sep 2022",Pseudo R-squ.:,0.09712
Time:,13:40:52,Log-Likelihood:,-464.53
converged:,True,LL-Null:,-514.5
Covariance Type:,nonrobust,LLR p-value:,1.622e-17

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.2690,1.189,-1.909,0.056,-4.599,0.061
Sex[T.1],0.2948,0.162,1.817,0.069,-0.023,0.613
C(Party3)[T.泛綠],0.7280,0.184,3.954,0.000,0.367,1.089
C(Party3)[T.泛藍],-0.1495,0.213,-0.703,0.482,-0.566,0.267
C(Tondu3)[T.傾向統一],-0.5553,0.350,-1.585,0.113,-1.242,0.132
C(Tondu3)[T.傾向維持現狀],-0.6309,0.178,-3.553,0.000,-0.979,-0.283
C(TC_identity)[T.臺灣人],1.0051,1.101,0.913,0.361,-1.152,3.162
C(TC_identity)[T.都是],0.8935,1.118,0.799,0.424,-1.298,3.085
Fam_Class,0.3407,0.117,2.902,0.004,0.111,0.571


* 學校對政治參與(不用跑)

In [480]:
school_pp_log = smf.logit(formula = "Funflower_movement_num ~ SchoolType_num + Distance + Sex + C(Party3) + C(Tondu3) + C(TC_identity)", data = ml_data).fit()
school_pp_log.summary()

Optimization terminated successfully.
         Current function value: 0.554562
         Iterations 6


0,1,2,3
Dep. Variable:,Funflower_movement_num,No. Observations:,824.0
Model:,Logit,Df Residuals:,814.0
Method:,MLE,Df Model:,9.0
Date:,"Fri, 16 Sep 2022",Pseudo R-squ.:,0.1118
Time:,13:40:52,Log-Likelihood:,-456.96
converged:,True,LL-Null:,-514.5
Covariance Type:,nonrobust,LLR p-value:,1.3530000000000002e-20

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.8248,1.124,-1.623,0.105,-4.028,0.379
Sex[T.1],0.3429,0.165,2.084,0.037,0.020,0.665
C(Party3)[T.泛綠],0.7300,0.186,3.917,0.000,0.365,1.095
C(Party3)[T.泛藍],-0.2040,0.214,-0.951,0.341,-0.624,0.216
C(Tondu3)[T.傾向統一],-0.4509,0.349,-1.293,0.196,-1.135,0.233
C(Tondu3)[T.傾向維持現狀],-0.5666,0.179,-3.162,0.002,-0.918,-0.215
C(TC_identity)[T.臺灣人],0.6598,1.099,0.601,0.548,-1.493,2.813
C(TC_identity)[T.都是],0.5448,1.115,0.489,0.625,-1.640,2.730
SchoolType_num,0.3348,0.070,4.789,0.000,0.198,0.472


* 家庭對學校</br>
A -> 預算</br>
A 如果是類別的時候</br>
order logit試試看

In [481]:
school_fm_lm = smf.ols(formula = "SchoolType_num ~ Fam_Class + Sex + Fcareer + Mcareer + Fa_Edu + Ma_Edu + Fa_Sengi + Ma_Sengi + LivingCity", data = ml_data).fit()
school_fm_lm.summary()

0,1,2,3
Dep. Variable:,SchoolType_num,R-squared:,0.115
Model:,OLS,Adj. R-squared:,0.105
Method:,Least Squares,F-statistic:,11.7
Date:,"Fri, 16 Sep 2022",Prob (F-statistic):,2.06e-17
Time:,13:40:52,Log-Likelihood:,-1266.1
No. Observations:,824,AIC:,2552.0
Df Residuals:,814,BIC:,2599.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.5482,0.166,9.350,0.000,1.223,1.873
Sex[T.1],-0.1145,0.079,-1.448,0.148,-0.270,0.041
Fam_Class,0.2728,0.057,4.750,0.000,0.160,0.386
Fcareer,0.1063,0.113,0.939,0.348,-0.116,0.328
Mcareer,-0.0053,0.137,-0.039,0.969,-0.274,0.263
Fa_Edu,0.5097,0.113,4.494,0.000,0.287,0.732
Ma_Edu,0.1169,0.135,0.865,0.387,-0.148,0.382
Fa_Sengi,-0.0145,0.164,-0.089,0.929,-0.336,0.307
Ma_Sengi,-0.0377,0.173,-0.218,0.828,-0.378,0.303

0,1,2,3
Omnibus:,521.123,Durbin-Watson:,0.253
Prob(Omnibus):,0.0,Jarque-Bera (JB):,48.204
Skew:,-0.01,Prob(JB):,3.41e-11
Kurtosis:,1.815,Cond. No.,15.3


In [482]:
# Check SchoolType if as ordinal
ml_data['SchoolType'].dtypes

CategoricalDtype(categories=['公立一般', '私立一般', '公立技職', '私立技職'], ordered=True)

In [486]:
# 家庭對學校 Ordered Logit -> 變顯著負相關
school_fm_log = OrderedModel.from_formula("SchoolType ~ Fam_Class + Sex + Fcareer + Mcareer + Fa_Edu + Ma_Edu + Fa_Sengi + Ma_Sengi + LivingCity", ml_data, distr = 'logit').fit(method='bfgs', disp=False)                                   
school_fm_log.summary()

0,1,2,3
Dep. Variable:,SchoolType,Log-Likelihood:,-1006.4
Model:,OrderedModel,AIC:,2037.0
Method:,Maximum Likelihood,BIC:,2093.0
Date:,"Fri, 16 Sep 2022",,
Time:,13:41:18,,
No. Observations:,824,,
Df Residuals:,812,,
Df Model:,12,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Sex[T.1],0.1251,0.130,0.966,0.334,-0.129,0.379
Fam_Class,-0.4139,0.095,-4.349,0.000,-0.600,-0.227
Fcareer,-0.2207,0.184,-1.199,0.230,-0.581,0.140
Mcareer,0.0120,0.220,0.054,0.957,-0.419,0.443
Fa_Edu,-0.7650,0.185,-4.134,0.000,-1.128,-0.402
Ma_Edu,-0.2586,0.216,-1.199,0.230,-0.681,0.164
Fa_Sengi,0.0324,0.265,0.123,0.903,-0.486,0.551
Ma_Sengi,0.1567,0.280,0.560,0.576,-0.392,0.705
LivingCity,-0.5450,0.169,-3.232,0.001,-0.876,-0.215


* 家庭 + 學校對政治參與

In [484]:
school_fam_pp_log = smf.logit(formula = "Funflower_movement_num ~ SchoolType_num + Fam_Class + Distance + Sex + Major + C(Party3) + C(Tondu3) + C(TC_identity)", data = ml_data).fit()
school_fam_pp_log.summary()

Optimization terminated successfully.
         Current function value: 0.547352
         Iterations 6


0,1,2,3
Dep. Variable:,Funflower_movement_num,No. Observations:,824.0
Model:,Logit,Df Residuals:,812.0
Method:,MLE,Df Model:,11.0
Date:,"Fri, 16 Sep 2022",Pseudo R-squ.:,0.1234
Time:,13:40:52,Log-Likelihood:,-451.02
converged:,True,LL-Null:,-514.5
Covariance Type:,nonrobust,LLR p-value:,7.149999999999999e-22

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.5720,1.194,-2.154,0.031,-4.912,-0.232
Sex[T.1],0.4053,0.167,2.424,0.015,0.078,0.733
C(Party3)[T.泛綠],0.7325,0.188,3.901,0.000,0.364,1.101
C(Party3)[T.泛藍],-0.2420,0.218,-1.110,0.267,-0.669,0.185
C(Tondu3)[T.傾向統一],-0.4701,0.352,-1.336,0.182,-1.160,0.220
C(Tondu3)[T.傾向維持現狀],-0.5528,0.181,-3.060,0.002,-0.907,-0.199
C(TC_identity)[T.臺灣人],0.7740,1.103,0.702,0.483,-1.388,2.936
C(TC_identity)[T.都是],0.6268,1.120,0.560,0.576,-1.568,2.821
SchoolType_num,0.2624,0.073,3.577,0.000,0.119,0.406


#### Political Socialization Agent
1. 家庭 - 省級 家庭討論
2. 學校 - 學校討論 相關課程
3. 媒體 - 媒體接觸
4. 同儕 - 同儕討論 同儕態度
5. 重要事件 - 太陽花(V)
6. 世代 - 同世代不討論(V)