In [1]:
# 라이브러리 불러오기
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 경고창 무시
import warnings
warnings.filterwarnings('ignore')

# 예측 모델링을 위한 라이브러리 불러오기
from sklearn.model_selection import train_test_split # 훈련/평가 데이터분리
from sklearn.linear_model import LogisticRegression # 로지스틱 모델
from sklearn.svm import LinearSVC # 분류용 SVM모델
from sklearn import metrics # 평가용 모듈

from sklearn.model_selection import cross_val_score # 교차검증 함수

# 모델 평가를 위한 라이브러리 불러오기
from sklearn.metrics import classification_report # 분류평가지표

In [2]:
# 데이터 불러오기
df = pd.read_csv("data/data_students.csv")
df.head(1)

Unnamed: 0,Marital status,Application mode,Application order,Course,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,Mother's occupation,...,Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target,Daytime/evening attendance
0,single,17,5,Animation and Multimedia Design,Secondary education,122.0,Portuguese,Basic Education 3rd Cycle (9th/10th/11th Year)...,Other - 11th Year of Schooling,"Personal Services, Security and Safety Workers...",...,0,0,0,0.0,0,10.8,1.4,1.74,Dropout,daytime


In [3]:
# 통계
df["Target"].value_counts()

Target
Graduate    2209
Dropout     1421
Enrolled     794
Name: count, dtype: int64

In [4]:
# 중도탈락률 계산 : 약 32%
1421/(2209+1421+794)

0.3212025316455696

In [5]:
# Target 컬럼의 값이 'Graduate' 또는 'Dropout'인 행들로 필터링
r_df = df[(df['Target'] == "Graduate") | (df['Target'] == "Dropout")]

# 조건을 제외한 데이터프레임
r_df = df[df['Target'] != "Enrolled"]
# 중퇴 및 졸업 여부 인코딩
r_df['Target'] = r_df['Target'].map({"Graduate":1, "Dropout":0})
r_df

Unnamed: 0,Marital status,Application mode,Application order,Course,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,Mother's occupation,...,Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target,Daytime/evening attendance
0,single,17,5,Animation and Multimedia Design,Secondary education,122.0,Portuguese,Basic Education 3rd Cycle (9th/10th/11th Year)...,Other - 11th Year of Schooling,"Personal Services, Security and Safety Workers...",...,0,0,0,0.000000,0,10.8,1.4,1.74,0,daytime
1,single,15,1,Tourism,Secondary education,160.0,Portuguese,Secondary Education - 12th Year of Schooling o...,Higher Education - Degree,Intermediate Level Technicians and Professions,...,6,6,6,13.666667,0,13.9,-0.3,0.79,1,daytime
2,single,1,5,Communication Design,Secondary education,122.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,Unskilled Workers,...,6,0,0,0.000000,0,10.8,1.4,1.74,0,daytime
3,single,17,2,Journalism and Communication,Secondary education,122.0,Portuguese,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Basic education 1st cycle (4th/5th year) or eq...,"Personal Services, Security and Safety Workers...",...,6,10,5,12.400000,0,9.4,-0.8,-3.12,1,daytime
4,married,39,1,Social Service (evening attendance),Secondary education,100.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Unskilled Workers,...,6,6,6,13.000000,0,13.9,-0.3,0.79,1,evening
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,single,1,6,Journalism and Communication,Secondary education,125.0,Portuguese,Secondary Education - 12th Year of Schooling o...,Secondary Education - 12th Year of Schooling o...,"Personal Services, Security and Safety Workers...",...,6,8,5,12.666667,0,15.5,2.8,-4.06,1,daytime
4420,single,1,2,Journalism and Communication,Secondary education,120.0,Russian,Secondary Education - 12th Year of Schooling o...,Secondary Education - 12th Year of Schooling o...,Unskilled Workers,...,6,6,2,11.000000,0,11.1,0.6,2.02,0,daytime
4421,single,1,1,Nursing,Secondary education,154.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,Unskilled Workers,...,8,9,1,13.500000,0,13.9,-0.3,0.79,0,daytime
4422,single,1,1,Management,Secondary education,180.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,"Skilled Workers in Industry, Construction and ...",...,5,6,5,12.000000,0,9.4,-0.8,-3.12,1,daytime


In [6]:
r_df['Daytime/evening attendance'].value_counts()

Daytime/evening attendance
daytime    3222
evening     408
Name: count, dtype: int64

In [7]:
# 주간수업과 야간수업을 인코딩
r_df['Daytime/evening attendance'] = r_df['Daytime/evening attendance'].map({"daytime":1, "evening":0})

In [8]:
r_df.head()

Unnamed: 0,Marital status,Application mode,Application order,Course,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,Mother's occupation,...,Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target,Daytime/evening attendance
0,single,17,5,Animation and Multimedia Design,Secondary education,122.0,Portuguese,Basic Education 3rd Cycle (9th/10th/11th Year)...,Other - 11th Year of Schooling,"Personal Services, Security and Safety Workers...",...,0,0,0,0.0,0,10.8,1.4,1.74,0,1
1,single,15,1,Tourism,Secondary education,160.0,Portuguese,Secondary Education - 12th Year of Schooling o...,Higher Education - Degree,Intermediate Level Technicians and Professions,...,6,6,6,13.666667,0,13.9,-0.3,0.79,1,1
2,single,1,5,Communication Design,Secondary education,122.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,Unskilled Workers,...,6,0,0,0.0,0,10.8,1.4,1.74,0,1
3,single,17,2,Journalism and Communication,Secondary education,122.0,Portuguese,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Basic education 1st cycle (4th/5th year) or eq...,"Personal Services, Security and Safety Workers...",...,6,10,5,12.4,0,9.4,-0.8,-3.12,1,1
4,married,39,1,Social Service (evening attendance),Secondary education,100.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Unskilled Workers,...,6,6,6,13.0,0,13.9,-0.3,0.79,1,0


In [9]:
# r_df["Father's qualification"].value_counts()

In [10]:
# r_df["Mother's qualification"].value_counts()

In [11]:
# 학력 수준을 라벨링하는 함수
def label_education(level):
    if level in ["Can't read or write", "Can read without having a 4th year of schooling"]:
        return "No Education"
    elif level in [
        "Basic education 1st cycle (4th/5th year) or equiv.", 
        "7th Year (Old)", 
        "8th year of schooling", 
        "7th year of schooling"
    ]:
        return "Primary Education"
    elif level in [
        "Basic Education 2nd Cycle (6th/7th/8th Year) or Equiv.", 
        "Basic Education 3rd Cycle (9th/10th/11th Year) or Equiv.",
        "9th Year of Schooling - Not Completed", 
        "10th Year of Schooling", 
        "11th Year of Schooling - Not Completed",
        "Other - 11th Year of Schooling", 
        "12th Year of Schooling - Not Completed"
    ]:
        return "Lower Secondary Education"
    elif level in [
        "Secondary Education - 12th Year of Schooling or Eq.", 
        "General commerce course", 
        "Technical-professional course",
        "Technological specialization course", 
        "Frequency of Higher Education", 
        "Professional higher technical course",
        "2nd cycle of the general high school course"
    ]:
        return "Upper Secondary Education"
    elif level in [
        "Higher Education - Degree", 
        "Higher Education - Bachelor's Degree", 
        "Higher Education - Master's",
        "Higher Education - Doctorate", 
        "Higher Education - Doctorate (3rd cycle)", 
        "Higher education - degree (1st cycle)",
        "Specialized higher studies course", 
        "Higher Education - Master (2nd cycle)"
    ]:
        return "Higher Education"
    else:
        return "Unknown"

# 어머니와 아버지의 학력 수준 라벨링 적용
r_df['Mother_Education_Label'] = df['Mother\'s qualification'].apply(label_education)
r_df['Father_Education_Label'] = df['Father\'s qualification'].apply(label_education)

# 결과 확인
print(r_df['Mother_Education_Label'].value_counts())
print(r_df['Father_Education_Label'].value_counts())

Mother_Education_Label
Lower Secondary Education    1303
Upper Secondary Education     882
Primary Education             847
Higher Education              465
Unknown                       127
No Education                    6
Name: count, dtype: int64
Father_Education_Label
Lower Secondary Education    1408
Primary Education            1025
Upper Secondary Education     754
Higher Education              319
Unknown                       114
No Education                   10
Name: count, dtype: int64


In [12]:
r_df['Mother_Education_lev'] = r_df['Mother_Education_Label'].map({
    "No Education":0, "Primary Education":1, "Lower Secondary Education":2, "Upper Secondary Education":3, "Higher Education":4})

r_df['Father_Education_lev'] = r_df['Father_Education_Label'].map({
    "No Education":0, "Primary Education":1, "Lower Secondary Education":2, "Upper Secondary Education":3, "Higher Education":4})

r_df

Unnamed: 0,Marital status,Application mode,Application order,Course,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,Mother's occupation,...,Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target,Daytime/evening attendance,Mother_Education_Label,Father_Education_Label,Mother_Education_lev,Father_Education_lev
0,single,17,5,Animation and Multimedia Design,Secondary education,122.0,Portuguese,Basic Education 3rd Cycle (9th/10th/11th Year)...,Other - 11th Year of Schooling,"Personal Services, Security and Safety Workers...",...,0,10.8,1.4,1.74,0,1,Lower Secondary Education,Lower Secondary Education,2.0,2.0
1,single,15,1,Tourism,Secondary education,160.0,Portuguese,Secondary Education - 12th Year of Schooling o...,Higher Education - Degree,Intermediate Level Technicians and Professions,...,0,13.9,-0.3,0.79,1,1,Upper Secondary Education,Higher Education,3.0,4.0
2,single,1,5,Communication Design,Secondary education,122.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,Unskilled Workers,...,0,10.8,1.4,1.74,0,1,Primary Education,Primary Education,1.0,1.0
3,single,17,2,Journalism and Communication,Secondary education,122.0,Portuguese,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Basic education 1st cycle (4th/5th year) or eq...,"Personal Services, Security and Safety Workers...",...,0,9.4,-0.8,-3.12,1,1,Lower Secondary Education,Primary Education,2.0,1.0
4,married,39,1,Social Service (evening attendance),Secondary education,100.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic Education 2nd Cycle (6th/7th/8th Year) o...,Unskilled Workers,...,0,13.9,-0.3,0.79,1,0,Primary Education,Lower Secondary Education,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,single,1,6,Journalism and Communication,Secondary education,125.0,Portuguese,Secondary Education - 12th Year of Schooling o...,Secondary Education - 12th Year of Schooling o...,"Personal Services, Security and Safety Workers...",...,0,15.5,2.8,-4.06,1,1,Upper Secondary Education,Upper Secondary Education,3.0,3.0
4420,single,1,2,Journalism and Communication,Secondary education,120.0,Russian,Secondary Education - 12th Year of Schooling o...,Secondary Education - 12th Year of Schooling o...,Unskilled Workers,...,0,11.1,0.6,2.02,0,1,Upper Secondary Education,Upper Secondary Education,3.0,3.0
4421,single,1,1,Nursing,Secondary education,154.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,Unskilled Workers,...,0,13.9,-0.3,0.79,0,1,Primary Education,Primary Education,1.0,1.0
4422,single,1,1,Management,Secondary education,180.0,Portuguese,Basic education 1st cycle (4th/5th year) or eq...,Basic education 1st cycle (4th/5th year) or eq...,"Skilled Workers in Industry, Construction and ...",...,0,9.4,-0.8,-3.12,1,1,Primary Education,Primary Education,1.0,1.0


In [13]:
# 부모학력 수준별 졸업률 조사
df_gp = r_df.groupby('Mother_Education_lev')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Mother_Education_lev,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,6,2,33.3
1.0,847,461,54.4
2.0,1303,861,66.1
3.0,882,571,64.7
4.0,465,283,60.9


In [14]:
df_gp = r_df.groupby('Father_Education_lev')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Father_Education_lev,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,10,3,30.0
1.0,1025,587,57.3
2.0,1408,956,67.9
3.0,754,457,60.6
4.0,319,178,55.8


In [15]:
# 에듀케이션 레벨이 2인 어머니,아버지를 가진 학생이 졸업율이 높게 나오는 결과가 나왔다. 
# 0 4 1 3 2 순으로 졸업률이 높다


In [16]:
r_df['Father_Education_tier'] = r_df['Father_Education_lev'].map({0:0,4:1,1:2,3:3,2:4})
r_df['Mother_Education_tier'] = r_df['Mother_Education_lev'].map({0:0,4:1,1:2,3:3,2:4})

In [17]:
# 가설1 : 부모의 학력수준이 높을수록 졸업률이 높을것이다.
# 결과 : 부모의 학력수준이 낮을수록 자녀의 대학 졸업률이 오히려 높은것으로 드러난다.

In [18]:
# 가설2 : 높은 실업률과 인플레이션율이 있는 지역의 학생은 졸업률이 낮을것이다.

In [None]:
# 실패

In [19]:
# 가설3 : Age at enrollment: 등록 시 나이가 빠를수록 졸업률이 높을것이다.

In [20]:
def categorize_age(age):
    if age < 20:
        return 'Teenager'
    elif 20 <= age < 30:
        return '20s'
    elif 30 <= age < 40:
        return '30s'
    else:
        return '40+'

# Age at enrollment 컬럼이 존재한다고 가정하고, 새 컬럼을 추가
r_df['Age_Group'] = r_df['Age at enrollment'].apply(categorize_age)

# Age_Group 값을 숫자로 매핑
age_group_map = {
    '40+': 0,
    '30s': 1,
    '20s': 2,
    'Teenager': 3
}

# Age_Group 컬럼을 숫자로 매핑하여 새로운 컬럼에 저장
r_df['Age_Group_Mapped'] = r_df['Age_Group'].map(age_group_map)

In [21]:
df_gp = r_df.groupby('Age_Group_Mapped')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Age_Group_Mapped,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,218,91,41.7
1,403,145,36.0
2,1388,761,54.8
3,1621,1212,74.8


In [22]:
# 결과 : 입학시 나이가 어릴수록 졸업률이 분명하게 높은것으로 드러난다.

In [23]:
# 가설4 : 주간 수업 참여 학생이 야간보다 졸업률이 높을것이다. Daytime/evening attendance: 주간/야간 수업 여부

In [24]:
df_gp = r_df.groupby('Daytime/evening attendance')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Daytime/evening attendance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,408,201,49.3
1,3222,2008,62.3


In [25]:
# 결과 : 주간수업 학생이 졸업률이 높은것으로 드러난다.

In [26]:
# 특정 과목의 성적이 낮은 학생은 졸업률이 낮을것이다.

In [58]:
# 패스

In [27]:
# 국적에 따라 졸업률이 낮을것이다. Nacionality: 국적

In [60]:
# 패스

In [28]:
# 장학금 수혜 여부에 따른 졸업률 차이

In [29]:
df_gp = r_df.groupby('Scholarship holder')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Scholarship holder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,2661,1374,51.6
yes,969,835,86.2


In [30]:
# 등록금 납부 상태에 따른 졸업률 차이
df_gp = r_df.groupby('Tuition fees up to date')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Tuition fees up to date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,486,29,6.0
yes,3144,2180,69.3


In [31]:
# 전공에 따른 졸업률 차이
df_gp = r_df.groupby('Course')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)

# 졸업률 구간을 나누고, 각 구간에 번호를 부여
df_gp['Graduation_Rate_Band'] = pd.cut(df_gp['ratio'], 
                                       bins=[0, 40, 60, 100], 
                                       labels=[0, 1, 2])

# r_df의 Course 컬럼에 따라 Graduation_Rate_Band 값을 새로 추가
r_df = r_df.merge(df_gp[['Graduation_Rate_Band']], on='Course', how='left')

In [33]:
# 성별에 따른 졸업률 차이
df_gp = r_df.groupby('Gender')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,2381,1661,69.8
male,1249,548,43.9


In [34]:
# 남성의 졸업률이 여성보다 낮은것으로 드러난다.
gender_map = {
    'male': 0,
    'female': 1
}

# Gender 컬럼을 숫자로 매핑하여 새로운 컬럼에 저장
r_df['encoded_gender'] = r_df['Gender'].map(gender_map)

In [35]:
# 채무 여부에 따른 졸업률 차이

In [36]:
df_gp = r_df.groupby('Debtor')['Target'].agg(['count','sum'])
df_gp['ratio'] = round( df_gp['sum']/df_gp['count'] *100, 1)
df_gp

Unnamed: 0_level_0,count,sum,ratio
Debtor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,3217,2108,65.5
yes,413,101,24.5


In [37]:
# 채무 여부에 따른 졸업률의 차이가 존재한다. no인 학생의 졸업률이 3배가량 높음
Debtor_map = {
    'yes': 0,
    'no': 1
}

# Debtor 컬럼을 숫자로 매핑하여 새로운 컬럼에 저장
r_df['encoded_Debtor'] = r_df['Debtor'].map(Debtor_map)

In [38]:
r_df['Graduation_Rate_Band'] = r_df['Graduation_Rate_Band'].astype('int64')
# 카테고리 데이터 숫자로 변환


# Mother_Education_tier와 Father_Education_tier 컬럼의 NaN 값을 -1로 채우기
r_df['Mother_Education_tier'] = r_df['Mother_Education_tier'].fillna(-1)
r_df['Father_Education_tier'] = r_df['Father_Education_tier'].fillna(-1)

In [39]:
r_df.head(1)

Unnamed: 0,Marital status,Application mode,Application order,Course,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,Mother's occupation,...,Father_Education_Label,Mother_Education_lev,Father_Education_lev,Father_Education_tier,Mother_Education_tier,Age_Group,Age_Group_Mapped,Graduation_Rate_Band,encoded_gender,encoded_Debtor
0,single,17,5,Animation and Multimedia Design,Secondary education,122.0,Portuguese,Basic Education 3rd Cycle (9th/10th/11th Year)...,Other - 11th Year of Schooling,"Personal Services, Security and Safety Workers...",...,Lower Secondary Education,2.0,2.0,4.0,4.0,20s,2,1,0,1


In [40]:
#문제와 답 분리
# 원하는 컬럼만 선택하여 X에 담기
X = r_df[['Daytime/evening attendance', 'Mother_Education_tier', 'Father_Education_tier', 'Age_Group_Mapped', 'Graduation_Rate_Band', 'encoded_gender', 'encoded_Debtor']]
y = r_df['Target']

In [41]:
y

0       0
1       1
2       0
3       1
4       1
       ..
3625    1
3626    0
3627    0
3628    1
3629    1
Name: Target, Length: 3630, dtype: int64

In [42]:
y.shape

(3630,)

In [43]:
# 훈련용과 평가용 분리
X_train, X_test, y_train, y_test = train_test_split(X,y, # 문제와 정답
                                                    test_size=0.3, # 테스트데이터의 비율
                                                   random_state=819, # 같은 샘플추출을 위한 난수고정
                                                   stratify=y) # 훈련용과 평가용의 정답 클래스 비율 유지

In [44]:
# 선형분류 모델활용 학습
# 1. 모델객체 생성
Target_logi = LogisticRegression()
Target_svm = LinearSVC()

In [45]:
X['Graduation_Rate_Band'].value_counts()

Graduation_Rate_Band
2    1916
1    1265
0     449
Name: count, dtype: int64

In [46]:
# X_train과 y_train의 결측값 확인
print(X.isnull().sum())  # 각 컬럼별 결측값 개수


Daytime/evening attendance    0
Mother_Education_tier         0
Father_Education_tier         0
Age_Group_Mapped              0
Graduation_Rate_Band          0
encoded_gender                0
encoded_Debtor                0
dtype: int64


In [47]:
# 2. 모델학습
Target_logi.fit(X_train, y_train)
Target_svm.fit(X_train, y_train)

In [50]:
# 3. 모델예측
logi_pre = Target_logi.predict(X_test)
svm_pre = Target_svm.predict(X_test)

In [52]:
# 4. 모델평가(정확도)
print("logi accuarcy : ",metrics.accuracy_score(y_test, logi_pre))
print("svm accuarcy : ",metrics.accuracy_score(y_test, svm_pre))

logi accuarcy :  0.72910927456382
svm accuarcy :  0.72910927456382


In [56]:
# 다양한 분류평가지표로 검증
print(classification_report(y_test, logi_pre))
print(classification_report(y_test, svm_pre))

              precision    recall  f1-score   support

           0       0.69      0.56      0.62       426
           1       0.75      0.84      0.79       663

    accuracy                           0.73      1089
   macro avg       0.72      0.70      0.70      1089
weighted avg       0.72      0.73      0.72      1089

              precision    recall  f1-score   support

           0       0.69      0.56      0.62       426
           1       0.75      0.84      0.79       663

    accuracy                           0.73      1089
   macro avg       0.72      0.70      0.70      1089
weighted avg       0.72      0.73      0.72      1089

