In [1]:
from sklearn import tree
import pandas as pd
import os

In [2]:
# Load the semicolon-separated survey file that sits one directory up and
# clean it in a single pass.
df = (
    pd.read_csv(os.path.join('..','divorce_data.csv'), sep=";")
    # Discard columns in which every value is missing.
    .dropna(axis='columns', how='all')
    # Discard any remaining row that contains at least one missing value.
    .dropna()
)
df.head()

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,...,Q46,Q47,Q48,Q49,Q50,Q51,Q52,Q53,Q54,Divorce
0,2,2,4,1,0,0,0,0,0,0,...,2,1,3,3,3,2,3,2,1,1
1,4,4,4,4,4,0,0,4,4,4,...,2,2,3,4,4,4,4,2,2,1
2,2,2,2,2,1,3,2,1,1,2,...,3,2,3,1,1,1,2,2,2,1
3,3,2,3,2,3,3,3,3,3,3,...,2,2,3,3,3,3,2,2,2,1
4,2,2,1,1,1,1,0,0,0,0,...,2,1,2,3,2,2,2,1,0,1


In [3]:
# Split the cleaned frame into the feature matrix (every column except
# "Divorce") and the label column ("Divorce").
data = df.drop(columns="Divorce")
target = df["Divorce"]

In [4]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for evaluation; the fixed seed keeps the
# train/test partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=42,
)

In [5]:
# Baseline model: a single decision tree fitted on the training split.
# random_state is pinned (same seed as the train/test split) because the
# tree's split search is randomised; without a seed the fitted tree, and
# therefore the score shown below, can change on every run.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
# Evaluate on the held-out split (last expression -> displayed by the notebook).
clf.score(X_test, y_test)

0.8837209302325582

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 200 trees fitted on the training split.
# random_state is pinned (same seed as the train/test split) so that the
# bootstrap samples and per-split feature subsets are reproducible; the
# feature-importance ranking computed from `rf` in later cells would
# otherwise differ between runs.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf = rf.fit(X_train, y_train)
# Evaluate on the held-out split (last expression -> displayed by the notebook).
rf.score(X_test, y_test)

0.9767441860465116

In [7]:
# Lookup table for the survey questions; the file is pipe-separated
# despite its .tsv extension.
question_data = pd.read_csv('../reference.tsv', sep='|')
question_data

Unnamed: 0,atribute_id,description
0,1,If one of us apologizes when our discussion de...
1,2,"I know we can ignore our differences, even if ..."
2,3,"When we need it, we can take our discussions w..."
3,4,"When I discuss with my spouse, to contact him ..."
4,5,The time I spent with my wife is special for us.
5,6,We don't have time at home as partners.
6,7,We are like two strangers who share the same e...
7,8,I enjoy our holidays with my wife.
8,9,I enjoy traveling with my wife.
9,10,Most of our goals are common to my spouse.


In [8]:
# Column names of the feature matrix the models were trained on.
# These must come from `data`, not `df`: `df` still contains the "Divorce"
# target column, which has no entry in rf.feature_importances_ and would
# misalign the name/importance pairing unless it happened to be the last
# column.
feature = data.columns

In [9]:
# Pair every importance score with its column name, order the pairs from
# most to least important, and load them into a two-column frame
# (column 0 = score, column 1 = name).
scored_columns = list(zip(rf.feature_importances_, feature))
scored_columns.sort(reverse=True)
imp_features = pd.DataFrame(scored_columns)

In [10]:
# Give the positional columns meaningful names.
imp_features = imp_features.rename(
    columns={0: 'importance_score', 1: 'atribute_id'}
)
# Turn labels such as "Q40" into the integer 40 so they can be joined
# against the question reference table.
# - astype(str) keeps this cell re-runnable: on a second execution the
#   column is already int64 and the .str accessor would raise otherwise.
# - regex=False makes the literal replacement explicit instead of relying
#   on the pandas-version-dependent default.
imp_features['atribute_id'] = (
    imp_features['atribute_id']
    .astype(str)
    .str.replace('Q', '', regex=False)
    .astype('int64')
)
imp_features.head(10)

Unnamed: 0,importance_score,atribute_id
0,0.111713,40
1,0.092697,38
2,0.073404,12
3,0.069514,19
4,0.062124,16
5,0.057459,18
6,0.050367,20
7,0.047036,15
8,0.045324,9
9,0.040297,36


In [13]:
# Attach each question's text to its importance score by joining the two
# frames on their shared "atribute_id" column.
merged_df = imp_features.merge(question_data, on="atribute_id")
merged_df.head(10)

Unnamed: 0,importance_score,atribute_id,description
0,0.111713,40,We're just starting a discussion before I know...
1,0.092697,38,I hate my spouse's way of open a subject.
2,0.073404,12,My spouse and I have similar values in terms o...
3,0.069514,19,My spouse and I have similar ideas about how r...
4,0.062124,16,We're compatible with my spouse about what lov...
5,0.057459,18,My spouse and I have similar ideas about how m...
6,0.050367,20,My spouse and I have similar values in trust.
7,0.047036,15,Our dreams with my spouse are similar and harm...
8,0.045324,9,I enjoy traveling with my wife.
9,0.040297,36,I can be humiliating when we discussions.


In [12]:
# Question texts as a plain Python list, in the same row order as merged_df.
# The column is selected by name ("description") rather than by position,
# and extracted in one call instead of a row-by-row loop.
feature_questions = merged_df["description"].tolist()

feature_questions


["We're just starting a discussion before I know what's going on.",
 "I hate my spouse's way of open a subject.",
 'My spouse and I have similar values in terms of personal freedom.',
 'My spouse and I have similar ideas about how roles should be in marriage',
 "We're compatible with my spouse about what love should be.",
 'My spouse and I have similar ideas about how marriage should be',
 'My spouse and I have similar values in trust.',
 'Our dreams with my spouse are similar and harmonious.',
 'I enjoy traveling with my wife.',
 'I can be humiliating when we discussions.',
 "I know my spouse's friends and their social relationships.",
 'We share the same views about being happy in our life with my spouse',
 "I have knowledge of my spouse's inner world.",
 'I think that one day in the future, when I look back, I see that my spouse and I have been in harmony with each other.',
 "I know my spouse's hopes and wishes.",
 "I know my spouse's basic anxieties.",
 'Most of our goals for peopl