# Required imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

# Read datasets

In [None]:
# Load the Titanic workbook (default first sheet) into a DataFrame.
titanic = pd.read_excel(io='../data/dataset/titanic3.xls')

In [29]:
# Load the hypothyroid CSV with pandas' default parsing options.
hypothyroid = pd.read_csv(filepath_or_buffer='../data/dataset/dataset_57_hypothyroid.csv')

# Take a look at the data

## Titanic

In [None]:
# Preview the first five rows of the Titanic frame.
titanic.head(n=5)

In [None]:
# Column dtypes and non-null counts before any cleaning.
# `null_counts` was deprecated and then removed from DataFrame.info (pandas 2.0),
# so passing it raises TypeError on current pandas. Non-null counts are shown by
# default as long as the frame stays below `display.max_info_rows` /
# `display.max_info_columns`, so `verbose=True` alone gives the same report on
# both old and new pandas versions.
titanic.info(verbose=True)

In [None]:
# Impute missing ages with the mean age.
# The result is assigned back to the frame: calling fillna(inplace=True) on an
# attribute-accessed column acts on an intermediate Series and is not guaranteed
# to update `titanic` (it does not under pandas Copy-on-Write).
titanic['age'] = titanic['age'].fillna(titanic['age'].mean())

In [None]:
# Impute missing fares with the mean *fare*.
# The original filled `fare` with the mean of `age`, which puts a value from an
# unrelated column into the fare feature.
# Assigned back rather than using a chained fillna(inplace=True), which is not
# guaranteed to update the parent frame.
titanic['fare'] = titanic['fare'].fillna(titanic['fare'].mean())

In [None]:
# Keep only the rows whose `embarked` value is present.
# .copy() makes the filtered frame independent of the original, so the column
# encodings applied further down the notebook modify this frame unambiguously
# instead of triggering SettingWithCopyWarning on a filtered selection.
titanic = titanic[titanic.embarked.notnull()].copy()

In [None]:
# Re-check dtypes and non-null counts after imputation and row filtering.
# `null_counts` was deprecated and then removed from DataFrame.info (pandas 2.0),
# so passing it raises TypeError on current pandas. Non-null counts are shown by
# default as long as the frame stays below `display.max_info_rows` /
# `display.max_info_columns`, so `verbose=True` alone gives the same report on
# both old and new pandas versions.
titanic.info(verbose=True)

In [None]:
# Number of rows per `survived` value, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', data=titanic, ax=ax)

In [None]:
# `survived` counts split by the `embarked` column.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', hue='embarked', data=titanic, ax=ax)

In [None]:
# `survived` counts split by the `sex` column.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', hue='sex', data=titanic, ax=ax)

In [None]:
# `survived` counts split by the `parch` column.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', hue='parch', data=titanic, ax=ax)

In [None]:
# `survived` counts split by the `pclass` column.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', hue='pclass', data=titanic, ax=ax)

In [None]:
# `survived` counts split by the `sibsp` column.
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='survived', hue='sibsp', data=titanic, ax=ax)

In [None]:
# Columns of the Titanic frame used as model inputs.
features = [
    'pclass',
    'sex',
    'age',
    'sibsp',
    'parch',
    'fare',
    'embarked',
]

In [None]:
# Feature matrix: every row, restricted to the selected feature columns.
# NOTE(review): this selection is made before the `sex` / `embarked` encoding
# cells in the "Data transformations" section; confirm X is rebuilt after those
# cells run, otherwise it still holds the raw string categories.
X = titanic.loc[:, features]

In [None]:
# Target vector: the `survived` column.
Y = titanic['survived']

## Hypothyroid

In [30]:
# First five rows, transposed so the many columns read top to bottom.
hypothyroid.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
age,41,23,46,70,70
sex,F,F,M,F,F
on_thyroxine,f,f,f,t,f
query_on_thyroxine,f,f,f,f,f
on_antithyroid_medication,f,f,f,f,f
sick,f,f,f,f,f
pregnant,f,f,f,f,f
thyroid_surgery,f,f,f,f,f
I131_treatment,f,f,f,f,f
query_hypothyroid,f,f,f,f,f


In [31]:
# Column dtypes and non-null counts of the raw hypothyroid frame.
# `null_counts` was deprecated and then removed from DataFrame.info (pandas 2.0),
# so passing it raises TypeError on current pandas. Non-null counts are shown by
# default as long as the frame stays below `display.max_info_rows` /
# `display.max_info_columns`, so `verbose=True` alone gives the same report on
# both old and new pandas versions.
hypothyroid.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3772 entries, 0 to 3771
Data columns (total 30 columns):
age                          3772 non-null object
sex                          3772 non-null object
on_thyroxine                 3772 non-null object
query_on_thyroxine           3772 non-null object
on_antithyroid_medication    3772 non-null object
sick                         3772 non-null object
pregnant                     3772 non-null object
thyroid_surgery              3772 non-null object
I131_treatment               3772 non-null object
query_hypothyroid            3772 non-null object
query_hyperthyroid           3772 non-null object
lithium                      3772 non-null object
goitre                       3772 non-null object
tumor                        3772 non-null object
hypopituitary                3772 non-null object
psych                        3772 non-null object
TSH_measured                 3772 non-null object
TSH                          3772 non-null 

# Data transformations

## Titanic

In [None]:
# Encode `sex` as integers: male -> 0, female -> 1.
# Assigned back to the frame: replace(inplace=True) on an attribute-accessed
# column acts on an intermediate Series and is not guaranteed to update
# `titanic` (it does not under pandas Copy-on-Write).
titanic['sex'] = titanic['sex'].replace({'male': 0, 'female': 1})

In [None]:
# Encode `embarked` as integers: S -> 0, C -> 1, Q -> 2.
# Assigned back to the frame: replace(inplace=True) on an attribute-accessed
# column acts on an intermediate Series and is not guaranteed to update
# `titanic` (it does not under pandas Copy-on-Write).
titanic['embarked'] = titanic['embarked'].replace({'S': 0, 'C': 1, 'Q': 2})

## Hypothyroid

In [32]:
# Encode `sex` as integers: M -> 0, F -> 1.
# Assigned back to the frame: replace(inplace=True) on an attribute-accessed
# column acts on an intermediate Series and is not guaranteed to update
# `hypothyroid` (it does not under pandas Copy-on-Write).
hypothyroid['sex'] = hypothyroid['sex'].replace({'M': 0, 'F': 1})

In [33]:
# Encode the 'f' / 't' flag values as 0 / 1 across the whole frame.
hypothyroid.replace(to_replace=['f', 't'], value=[0, 1], inplace=True)

In [34]:
# '?' appears to be this dataset's missing-value marker. In the continuous
# measurement columns it must become NaN: mapping it to 0 records fake zero
# readings and leaves the mean imputation further down with nothing to fill.
numeric_columns = ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']
hypothyroid[numeric_columns] = hypothyroid[numeric_columns].replace('?', np.nan)
# Any remaining '?' (non-measurement columns) keeps the original 0 encoding.
# NOTE(review): for `sex` this maps "unknown" onto the same code as 'M' - confirm
# that is intended.
hypothyroid.replace({'?': 0}, inplace=True)

In [35]:
# Remove the TBG measurement column and its "measured" flag from the frame.
hypothyroid.drop(labels=['TBG', 'TBG_measured'], axis='columns', inplace=True)

In [36]:
# Cast the continuous measurement columns to 32-bit floats, one at a time.
for measurement in ('age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'):
    hypothyroid[measurement] = hypothyroid[measurement].astype(np.float32)

In [37]:
# Mean-impute each continuous measurement column.
# The result is assigned back to the frame instead of calling
# fillna(inplace=True) on an attribute-accessed column, which acts on an
# intermediate Series and is not guaranteed to update `hypothyroid` (it does
# not under pandas Copy-on-Write).
for column in ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']:
    hypothyroid[column] = hypothyroid[column].fillna(hypothyroid[column].mean())

In [38]:
# Summarize dtypes and non-null counts after the transformations above.
hypothyroid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3772 entries, 0 to 3771
Data columns (total 28 columns):
age                          3772 non-null float32
sex                          3772 non-null int64
on_thyroxine                 3772 non-null int64
query_on_thyroxine           3772 non-null int64
on_antithyroid_medication    3772 non-null int64
sick                         3772 non-null int64
pregnant                     3772 non-null int64
thyroid_surgery              3772 non-null int64
I131_treatment               3772 non-null int64
query_hypothyroid            3772 non-null int64
query_hyperthyroid           3772 non-null int64
lithium                      3772 non-null int64
goitre                       3772 non-null int64
tumor                        3772 non-null int64
hypopituitary                3772 non-null int64
psych                        3772 non-null int64
TSH_measured                 3772 non-null int64
TSH                          3772 non-null float32
T3_meas

In [39]:
# First five rows after encoding, transposed so the columns read top to bottom.
hypothyroid.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
age,41,23,46,70,70
sex,1,1,0,1,1
on_thyroxine,0,0,0,1,0
query_on_thyroxine,0,0,0,0,0
on_antithyroid_medication,0,0,0,0,0
sick,0,0,0,0,0
pregnant,0,0,0,0,0
thyroid_surgery,0,0,0,0,0
I131_treatment,0,0,0,0,0
query_hypothyroid,0,0,0,0,0
