In [187]:
# importing libraries
import pandas            as pd                       # data science essentials
import matplotlib.pyplot as plt                      # data visualization
import seaborn           as sns                      # enhanced data viz
from sklearn.model_selection import train_test_split # train-test split
from sklearn.linear_model import LogisticRegression  # logistic regression
import statsmodels.formula.api as smf                # logistic regression
from sklearn.metrics import confusion_matrix         # confusion matrix
from sklearn.metrics import roc_auc_score            # auc score
from sklearn.neighbors import KNeighborsClassifier   # KNN for classification
from sklearn.neighbors import KNeighborsRegressor    # KNN for regression
from sklearn.preprocessing import StandardScaler     # standard scaler
from sklearn.tree import DecisionTreeClassifier      # classification trees
from sklearn.tree import plot_tree                   # tree plots

# reading the character dataset from the Excel workbook
got = pd.read_excel('./__storage/GOT_character_predictions.xlsx')


# widening the pandas display limits so that wide frames are shown in full
for option_name, option_value in {'display.max_rows'     : 500,
                                  'display.max_columns'  : 500,
                                  'display.width'        : 1000,
                                  'display.max_colwidth' : 100}.items():
    pd.set_option(option_name, option_value)


# previewing the first five rows
got.head(5)

Unnamed: 0,S.No,name,title,culture,dateOfBirth,mother,father,heir,house,spouse,book1_A_Game_Of_Thrones,book2_A_Clash_Of_Kings,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isAliveMother,isAliveFather,isAliveHeir,isAliveSpouse,isMarried,isNoble,age,numDeadRelations,popularity,isAlive
0,1,Viserys II Targaryen,,,,Rhaenyra Targaryen,Daemon Targaryen,Aegon IV Targaryen,,,0,0,0,0,0,1.0,0.0,0.0,,0,0,,11,0.605351,0
1,2,Walder Frey,Lord of the Crossing,Rivermen,208.0,,,,House Frey,Perra Royce,1,1,1,1,1,,,,1.0,1,1,97.0,1,0.896321,1
2,3,Addison Hill,Ser,,,,,,House Swyft,,0,0,0,1,0,,,,,0,1,,0,0.267559,1
3,4,Aemma Arryn,Queen,,82.0,,,,House Arryn,Viserys I Targaryen,0,0,0,0,0,,,,0.0,1,1,23.0,0,0.183946,0
4,5,Sylva Santagar,Greenstone,Dornish,276.0,,,,House Santagar,Eldon Estermont,0,0,0,1,0,,,,1.0,1,1,29.0,0,0.043478,1


Checking the correlation values between the response variable and the explanatory variables.

In [188]:
# creating the correlation matrix from the numeric columns only
# (pandas >= 2.0 no longer drops text columns silently inside .corr() and
#  raises instead, so the object-typed columns are excluded explicitly)
df_corr = got.select_dtypes(include = ['number', 'bool']).corr().round(2)

# ranking every numeric feature by its correlation with isAlive
df_corr['isAlive'].sort_values(ascending = False)

isAlive                       1.00
isAliveHeir                   0.38
book4_A_Feast_For_Crows       0.27
isAliveFather                 0.20
isAliveSpouse                 0.17
age                           0.09
book5_A_Dance_with_Dragons    0.03
book3_A_Storm_Of_Swords       0.01
isAliveMother                -0.04
isNoble                      -0.04
isMarried                    -0.05
book2_A_Clash_Of_Kings       -0.07
dateOfBirth                  -0.09
S.No                         -0.13
book1_A_Game_Of_Thrones      -0.15
popularity                   -0.18
numDeadRelations             -0.19
Name: isAlive, dtype: float64

Here we split each character's name on spaces and copy the final token into a new lastname column,
because for some characters the house appears in the name as a surname.

In [189]:
# splitting every name on single spaces
last_name = got['name'].str.split(' ')

# keeping the final token of each name as the lastname column; the vectorised
# .str accessor replaces the manual loop and keeps the values aligned on got's
# own index instead of on a freshly built 0..n-1 Series index
# NOTE: tokens are taken verbatim, so some keep trailing punctuation
#       (e.g. 'Ryman)' shows up among the lastname dummy columns later on)
got['lastname'] = last_name.str[-1]

In [190]:
# confirming that lastname now appears among the columns
# (info() lists each column with its non-null count and dtype)
got.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1946 entries, 0 to 1945
Data columns (total 26 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   S.No                        1946 non-null   int64  
 1   name                        1946 non-null   object 
 2   title                       938 non-null    object 
 3   culture                     677 non-null    object 
 4   dateOfBirth                 433 non-null    float64
 5   mother                      21 non-null     object 
 6   father                      26 non-null     object 
 7   heir                        23 non-null     object 
 8   house                       1519 non-null   object 
 9   spouse                      276 non-null    object 
 10  book1_A_Game_Of_Thrones     1946 non-null   int64  
 11  book2_A_Clash_Of_Kings      1946 non-null   int64  
 12  book3_A_Storm_Of_Swords     1946 non-null   int64  
 13  book4_A_Feast_For_Crows     1946 

In [191]:
# previewing the first ten rows to check that lastname holds the final token of name
got.head(10)

Unnamed: 0,S.No,name,title,culture,dateOfBirth,mother,father,heir,house,spouse,book1_A_Game_Of_Thrones,book2_A_Clash_Of_Kings,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isAliveMother,isAliveFather,isAliveHeir,isAliveSpouse,isMarried,isNoble,age,numDeadRelations,popularity,isAlive,lastname
0,1,Viserys II Targaryen,,,,Rhaenyra Targaryen,Daemon Targaryen,Aegon IV Targaryen,,,0,0,0,0,0,1.0,0.0,0.0,,0,0,,11,0.605351,0,Targaryen
1,2,Walder Frey,Lord of the Crossing,Rivermen,208.0,,,,House Frey,Perra Royce,1,1,1,1,1,,,,1.0,1,1,97.0,1,0.896321,1,Frey
2,3,Addison Hill,Ser,,,,,,House Swyft,,0,0,0,1,0,,,,,0,1,,0,0.267559,1,Hill
3,4,Aemma Arryn,Queen,,82.0,,,,House Arryn,Viserys I Targaryen,0,0,0,0,0,,,,0.0,1,1,23.0,0,0.183946,0,Arryn
4,5,Sylva Santagar,Greenstone,Dornish,276.0,,,,House Santagar,Eldon Estermont,0,0,0,1,0,,,,1.0,1,1,29.0,0,0.043478,1,Santagar
5,6,Tommen Baratheon,,,,Cersei Lannister,Robert Baratheon,Myrcella Baratheon,,,0,0,0,0,0,1.0,1.0,1.0,,0,0,,5,1.0,1,Baratheon
6,7,Valarr Targaryen,Hand of the King,Valyrian,183.0,,,,House Targaryen,Kiera of Tyrosh,0,0,0,0,0,,,,1.0,1,1,26.0,0,0.431438,0,Targaryen
7,8,Viserys I Targaryen,,,,Alyssa Targaryen,Baelon Targaryen,Rhaenyra Targaryen,,,0,0,0,0,0,1.0,1.0,1.0,,0,0,,5,0.67893,0,Targaryen
8,9,Wilbert,Ser,,,,,,,,0,0,1,0,0,,,,,0,1,,0,0.006689,0,Wilbert
9,10,Wilbert Osgrey,Ser,,,,,,House Osgrey,,0,0,0,0,0,,,,,0,1,,0,0.020067,1,Osgrey


In [192]:
# NOTE(review): repeats the ten-row preview of the previous cell; got is unchanged in between
got.head(10)

Unnamed: 0,S.No,name,title,culture,dateOfBirth,mother,father,heir,house,spouse,book1_A_Game_Of_Thrones,book2_A_Clash_Of_Kings,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isAliveMother,isAliveFather,isAliveHeir,isAliveSpouse,isMarried,isNoble,age,numDeadRelations,popularity,isAlive,lastname
0,1,Viserys II Targaryen,,,,Rhaenyra Targaryen,Daemon Targaryen,Aegon IV Targaryen,,,0,0,0,0,0,1.0,0.0,0.0,,0,0,,11,0.605351,0,Targaryen
1,2,Walder Frey,Lord of the Crossing,Rivermen,208.0,,,,House Frey,Perra Royce,1,1,1,1,1,,,,1.0,1,1,97.0,1,0.896321,1,Frey
2,3,Addison Hill,Ser,,,,,,House Swyft,,0,0,0,1,0,,,,,0,1,,0,0.267559,1,Hill
3,4,Aemma Arryn,Queen,,82.0,,,,House Arryn,Viserys I Targaryen,0,0,0,0,0,,,,0.0,1,1,23.0,0,0.183946,0,Arryn
4,5,Sylva Santagar,Greenstone,Dornish,276.0,,,,House Santagar,Eldon Estermont,0,0,0,1,0,,,,1.0,1,1,29.0,0,0.043478,1,Santagar
5,6,Tommen Baratheon,,,,Cersei Lannister,Robert Baratheon,Myrcella Baratheon,,,0,0,0,0,0,1.0,1.0,1.0,,0,0,,5,1.0,1,Baratheon
6,7,Valarr Targaryen,Hand of the King,Valyrian,183.0,,,,House Targaryen,Kiera of Tyrosh,0,0,0,0,0,,,,1.0,1,1,26.0,0,0.431438,0,Targaryen
7,8,Viserys I Targaryen,,,,Alyssa Targaryen,Baelon Targaryen,Rhaenyra Targaryen,,,0,0,0,0,0,1.0,1.0,1.0,,0,0,,5,0.67893,0,Targaryen
8,9,Wilbert,Ser,,,,,,,,0,0,1,0,0,,,,,0,1,,0,0.006689,0,Wilbert
9,10,Wilbert Osgrey,Ser,,,,,,House Osgrey,,0,0,0,0,0,,,,,0,1,,0,0.020067,1,Osgrey


In [193]:
# counting the missing values in every column
got.isna().sum()

S.No                             0
name                             0
title                         1008
culture                       1269
dateOfBirth                   1513
mother                        1925
father                        1920
heir                          1923
house                          427
spouse                        1670
book1_A_Game_Of_Thrones          0
book2_A_Clash_Of_Kings           0
book3_A_Storm_Of_Swords          0
book4_A_Feast_For_Crows          0
book5_A_Dance_with_Dragons       0
isAliveMother                 1925
isAliveFather                 1920
isAliveHeir                   1923
isAliveSpouse                 1670
isMarried                        0
isNoble                          0
age                           1513
numDeadRelations                 0
popularity                       0
isAlive                          0
lastname                         0
dtype: int64

Here, missing values are filled with 'no_data' only for columns whose datatype is object.
The remaining columns are filled with -1000 (source: DataCamp - Machine Learning with the Experts - School Budgets).

In [194]:
# imputing missing values
# text columns receive the sentinel 'no_data'; numeric columns receive -1000.
# A single DataFrame-level fillna replaces the per-column
# got[col].fillna(..., inplace = True) calls: those act on a selected column
# object, emit a chained-assignment FutureWarning in pandas 2.x and no longer
# update got once Copy-on-Write is enabled.
fill_values = {'title'         : 'no_data',
               'culture'       : 'no_data',
               'dateOfBirth'   : -1000,
               'mother'        : 'no_data',
               'father'        : 'no_data',
               'heir'          : 'no_data',
               'house'         : 'no_data',
               'spouse'        : 'no_data',
               'isAliveMother' : -1000,
               'isAliveFather' : -1000,
               'isAliveHeir'   : -1000,
               'isAliveSpouse' : -1000,
               'age'           : -1000}

# columns that are not listed in fill_values are left untouched
got = got.fillna(value = fill_values)

In [195]:
# recounting the missing values per column after imputation
got.isna().sum()

S.No                          0
name                          0
title                         0
culture                       0
dateOfBirth                   0
mother                        0
father                        0
heir                          0
house                         0
spouse                        0
book1_A_Game_Of_Thrones       0
book2_A_Clash_Of_Kings        0
book3_A_Storm_Of_Swords       0
book4_A_Feast_For_Crows       0
book5_A_Dance_with_Dragons    0
isAliveMother                 0
isAliveFather                 0
isAliveHeir                   0
isAliveSpouse                 0
isMarried                     0
isNoble                       0
age                           0
numDeadRelations              0
popularity                    0
isAlive                       0
lastname                      0
dtype: int64

In [196]:
# casting every text column (including the derived lastname) to the category dtype
for text_column in ['name', 'title', 'culture', 'mother', 'father',
                    'heir', 'house', 'spouse', 'lastname']:
    got[text_column] = got[text_column].astype('category')

# verifying through info() that these columns now report the category dtype
got.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1946 entries, 0 to 1945
Data columns (total 26 columns):
 #   Column                      Non-Null Count  Dtype   
---  ------                      --------------  -----   
 0   S.No                        1946 non-null   int64   
 1   name                        1946 non-null   category
 2   title                       1946 non-null   category
 3   culture                     1946 non-null   category
 4   dateOfBirth                 1946 non-null   float64 
 5   mother                      1946 non-null   category
 6   father                      1946 non-null   category
 7   heir                        1946 non-null   category
 8   house                       1946 non-null   category
 9   spouse                      1946 non-null   category
 10  book1_A_Game_Of_Thrones     1946 non-null   int64   
 11  book2_A_Clash_Of_Kings      1946 non-null   int64   
 12  book3_A_Storm_Of_Swords     1946 non-null   int64   
 13  book4_A_Feast_For_

In [197]:
# creating dummy variables for the categorical features
# drop_first = True omits the first level of every encoded column.
# dtype is pinned to 'uint8' (the default before pandas 2.0) so the indicators
# are 0/1 integers on every pandas version; pandas >= 2.0 would otherwise emit
# True/False columns, which the correlation and formula-based model cells
# further down would treat differently.
got = pd.get_dummies(got,
                     columns    = ['title', 'culture', 'mother', 'father',
                                   'heir', 'spouse', 'isAliveMother',
                                   'isAliveFather', 'isAliveHeir',
                                   'isAliveSpouse', 'lastname'],
                     drop_first = True,
                     dtype      = 'uint8')



In [198]:
got.shape # (number of rows, number of columns) after dummy encoding

(1946, 1771)

In [199]:
got.head(n=5) # previewing the first 5 rows (observations, not features) of the encoded frame

Unnamed: 0,S.No,name,dateOfBirth,house,book1_A_Game_Of_Thrones,book2_A_Clash_Of_Kings,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isMarried,isNoble,age,numDeadRelations,popularity,isAlive,title_Andals,title_Arbor,title_Archmaester,title_Ashford,title_Banefort,title_Barrowton,title_Bear Island,title_Big BucketThe Wull,title_Bitterbridge,title_Black Wind,title_Blackcrown,title_Blackmont,title_Blacktyde,title_Bloodrider,title_Blue Grace,title_Brightwater,title_Broad Arch,title_Brother,title_BrotherProctor,title_Captain,title_Captain of the guard,title_Captain-General,title_Castellan,title_Castellan of Harrenhal,title_CastellanCommander,title_Casterly Rock,title_Castle Lychester,title_Cerwyn,title_Chief Undergaoler,title_Claw Isle,title_Cobblecat,title_Coldmoat,title_Coldwater Burn,title_Commander of the City Watch,title_Commander of the Second Sons,title_Crag,title_Crakehall,title_Cupbearer,title_Darry,title_Deepwood Motte,title_Dragonstone,title_Dreadfort,title_Duskendale,title_Dyre Den,title_Eastwatch-by-the-Sea,title_Eyrie,title_Fair Isle,title_Feastfires,title_Felwood,title_First Builder,title_First Ranger,title_First Sword of Braavos,title_Foamdrinker,title_Ghost Hill,title_Godsgrace,title_Godswife,title_Golden Storm,title_Golden Tooth,title_Goldengrove,title_Goldgrass,title_Good Master,title_Goodman,title_Goodwife,title_Grand Maester,title_Grassy Vale,title_Green Grace,title_Greenshield,title_Greenstone,title_Grey Glen,title_Greywater Watch,title_Gulltown,title_Hand of the King,title_Harlaw,title_Harrenhal,title_Harridan Hill,title_Hayford,title_Haystack Hall,title_Heart's Home,title_High Septon,title_High Steward of Highgarden,title_Highgarden,title_Hightower,title_Horn Hill,title_Hornvale,title_Hornwood,title_Iron Islands,title_Ironoaks,title_Karhold,title_Karl's Hold,title_Kayce,title_Keeper of the Gates of the Moon,title_Khal,title_KhalKo (formerly),title_Khalakka,title_King,title_King in the North,title_King of 
Astapor,title_King of Winter,title_King of the Andals,title_King of the Iron Islands,title_King-Beyond-the-Wall,title_Knight,title_Knight of Griffin's Roost,title_Lady,title_Lady Marya,title_Lady of Bear Island,title_Lady of Darry,title_Lady of Torrhen's Square,title_Lady of the Leaves,title_Lady of the Vale,title_LadyQueen,title_LadyQueenDowager Queen,title_Last Hearth,title_Light of the West,title_Lonely Light,title_Longbow Hall,title_Longsister,title_Longtable,title_Lord,title_Lord Captain of the Iron Fleet,title_Lord Commander of the Night's Watch,title_Lord Paramount of the Mander,title_Lord Paramount of the Stormlands,title_Lord Paramount of the Trident,title_Lord Reaper of Pyke,title_Lord Seneschal,title_Lord Steward,title_Lord Steward of the Iron Islands,title_Lord of Atranta,title_Lord of Blackhaven,title_Lord of Coldmoat,title_Lord of Crows Nest,title_Lord of Darry,title_Lord of Dragonstone,title_Lord of Flint's Finger,title_Lord of Greyshield,title_Lord of Griffin's Roost,title_Lord of Hammerhorn,title_Lord of Harrenhal,title_Lord of Hellholt,title_Lord of Honeyholt,title_Lord of Iron Holt,title_Lord of Kingsgrave,title_Lord of Oakenshield,title_Lord of Oldcastle,title_Lord of Pebbleton,title_Lord of Southshield,title_Lord of Starfall,title_Lord of Sunflower Hall,title_Lord of White Harbor,title_Lord of the Crossing,title_Lord of the Deep Den,title_Lord of the Hornwood,title_Lord of the Iron Islands,title_Lord of the Marches,title_Lord of the Red Dunes,title_Lord of the Seven Kingdoms,title_Lord of the Snakewood,title_Lord of the Ten TowersLord Harlaw of HarlawHarlaw of Harlaw,title_Lord of the Tides,title_Lord of the Tor,title_LordWisdom,title_Lordsport,title_Maester,title_Magister,title_Magister of Pentos,title_Magnar of Thenn,title_Maidenpool,title_Master of Coin,title_Master of Deepwood Motte,title_Master of Harlaw Hall,title_Master of Whisperers,title_Master of coin,title_Master of whisperers,title_Master-at-Arms,title_Mistress of 
whisperers,title_Nightsong,title_Oakenshield,title_Oarmaster,title_Old Oak,title_Old Wyk,title_Pinkmaiden,title_Prince,title_Prince of Dorne,title_Prince of Dragonstone,title_Prince of Winterfell,title_Prince of WinterfellHeir to Winterfell,title_Prince of the Narrow Sea,title_Princess,title_PrincessQueen,title_PrincessQueenDowager Queen,title_PrincessSepta,title_Protector of the Realm,title_Queen,title_QueenBlack Bride,title_QueenDowager Queen,title_Rain House,title_Raventree Hall,title_Red Flower Vale,title_Red Jester,title_Red Priest,title_Redfort,title_Rills,title_Riverrun,title_Rook's Rest,title_Ruddy Hall,title_Runestone,title_Salt Shore,title_Sandship,title_Sandstone,title_Seagard,title_Sealord,title_Sealskin Point,title_Seneschal,title_Septa,title_Septon,title_Ser,title_SerCastellan of Casterly Rock,title_Serthe Knight of Saltpans,title_Seven Kingdoms,title_Sharp Point,title_Shatterstone,title_Skyreach,title_Slave of R'hllor,title_Starpike,title_Steward,title_Stokeworth,title_Stone Hedge,title_Stonehelm,title_Storm's End,title_Strongsong,title_Sunspear,title_Sweetport Sound,title_Sweetsister,title_Ten 
Towers,...,lastname_Ryles,lastname_Ryman),lastname_Rymolf,lastname_Ryndoon,lastname_Ryswell,lastname_S'vrone,lastname_Saan,lastname_Saathos,lastname_Sallor,lastname_Salloreon,lastname_Sand,lastname_Santagar,lastname_Sarsfield,lastname_Sathmantes,lastname_Satin,lastname_Sawwood,lastname_Sawyer,lastname_Scarb,lastname_Scolera,lastname_Seastar,lastname_Seaworth,lastname_Sedgekins,lastname_Selmy,lastname_Senelle,lastname_Serala,lastname_Serra,lastname_Serry,lastname_Sevenstreams,lastname_Shae,lastname_Shagga,lastname_Shagwell,lastname_Sharna,lastname_Sharp,lastname_Shella,lastname_Shepherd,lastname_Sherrit,lastname_Shett,lastname_Shield,lastname_Shortear,lastname_Shyra,lastname_Sigrin,lastname_Skinner,lastname_Skittrick,lastname_Skyte,lastname_Sloey,lastname_Slynt,lastname_Smallwood,lastname_Snow,lastname_Softfoot,lastname_Sparr,lastname_Sparrow,lastname_Spicer,lastname_Squint,lastname_Squirrel,lastname_Stackspear,lastname_Staedmon,lastname_Star-Eyes,lastname_Stark,lastname_Staunton,lastname_Steelskin,lastname_Stevron),lastname_Stilwood,lastname_Stiv,lastname_Stokeworth,lastname_Stone,lastname_Stonehand,lastname_Stonehouse,lastname_Stonesnake,lastname_Stonetree,lastname_Stoops,lastname_Storm,lastname_Stout,lastname_Strickland,lastname_Stripeback,lastname_Strong,lastname_Stygg,lastname_Styr,lastname_Su,lastname_Suggs,lastname_Sunderland,lastname_Sunglass,lastname_Swann,lastname_Swyft,lastname_Sybassion,lastname_Sylas,lastname_Tagganaro,lastname_Talea,lastname_Tallad,lastname_Tallhart,lastname_Tangletongue,lastname_Tanner,lastname_Tanselle,lastname_Tansy,lastname_Tarber,lastname_Targaryen,lastname_Tarle,lastname_Tarly,lastname_Tarth,lastname_Tattersalt,lastname_Tawney,lastname_Temmo,lastname_Templeton,lastname_Tendyris,lastname_Terro,lastname_Terys,lastname_Theobald,lastname_Theodore),lastname_Thistle,lastname_Thorne,lastname_Thrall,lastname_Three-Tooth,lastname_Thumbs,lastname_Timeon,lastname_Timett,lastname_Timon,lastname_Timoth,lastname_Tobbot,lastname_Todder,lastname_
Todric,lastname_Toefinger,lastname_Toes,lastname_Toland,lastname_Tollett,lastname_TomToo,lastname_Tomard,lastname_Tongue,lastname_Toraq,lastname_Torbert,lastname_Toregg,lastname_Tormund,lastname_Torrek,lastname_Torwynd,lastname_Tothmure,lastname_Town),lastname_Toyne,lastname_Trant,lastname_Tregar,lastname_Tristimun,lastname_Tuffleberry,lastname_Tully,lastname_Tumberjon,lastname_Tumitis,lastname_Turnberry,lastname_Turnip,lastname_Turquin,lastname_Tym,lastname_Tymor,lastname_Tyrell,lastname_Tysha,lastname_Uffering,lastname_Uhoris,lastname_Uller,lastname_Ullhor,lastname_Ulmer,lastname_Umar,lastname_Umber,lastname_Umfred,lastname_Umma,lastname_Underleaf,lastname_Unella,lastname_Urrathon,lastname_Urreg,lastname_Urswyck,lastname_Urzen,lastname_Utt,lastname_Vaellyn,lastname_Vaith,lastname_Val,lastname_Vance,lastname_Varamyr,lastname_Varly,lastname_Varner,lastname_Varys,lastname_Velaryon,lastname_Victor),lastname_Vikary,lastname_Violet,lastname_Volentin,lastname_Volmark,lastname_Votyris,lastname_Vylarr,lastname_Vyman,lastname_Vypren,lastname_Vyrwel,lastname_Wagstaff,lastname_Walgrave,lastname_Wallen,lastname_Walrus,lastname_Walton,lastname_Walton),lastname_Watch),lastname_Wate,lastname_Waters,lastname_Watt,lastname_Watty,lastname_Wayn,lastname_Waynwood,lastname_Weasel,lastname_Weaver,lastname_Webber,lastname_Weese,lastname_Weg,lastname_Wells,lastname_Wenda,lastname_Wendamyr,lastname_Werlag,lastname_Westerling,lastname_Whent,lastname_White,lastname_Whiteye,lastname_Whittler,lastname_Wife,lastname_Wilbert,lastname_Will,lastname_Willam,lastname_Willifer,lastname_Willit,lastname_Willum,lastname_Witch-eye,lastname_Wode,lastname_Wolmer,lastname_Woods,lastname_Woodwright,lastname_Woolfield,lastname_Worm,lastname_Woth,lastname_Wulfe,lastname_Wull,lastname_Wun,lastname_Wylde,lastname_Wylla,lastname_Wynch,lastname_Wythers,lastname_Xho,lastname_Xhondo,lastname_Yandry,lastname_Yarwyck,lastname_Yew,lastname_Ygritte,lastname_Yna,lastname_Yoren,lastname_Yorkel,lastname_Yormwell,lastname_Y
ronwood,lastname_Ysilla,lastname_Zarabelo,lastname_Zei,lastname_Zekko,lastname_Zollo,lastname_daughterless)
0,1,Viserys II Targaryen,-1000.0,no_data,0,0,0,0,0,0,0,-1000.0,11,0.605351,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,Walder Frey,208.0,House Frey,1,1,1,1,1,1,1,97.0,1,0.896321,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3,Addison Hill,-1000.0,House Swyft,0,0,0,1,0,0,1,-1000.0,0,0.267559,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,Aemma Arryn,82.0,House Arryn,0,0,0,0,0,1,1,23.0,0,0.183946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,5,Sylva Santagar,276.0,House Santagar,0,0,0,1,0,1,1,29.0,0,0.043478,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Here, 1 means the character is alive and 0 means the character is not alive.

In [200]:
# share of each isAlive class, rounded to two decimals
got['isAlive'].value_counts(normalize = True).round(2)

1    0.75
0    0.25
Name: isAlive, dtype: float64

In [201]:
# response variable: the isAlive flag
got_target = got['isAlive']


# explanatory variables: every column except the response and book2_A_Clash_Of_Kings
got_data = got.drop(columns = ['isAlive', 'book2_A_Clash_Of_Kings'])

In [202]:
# split settings: 10% held out, fixed seed, stratified on the response
split_options = dict(test_size    = 0.10,
                     random_state = 219,
                     stratify     = got_target) # preserving balance


# stratified train-test split
x_train, x_test, y_train, y_test = train_test_split(got_data,
                                                    got_target,
                                                    **split_options)


# placing the training features and response side by side for statsmodels
got_train = pd.concat(objs = [x_train, y_train], axis = 'columns')

In [203]:
# printing the class proportions of isAlive in the training and testing
# responses (rounded to two decimals) to compare them after the split
print(f"""

Response Variable Proportions (Training Set)
--------------------------------------------
{y_train.value_counts(normalize = True).round(decimals = 2)}



Response Variable Proportions (Testing Set)
--------------------------------------------
{y_test.value_counts(normalize = True).round(decimals = 2)}
""")





Response Variable Proportions (Training Set)
--------------------------------------------
1    0.75
0    0.25
Name: isAlive, dtype: float64



Response Variable Proportions (Testing Set)
--------------------------------------------
1    0.74
0    0.26
Name: isAlive, dtype: float64



In [204]:
# generating the (Pearson) correlations between the response variable and the
# explanatory variables; the columns are selected explicitly because got still
# carries the category columns name and house, which pandas >= 2.0 no longer
# drops silently inside .corr() ('bool' keeps dummy columns on pandas versions
# that encode them as True/False)
# NOTE(review): this uses the full got frame, test rows included — confirm
#               whether got_train was intended for feature screening
df_corr = got.select_dtypes(include = ['number', 'bool']).corr().round(2)

# ranking every feature by its correlation with isAlive
df_corr['isAlive'].sort_values(ascending = False)

isAlive                    1.00
book4_A_Feast_For_Crows    0.27
mother_no_data             0.14
father_no_data             0.14
heir_no_data               0.13
                           ... 
book1_A_Game_Of_Thrones   -0.15
popularity                -0.18
numDeadRelations          -0.19
culture_Valyrian          -0.21
lastname_Targaryen        -0.23
Name: isAlive, Length: 1769, dtype: float64

In [205]:
# specifying a one-predictor logit model on the training frame
logistic_small = smf.logit(data    = got_train,
                           formula = "isAlive ~ book4_A_Feast_For_Crows")


# estimating the model
results_logistic = logistic_small.fit()


# displaying the summary table
results_logistic.summary2() # summary2() has AIC and BIC

Optimization terminated successfully.
         Current function value: 0.534562
         Iterations 6


0,1,2,3
Model:,Logit,Pseudo R-squared:,0.057
Dependent Variable:,isAlive,AIC:,1876.0368
Date:,2021-12-05 21:56,BIC:,1886.9727
No. Observations:,1751,Log-Likelihood:,-936.02
Df Model:,1,LL-Null:,-992.53
Df Residuals:,1749,LLR p-value:,2.1240999999999998e-26
Converged:,1.0000,Scale:,1.0
No. Iterations:,6.0000,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.4601,0.0765,6.0172,0.0000,0.3102,0.6100
book4_A_Feast_For_Crows,1.1895,0.1141,10.4212,0.0000,0.9658,1.4132


In [206]:
# printing every explanatory column name followed by " + "
# (presumably so the names can be pasted into a model formula)
for column_name in got_data.columns:
    print(f" {column_name} + ")

 S.No + 
 name + 
 dateOfBirth + 
 house + 
 book1_A_Game_Of_Thrones + 
 book3_A_Storm_Of_Swords + 
 book4_A_Feast_For_Crows + 
 book5_A_Dance_with_Dragons + 
 isMarried + 
 isNoble + 
 age + 
 numDeadRelations + 
 popularity + 
 title_Andals + 
 title_Arbor + 
 title_Archmaester + 
 title_Ashford + 
 title_Banefort + 
 title_Barrowton + 
 title_Bear Island + 
 title_Big BucketThe Wull + 
 title_Bitterbridge + 
 title_Black Wind + 
 title_Blackcrown + 
 title_Blackmont + 
 title_Blacktyde + 
 title_Bloodrider + 
 title_Blue Grace + 
 title_Brightwater + 
 title_Broad Arch + 
 title_Brother + 
 title_BrotherProctor + 
 title_Captain + 
 title_Captain of the guard + 
 title_Captain-General + 
 title_Castellan + 
 title_Castellan of Harrenhal + 
 title_CastellanCommander + 
 title_Casterly Rock + 
 title_Castle Lychester + 
 title_Cerwyn + 
 title_Chief Undergaoler + 
 title_Claw Isle + 
 title_Cobblecat + 
 title_Coldmoat + 
 title_Coldwater Burn + 
 title_Commander of the City Watch + 


In [207]:
# instantiating a logit model of isAlive on seven candidate predictors, using
# the training frame (these look like the features at both ends of the isAlive
# correlation ranking — confirm)
logistic_small = smf.logit(formula = """isAlive ~    book4_A_Feast_For_Crows + 
                                                     mother_no_data + 
                                                     father_no_data + 
                                                     heir_no_data +
                                                     numDeadRelations +
                                                     culture_Valyrian +
                                                     lastname_Targaryen""",
                                                     data    = got_train)


# fitting the model object (fit() also prints the optimizer's termination message)
results_logistic = logistic_small.fit()


# checking the results summary: coefficients, standard errors and p-values
results_logistic.summary2() # summary2() has AIC and BIC

Optimization terminated successfully.
         Current function value: 0.506993
         Iterations 7


0,1,2,3
Model:,Logit,Pseudo R-squared:,0.106
Dependent Variable:,isAlive,AIC:,1791.4892
Date:,2021-12-05 21:56,BIC:,1835.2327
No. Observations:,1751,Log-Likelihood:,-887.74
Df Model:,7,LL-Null:,-992.53
Df Residuals:,1743,LLR p-value:,1.0718e-41
Converged:,1.0000,Scale:,1.0
No. Iterations:,7.0000,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.6599,0.8421,0.7836,0.4333,-0.9907,2.3104
book4_A_Feast_For_Crows,1.1344,0.1201,9.4490,0.0000,0.8991,1.3698
mother_no_data,0.0588,1.2871,0.0457,0.9636,-2.4639,2.5814
father_no_data,0.1492,1.0051,0.1484,0.8820,-1.8207,2.1191
heir_no_data,-0.2336,0.9915,-0.2356,0.8137,-2.1769,1.7097
numDeadRelations,-0.2088,0.0505,-4.1326,0.0000,-0.3078,-0.1098
culture_Valyrian,-1.8402,0.4828,-3.8114,0.0001,-2.7865,-0.8939
lastname_Targaryen,-2.2468,0.8876,-2.5313,0.0114,-3.9865,-0.5071


Removed the mother_no_data, father_no_data, and heir_no_data columns because their p-values are greater than 0.05.

In [208]:
# rebuilding the logit model on the training data with only the
# explanatory variables that remain after dropping the *_no_data flags
logistic_small = smf.logit(
    data    = got_train,
    formula = """isAlive ~    book4_A_Feast_For_Crows + 
                                                     numDeadRelations +
                                                     culture_Valyrian +
                                                     lastname_Targaryen""")


# estimating the coefficients
results_logistic = logistic_small.fit()


# displaying the fit statistics (summary2 additionally reports AIC and BIC)
results_logistic.summary2()

Optimization terminated successfully.
         Current function value: 0.507010
         Iterations 7


0,1,2,3
Model:,Logit,Pseudo R-squared:,0.106
Dependent Variable:,isAlive,AIC:,1785.5501
Date:,2021-12-05 21:56,BIC:,1812.8898
No. Observations:,1751,Log-Likelihood:,-887.78
Df Model:,4,LL-Null:,-992.53
Df Residuals:,1746,LLR p-value:,3.3734999999999997e-44
Converged:,1.0000,Scale:,1.0
No. Iterations:,7.0000,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.6342,0.0808,7.8456,0.0000,0.4758,0.7927
book4_A_Feast_For_Crows,1.1344,0.1190,9.5291,0.0000,0.9010,1.3677
numDeadRelations,-0.2086,0.0489,-4.2652,0.0000,-0.3044,-0.1127
culture_Valyrian,-1.8398,0.4816,-3.8206,0.0001,-2.7837,-0.8960
lastname_Targaryen,-2.2536,0.7862,-2.8666,0.0041,-3.7945,-0.7128


In [209]:
# named sets of explanatory variables to feed into the scikit-learn models
candidate_dict = {

    # variables kept in the reduced statsmodels logit above
    'logit_sig' : [
        'book4_A_Feast_For_Crows',
        'numDeadRelations',
        'culture_Valyrian',
        'lastname_Targaryen',
    ],
}

In [210]:
# explanatory data restricted to the 'logit_sig' variable set, plus the target
got_data   = got[candidate_dict['logit_sig']]
got_target = got['isAlive']


# stratified 90/10 train-test split with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(got_data,
                                                    got_target,
                                                    test_size    = 0.10,
                                                    random_state = 219,
                                                    stratify     = got_target)


# INSTANTIATING a logistic regression model
logreg = LogisticRegression(C            = 1,
                            solver       = 'lbfgs',
                            random_state = 219)


# FITTING on the training data and PREDICTING class labels for the test set
logreg_fit  = logreg.fit(x_train, y_train)
logreg_pred = logreg_fit.predict(x_test)


# SCORING once and saving the results for future use
logreg_train_score = logreg_fit.score(x_train, y_train).round(4) # accuracy
logreg_test_score  = logreg_fit.score(x_test, y_test).round(4)   # accuracy
logreg_test_gap    = abs(logreg_train_score - logreg_test_score).round(4)


# displaying accuracy and the gap between training and testing
print('LogReg Training ACCURACY:', logreg_train_score)
print('LogReg Testing  ACCURACY:', logreg_test_score)
print('LogReg Train-Test Gap   :', logreg_test_gap)

LogReg Training ACCURACY: 0.775
LogReg Testing  ACCURACY: 0.7897
LogReg Train-Test Gap   : 0.0147


In [211]:
# area under the roc curve (auc), computed once and saved for future use
# NOTE(review): logreg_pred holds the output of .predict(), i.e. class labels,
# not predicted probabilities - confirm this is the intended AUC input
logreg_auc_score = roc_auc_score(y_true  = y_test,
                                 y_score = logreg_pred).round(decimals = 4)


# displaying the saved score
print(' AUC Score:', logreg_auc_score)



 AUC Score: 0.59


In [212]:
# displaying the logistic regression confusion matrix (actual vs. predicted)
print(confusion_matrix(y_test, logreg_pred))

[[  9  41]
 [  0 145]]


In [213]:
# flattening the confusion matrix and naming each of its four cells
(logreg_tn,
 logreg_fp,
 logreg_fn,
 logreg_tp) = confusion_matrix(y_true = y_test, y_pred = logreg_pred).ravel()


# reporting each cell on its own line
print(f"""
True Negatives : {logreg_tn}
False Positives: {logreg_fp}
False Negatives: {logreg_fn}
True Positives : {logreg_tp}
""")


True Negatives : 9
False Positives: 41
False Negatives: 0
True Positives : 145



In [214]:
########################################
# plot_feature_importances
########################################
def plot_feature_importances(model, train, export = False):
    """
    Plots the importance of features from a CART model as a horizontal
    bar chart, one bar per column of the training data.
    
    PARAMETERS
    ----------
    model  : fitted CART model exposing a feature_importances_ attribute
    train  : explanatory variable training data (needs .shape and .columns)
    export : whether or not to export as a .png image, default False
    """
    
    # number of features is taken from the data that was passed in,
    # not from a notebook-level variable
    n_features = train.shape[1]
    
    # bar positions (plain Python range, so numpy is not required)
    positions = list(range(n_features))
    
    # setting plot window
    fig, ax = plt.subplots(figsize=(12,9))
    
    # one bar per feature, labelled with its column name
    plt.barh(positions, model.feature_importances_, align='center')
    plt.yticks(positions, train.columns)
    plt.xlabel("Feature importance")
    plt.ylabel("Feature")
    
    # optionally saving the figure to the working directory
    if export:
        plt.savefig('Tree_Leaf_50_Feature_Importance.png')

In [215]:
# INSTANTIATING a classification tree object
# (seeded so that tie-breaking between equally good splits is reproducible)
full_tree = DecisionTreeClassifier(random_state = 219)


# FITTING the training data
full_tree_fit = full_tree.fit(x_train, y_train)


# PREDICTING on new data
full_tree_pred = full_tree_fit.predict(x_test)


# SCORING the model once and saving the results for future use
full_tree_train_score = full_tree_fit.score(x_train, y_train).round(4) # accuracy
full_tree_test_score  = full_tree_fit.score(x_test, y_test).round(4)   # accuracy

# NOTE(review): AUC is computed from the class labels returned by .predict(),
# not from predicted probabilities - confirm this is the intended input
full_tree_auc_score   = roc_auc_score(y_true  = y_test,
                                      y_score = full_tree_pred).round(4) # auc

# gap between training and testing accuracy
full_tree_test_gap = abs(full_tree_train_score - full_tree_test_score).round(4)


# displaying the saved results
print('Full Tree Training ACCURACY:', full_tree_train_score)

print('Full Tree Testing ACCURACY :', full_tree_test_score)

print('Full Tree AUC Score:', full_tree_auc_score)

print('Full Tree Train-Test Gap   :', full_tree_test_gap)


Full Tree Training ACCURACY: 0.779
Full Tree Testing ACCURACY : 0.7949
Full Tree AUC Score: 0.6131
Full Tree Train-Test Gap   : 0.0159


In [216]:
# flattening the decision tree confusion matrix and naming its four cells
(dt_tn,
 dt_fp,
 dt_fn,
 dt_tp) = confusion_matrix(y_true = y_test, y_pred = full_tree_pred).ravel()


# reporting each cell on its own line
print(f"""
True Negatives : {dt_tn}
False Positives: {dt_fp}
False Negatives: {dt_fn}
True Positives : {dt_tp}
""")


True Negatives : 12
False Positives: 38
False Negatives: 2
True Positives : 143



In [217]:
# comparing results

# accuracy, train-test gap and AUC saved earlier for each model
print(f"""
Model      Train Score      Test Score   Train-Test Gap   AUC 
-----      -----------      ----------   -------------    -----
Logistic       {logreg_train_score}         {logreg_test_score}       {logreg_test_gap}          {logreg_auc_score}
DecisionTree   {full_tree_train_score}         {full_tree_test_score}       {full_tree_test_gap}          {full_tree_auc_score}

""")


# confusion-matrix cells of both models side by side
print(f"""
Confusion matrix     Logistic     DecisionTree  
----------------     --------     ------------   
True Negatives :     {logreg_tn}           {dt_tn}       
False Positives:     {logreg_fp}          {dt_fp}
False Negatives:     {logreg_fn}           {dt_fn}             
True Positives :     {logreg_tp}         {dt_tp}  
""")

# the quoted counts are interpolated from the logistic confusion matrix so
# the message cannot drift from the numbers shown in the table above
print(f"""Conclusion : Based on confusion matrix 'Logistic Model' is best because False negative is : {logreg_fn} and True Postive is: {logreg_tp} 
compare to all other models""")


Model      Train Score      Test Score   Train-Test Gap   AUC 
-----      -----------      ----------   -------------    -----
Logistic       0.775         0.7897       0.0147          0.59
DecisionTree   0.779         0.7949       0.0159          0.6131



Confusion matrix     Logistic     DecisionTree  
----------------     --------     ------------   
True Negatives :     9           12       
False Positives:     41          38
False Negatives:     0           2             
True Positives :     145         143  

Conclusion : Based on confusion matrix 'Logistic Model' is best because False negative is : 0 and True Postive is: 145 
compare to all other models
