# Spaceship Titanic - Training

## Importação das bibliotecas

In [65]:
import pandas as pd

from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.dummy import DummyClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer


## Importação dos dados

In [2]:
spaceship = pd.read_csv('Datasets/spaceship_titanic_train.csv')
spaceship

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,A/98/P,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,False
8689,9278_01,Earth,True,G/1499/S,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,False
8690,9279_01,Earth,False,G/1500/S,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,True
8691,9280_01,Europa,False,E/608/S,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,False


## Funções

In [97]:
def acuracia_modelo(algoritmo):
    
    modelo = algoritmo()
    modelo.fit(train_x,train_y)
    previsoes = modelo.predict(test_x)

    acuracia = accuracy_score(test_y, previsoes).round(3) * 100
    return acuracia


## 1 - ML Aleatório - Teste

In [74]:
SEED = 135

In [75]:
spaceship_copia = spaceship.copy()
spaceship_copia.dropna(inplace=True)
spaceship_copia.shape

(6606, 14)

In [76]:
x = spaceship_copia.iloc[:,:-1]
y = spaceship_copia.iloc[:,-1]

In [77]:
df = pd.get_dummies(x, columns=['HomePlanet', 'CryoSleep', 'Cabin', 'Destination','VIP','Name'])
df

Unnamed: 0,PassengerId,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,...,Name_Zinonon Veriedeezy,Name_Zinopus Spandisket,Name_Zinor Axlentindy,Name_Zinor Proorbeng,Name_Zinoth Lansuffle,Name_Zosmark Trattle,Name_Zosmark Unaasor,Name_Zosmas Ineedeve,Name_Zosmas Mormonized,Name_Zubeneb Pasharne
0,0001_01,39.0,0.0,0.0,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,0002_01,24.0,109.0,9.0,25.0,549.0,44.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0003_01,58.0,43.0,3576.0,0.0,6715.0,49.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0003_02,33.0,0.0,1283.0,371.0,3329.0,193.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,0004_01,16.0,303.0,70.0,151.0,565.0,2.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,41.0,0.0,6819.0,0.0,1643.0,74.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
8689,9278_01,18.0,0.0,0.0,0.0,0.0,0.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8690,9279_01,26.0,0.0,0.0,1872.0,1.0,0.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8691,9280_01,32.0,0.0,1049.0,0.0,353.0,3235.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
train_x,test_x,train_y,test_y = train_test_split(df,y,test_size=0.25, random_state=SEED)
print(train_x.shape)
print(train_y.shape)

(4954, 11912)
(4954,)


In [124]:
comparacao_modelos = pd.DataFrame(
    {'modelos':
        ['DummyClassifier',
         'LinearSVC',
         'DecisionTreeClassifier',
         'RandomForestClassifier',
         'ExtraTreesClassifier',
         'BaggingClassifier'],
    'acuracia':
        [acuracia_modelo(DummyClassifier),
         acuracia_modelo(LinearSVC),
         acuracia_modelo(DecisionTreeClassifier),
         acuracia_modelo(RandomForestClassifier),
         acuracia_modelo(ExtraTreesClassifier),
         acuracia_modelo(BaggingClassifier)]}
    )
comparacao_modelos



Unnamed: 0,modelos,acuracia
0,DummyClassifier,50.4
1,LinearSVC,58.1
2,DecisionTreeClassifier,76.9
3,RandomForestClassifier,78.7
4,ExtraTreesClassifier,78.0
5,BaggingClassifier,78.8


In [126]:
comparacao_modelos.sort_values(by='acuracia',ascending=False)

Unnamed: 0,modelos,acuracia
5,BaggingClassifier,78.8
3,RandomForestClassifier,78.7
4,ExtraTreesClassifier,78.0
2,DecisionTreeClassifier,76.9
1,LinearSVC,58.1
0,DummyClassifier,50.4
