# Treinamento de Modelos

- Linear Models (MMQ)
- Support Vector Machines
- KNN
- Naive Bayes
- Decision Trees
- Ensamble methods (Random florest)

### **IMPORTS**

In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.linear_model import LinearRegression
from sklearn import svm
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

In [2]:
mod=pd.read_csv('Data/Train_final.csv')
mod

Unnamed: 0.1,Unnamed: 0,PassengerId,Age,Fare,Survived,Sex,SibSp,Parch,Pclass1,Pclass2,Pclass3,Cherbourg,Queenstow,Southampton
0,0,-1.730108,-0.592481,-0.502445,0,0,1,0,0,0,1,0,0,1
1,1,-1.726220,0.638789,0.786845,1,1,1,0,1,0,0,1,0,0
2,2,-1.722332,-0.284663,-0.488854,1,1,0,0,0,0,1,0,0,1
3,3,-1.718444,0.407926,0.420730,1,1,1,0,1,0,0,0,0,1
4,4,-1.714556,0.407926,-0.486337,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1093,1093,-0.153521,1.201607,-0.413418,1,1,0,0,0,1,0,0,0,1
1094,1094,1.035713,-0.266525,-0.415634,1,1,0,0,0,1,0,0,0,1
1095,1095,-1.009735,-0.319104,-0.492480,1,1,0,0,0,0,1,0,1,0
1096,1096,1.089412,0.183312,3.718377,1,1,0,0,1,0,0,0,1,0


In [3]:
mod.shape

(1098, 14)

### **SEPARAÇÃO DE DADOS**

In [4]:
X = mod.drop(columns=['Survived'])
y = mod['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2 ,random_state=12)

In [5]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(878, 13) (220, 13) (878,) (220,)


### **Linear Models (MMQ)**

In [6]:
mmq = LinearRegression()
mmq.fit(X=X_train, y=y_train) # dados de treino
y_pred = mmq.predict(X=X_test) # teste do modelo

In [7]:
for i in range(len(y_pred)):
    if y_pred[i] > 0.5:
        y_pred[i] = 1
        continue
    y_pred[i] = 0

In [8]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[83, 17],
       [24, 96]])

In [9]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.78      0.83      0.80       100
           1       0.85      0.80      0.82       120

    accuracy                           0.81       220
   macro avg       0.81      0.81      0.81       220
weighted avg       0.82      0.81      0.81       220



### **SVM**

In [10]:
svm = svm.SVC()
svm.fit(X=X_train, y=y_train)
y_pred = svm.predict(X=X_test)

In [11]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[94,  6],
       [72, 48]])

In [12]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.57      0.94      0.71       100
           1       0.89      0.40      0.55       120

    accuracy                           0.65       220
   macro avg       0.73      0.67      0.63       220
weighted avg       0.74      0.65      0.62       220



### **KNN**

In [13]:
knn = KNeighborsClassifier()
knn.fit(X=X_train, y=y_train)
y_pred = knn.predict(X=X_test)

In [14]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[72, 28],
       [57, 63]])

In [15]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.56      0.72      0.63       100
           1       0.69      0.53      0.60       120

    accuracy                           0.61       220
   macro avg       0.63      0.62      0.61       220
weighted avg       0.63      0.61      0.61       220



### **Naive Bayes**


In [16]:
gnb = GaussianNB()
gnb.fit(X=X_train, y=y_train)
y_pred = gnb.predict(X=X_test)

In [17]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[84, 16],
       [29, 91]])

In [18]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.74      0.84      0.79       100
           1       0.85      0.76      0.80       120

    accuracy                           0.80       220
   macro avg       0.80      0.80      0.80       220
weighted avg       0.80      0.80      0.80       220



### **Decision Trees**

In [19]:
dtree = tree.DecisionTreeClassifier()
dtree.fit(X=X_train, y=y_train)
y_pred = dtree.predict(X=X_test)

In [20]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[74, 26],
       [27, 93]])

In [21]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.73      0.74      0.74       100
           1       0.78      0.78      0.78       120

    accuracy                           0.76       220
   macro avg       0.76      0.76      0.76       220
weighted avg       0.76      0.76      0.76       220



### **Ensamble methods (Random florest)**

In [22]:
emrf = RandomForestClassifier()
emrf.fit(X=X_train,y=y_train)
y_pred = emrf.predict(X=X_test)

In [23]:
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[86, 14],
       [25, 95]])

In [24]:
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.77      0.86      0.82       100
           1       0.87      0.79      0.83       120

    accuracy                           0.82       220
   macro avg       0.82      0.83      0.82       220
weighted avg       0.83      0.82      0.82       220

