In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report , mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA

In [2]:
train_data = pd.read_csv('../Desktop/anaconda3/train.csv')

In [3]:
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
one_hot_encoder = OneHotEncoder()

In [5]:
train_data['Sex'].unique()

array(['male', 'female'], dtype=object)

In [6]:
train_data['Embarked'].unique()

array(['S', 'C', 'Q', nan], dtype=object)

In [7]:
train_data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [8]:
train_data.fillna(value = RandomForestRegressor , inplace = True)

In [9]:
train_data['Sex'].value_counts()

male      577
female    314
Name: Sex, dtype: int64

In [10]:
train_data['Embarked'].value_counts()

S                                                           644
C                                                           168
Q                                                            77
<class 'sklearn.ensemble._forest.RandomForestRegressor'>      2
Name: Embarked, dtype: int64

In [11]:
train_data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64

In [12]:
train_data.sample(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
820,821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gr...",female,52.0,1,1,12749,93.5,B69,S
615,616,1,2,"Herman, Miss. Alice",female,24.0,1,2,220845,65.0,<class 'sklearn.ensemble._forest.RandomForestR...,S
635,636,1,2,"Davis, Miss. Mary",female,28.0,0,0,237668,13.0,<class 'sklearn.ensemble._forest.RandomForestR...,S
451,452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,<class 'sklearn.ensemble._forest.RandomForestR...,1,0,65303,19.9667,<class 'sklearn.ensemble._forest.RandomForestR...,S
650,651,0,3,"Mitkoff, Mr. Mito",male,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,349221,7.8958,<class 'sklearn.ensemble._forest.RandomForestR...,S
16,17,0,3,"Rice, Master. Eugene",male,2.0,4,1,382652,29.125,<class 'sklearn.ensemble._forest.RandomForestR...,Q
613,614,0,3,"Horgan, Mr. John",male,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,370377,7.75,<class 'sklearn.ensemble._forest.RandomForestR...,Q
269,270,1,1,"Bissette, Miss. Amelia",female,35.0,0,0,PC 17760,135.6333,C99,S
444,445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,65306,8.1125,<class 'sklearn.ensemble._forest.RandomForestR...,S
172,173,1,3,"Johnson, Miss. Eleanor Ileen",female,1.0,1,1,347742,11.1333,<class 'sklearn.ensemble._forest.RandomForestR...,S


In [13]:
mapper = {
    'male' : 0 ,
    'female' :1
}

train_data['Sex'] = train_data['Sex'].map(mapper)

In [14]:
train_data['Sex'].unique()

array([0, 1])

In [15]:
mapper_embarked = {
    'S' : 0,
    'C' : 1,
    'Q' : 2,
}

train_data['Embarked'] = train_data['Embarked'].map(mapper_embarked)

In [16]:
train_data['Embarked'].unique()

array([ 0.,  1.,  2., nan])

In [17]:
train_data['Cabin'].value_counts()

<class 'sklearn.ensemble._forest.RandomForestRegressor'>    687
C23 C25 C27                                                   4
G6                                                            4
B96 B98                                                       4
C22 C26                                                       3
                                                           ... 
E34                                                           1
C7                                                            1
C54                                                           1
E36                                                           1
C148                                                          1
Name: Cabin, Length: 148, dtype: int64

In [18]:
train_data.sample(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
333,334,0,3,"Vander Planke, Mr. Leo Edmondus",0,16.0,2,0,345764,18.0,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
145,146,0,2,"Nicholls, Mr. Joseph Charles",0,19.0,1,1,C.A. 33112,36.75,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
484,485,1,1,"Bishop, Mr. Dickinson H",0,25.0,1,0,11967,91.0792,B49,1.0
286,287,1,3,"de Mulder, Mr. Theodore",0,30.0,0,0,345774,9.5,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
134,135,0,2,"Sobey, Mr. Samuel James Hayden",0,25.0,0,0,C.A. 29178,13.0,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
107,108,1,3,"Moss, Mr. Albert Johan",0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,312991,7.775,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
336,337,0,1,"Pears, Mr. Thomas Clinton",0,29.0,1,0,113776,66.6,C2,0.0
233,234,1,3,"Asplund, Miss. Lillian Gertrud",1,5.0,4,2,347077,31.3875,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
674,675,0,2,"Watson, Mr. Ennis Hastings",0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,239856,0.0,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
90,91,0,3,"Christmann, Mr. Emil",0,29.0,0,0,343276,8.05,<class 'sklearn.ensemble._forest.RandomForestR...,0.0


In [19]:
train_data['SibSp'].unique()

array([1, 0, 3, 4, 2, 5, 8])

In [20]:
train_data['SibSp'].value_counts()

0    608
1    209
2     28
4     18
3     16
8      7
5      5
Name: SibSp, dtype: int64

In [21]:
train_data.sample(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
633,634,0,1,"Parr, Mr. William Henry Marsh",0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,112052,0.0,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
655,656,0,2,"Hickman, Mr. Leonard Mark",0,24.0,2,0,S.O.C. 14879,73.5,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
748,749,0,1,"Marvin, Mr. Daniel Warner",0,19.0,1,0,113773,53.1,D30,0.0
839,840,1,1,"Marechal, Mr. Pierre",0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,11774,29.7,C47,1.0
315,316,1,3,"Nilsson, Miss. Helmina Josefina",1,26.0,0,0,347470,7.8542,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
366,367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",1,60.0,1,0,110813,75.25,D37,1.0
804,805,1,3,"Hedman, Mr. Oskar Arvid",0,27.0,0,0,347089,6.975,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
253,254,0,3,"Lobb, Mr. William Arthur",0,30.0,1,0,A/5. 3336,16.1,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
819,820,0,3,"Skoog, Master. Karl Thorsten",0,10.0,3,2,347088,27.9,<class 'sklearn.ensemble._forest.RandomForestR...,0.0
330,331,1,3,"McCoy, Miss. Agnes",1,<class 'sklearn.ensemble._forest.RandomForestR...,2,0,367226,23.25,<class 'sklearn.ensemble._forest.RandomForestR...,2.0


In [22]:
train_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [23]:
train_data.drop(['PassengerId' , 'Name' , 'Ticket' , 'Cabin'] ,axis = 1 , inplace = True)

In [24]:
y = train_data['Survived']
x = train_data.drop(['Survived'] , axis = 1)

In [25]:
x[:5]

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,0,22.0,1,0,7.25,0.0
1,1,1,38.0,1,0,71.2833,1.0
2,3,1,26.0,0,0,7.925,0.0
3,1,1,35.0,1,0,53.1,0.0
4,3,0,35.0,0,0,8.05,0.0


In [26]:
x_train , x_test , y_train , y_test = train_test_split(x , y ,
                                                      test_size = 0.2)

In [27]:
x_train[:5]

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
825,3,0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,6.95,2.0
234,2,0,24.0,0,0,10.5,0.0
77,3,0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,8.05,0.0
605,3,0,36.0,1,0,15.55,0.0
47,3,1,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,7.75,2.0


In [29]:
dtree = DecisionTreeRegressor()
rand_forest = RandomForestRegressor()
knn = KNeighborsRegressor(n_neighbors = 7 , n_jobs = -1)

In [33]:
x_train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
825,3,0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,6.9500,2.0
234,2,0,24.0,0,0,10.5000,0.0
77,3,0,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,8.0500,0.0
605,3,0,36.0,1,0,15.5500,0.0
47,3,1,<class 'sklearn.ensemble._forest.RandomForestR...,0,0,7.7500,2.0
...,...,...,...,...,...,...,...
593,3,1,<class 'sklearn.ensemble._forest.RandomForestR...,0,2,7.7500,2.0
407,2,0,3.0,1,1,18.7500,0.0
37,3,0,21.0,0,0,8.0500,0.0
297,1,1,2.0,1,2,151.5500,0.0


In [30]:
dtree.fit(x_train , y_train)

TypeError: float() argument must be a string or a real number, not 'ABCMeta'

In [None]:
rand_forest.fit(x_train , y_train)

In [None]:
    train_data[train_data['Survived'] == 'ABCMeta']