Skip to content

Commit

Permalink
initial Data preparation is done
Browse files Browse the repository at this point in the history
  • Loading branch information
LordSomen committed Jul 1, 2018
1 parent 843301e commit a091482
Showing 1 changed file with 53 additions and 5 deletions.
58 changes: 53 additions & 5 deletions Kaggle/Titanic/titanic.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# %%
import pandas as pd

# Load the Kaggle Titanic training set. The CSV is read exactly once, from
# its absolute location, so the cell does not depend on the working directory.
# NOTE: the path is machine-specific; adjust it when running elsewhere.
path = r"/home/soumyajit/Codes/DL/100DaysOfML/Kaggle/Titanic/train.csv"
passenger_train = pd.read_csv(path, sep=",")
passenger_train

# %%
# Load the test set the same way; `path` is rebound to the test file here.
path = r"/home/soumyajit/Codes/DL/100DaysOfML/Kaggle/Titanic/test.csv"
passenger_test = pd.read_csv(path)
passenger_test

# %%
Expand Down Expand Up @@ -53,8 +54,7 @@

# %%
# Derive an age-to-fare ratio column and scatter it against "Survived".
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# the new column is also visible through `passenger_train`.
passenger_train_var = passenger_train
passenger_train_var["Age_Per_Fare"] = (
    passenger_train_var["Age"] / passenger_train_var["Fare"]
)
passenger_train_var.plot(
    kind="scatter",
    x="Survived",
    y="Age_Per_Fare",
    alpha=0.1,
    figsize=(15, 12),
)

Expand All @@ -68,4 +68,52 @@
passenger_train_label

#%%
# Remove, in a single pass, every column that is not fed to the model.
passenger_train_var = passenger_train_var.drop(
    [
        "Name",
        "Cabin",
        "Ticket",
        "PassengerId",
        "Embarked",
        "Age_Per_Fare",
    ],
    axis=1,
)

# Everything that is left except "Sex" ("Embarked" is already gone above).
passenger_train_var_num = passenger_train_var.drop("Sex", axis=1)
passenger_train_var_num

#%%
from sklearn.base import BaseEstimator, TransformerMixin
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Transformer that picks columns out of a DataFrame by label.

    Parameters
    ----------
    attribute_names
        Column label(s) used to index the input in ``transform``.
    """

    def __init__(self, attribute_names):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from ``X`` or ``y``."""
        return self

    def transform(self, X):
        """Index ``X`` by the stored labels and return the result's ``.values``."""
        selected = X[self.attribute_names]
        return selected.values
#%%
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Imputer
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import LabelBinarizer

class PipelineLabelBinarizer(BaseEstimator, TransformerMixin):
    """LabelBinarizer adapter exposing the ``(X, y=None)`` transformer API.

    ``LabelBinarizer.fit`` / ``fit_transform`` take a single positional
    argument on scikit-learn 0.19+, so placing it directly in a ``Pipeline``
    raises ``TypeError`` when the pipeline forwards ``(X, y)``. This wrapper
    accepts and ignores ``y`` and delegates the actual encoding.
    """

    def fit(self, X, y=None):
        """Fit a fresh LabelBinarizer on ``X``; ``y`` is ignored."""
        # Created here rather than in __init__ so the estimator carries no
        # constructor state and every fit starts from a clean encoder.
        self.encoder_ = LabelBinarizer().fit(X)
        return self

    def transform(self, X):
        """Return the binarized encoding of ``X`` from the fitted encoder."""
        return self.encoder_.transform(X)


# Column labels handled by each branch of the preprocessing pipeline.
num_attribs = list(passenger_train_var_num)
cat_attribs = ["Sex"]

# Numeric branch: select columns, fill gaps with the median, standardize.
num_pipeline = Pipeline([
    ('selector', DataFrameSelector(num_attribs)),
    ('imputer', Imputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

# Categorical branch: select "Sex" and binarize it through the adapter.
cat_pipeline = Pipeline([
    ('selector', DataFrameSelector(cat_attribs)),
    ('label_binarizer', PipelineLabelBinarizer()),
])

# Run both branches on the same input and combine their outputs.
full_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", num_pipeline),
    ("cat_pipeline", cat_pipeline),
])
full_pipeline

#%%
# Bare expression: evaluates the frame that is passed to the pipeline below.
passenger_train_var
#%%
# Fit the combined pipeline on the training frame and keep the transformed
# output; the trailing bare name evaluates that result.
passenger_prepared = full_pipeline.fit_transform(passenger_train_var)
passenger_prepared

0 comments on commit a091482

Please sign in to comment.