# Space Travel Prediction using pipeline

In [129]:
# Libraries
import numpy as np
import pandas as pd #Loading the data
from sklearn.compose import ColumnTransformer # Data cleaning
from sklearn.impute import SimpleImputer # Data cleaning 
from sklearn.preprocessing import StandardScaler,OneHotEncoder # Data cleaning
from sklearn.feature_selection import RFE # Data cleanung
from sklearn.model_selection import train_test_split # Data partotion 
from sklearn.linear_model import LogisticRegression # Model Building 
from sklearn import metrics # Model evaluation 
from sklearn.pipeline import Pipeline # Pipeline Creation
from sklearn import set_config
set_config(display="diagram")
import warnings
warnings.filterwarnings("ignore")

# Steps

1) Loading the data in the form of csv

2) Data cleaning (Missing Values, Categorical features, Feature Selction and Units)

3) Model buidling 

4) PipeLine Creation

5) Performance evaluation and deployment 

# Loading the data

In [130]:
# Loading the data
df=pd.read_csv("Space_Travel_data.csv")
df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [131]:
# Dropping the unwanted columns after prilimnary observation
df=df.drop(["PassengerId","Cabin","Name"],axis=1)
df.head()

Unnamed: 0,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported
0,Europa,False,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,False
1,Earth,False,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,True
2,Europa,False,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,False
3,Europa,False,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,False
4,Earth,False,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,True


# Data Cleaning 

In [132]:
# Missing values record
df.isnull().sum()

HomePlanet      201
CryoSleep       217
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Transported       0
dtype: int64

In [133]:
# Record of categorical features
df_cat=df.select_dtypes(exclude=np.number)
df_cat.columns

Index(['HomePlanet', 'CryoSleep', 'Destination', 'VIP', 'Transported'], dtype='object')

In [134]:
# Treatemt of target column is always done using mapping 
df["Transported"]=df["Transported"].astype("string")
df["Transported"]=df["Transported"].map({"True":1,"False":0})
df

Unnamed: 0,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported
0,Europa,False,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,0
1,Earth,False,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,1
2,Europa,False,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,0
3,Europa,False,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,0
4,Earth,False,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,1
...,...,...,...,...,...,...,...,...,...,...,...
8688,Europa,False,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,0
8689,Earth,True,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,0
8690,Earth,False,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,1
8691,Europa,False,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,0


In [135]:
# Treament of missing values
clean_missing_values=ColumnTransformer(transformers=[("HomePlanet",SimpleImputer(strategy="most_frequent"),[0]),("CryoSleep",SimpleImputer(strategy="most_frequent"),[1]),("Destination",SimpleImputer(strategy="most_frequent"),[2]),("Age",SimpleImputer(),[3]),("VIP",SimpleImputer(strategy="most_frequent"),[4]),("RoomService",SimpleImputer(),[5]),("FoodCourt",SimpleImputer(),[6]),("ShoppingMall",SimpleImputer(),[7]),("Spa",SimpleImputer(),[8]),("VRDeck",SimpleImputer(),[9])],remainder="passthrough")

# Treatment of Categorical featues
clean_categorical_treament=ColumnTransformer(transformers=[("HomePlanet",OneHotEncoder(),[0]),("CryoSleep",OneHotEncoder(),[1]),("Destination",OneHotEncoder(),[2]),("VIP",OneHotEncoder(),[3])],remainder="passthrough")


# Feature Scaling
clean_feature_scaling=ColumnTransformer(transformers=[("Scale_features",StandardScaler(with_mean=False),[0,9])],remainder="passthrough")

# Model Building 

In [136]:
# Data Partition 
X=df.drop("Transported",axis=1)
y=df["Transported"]
X_train,X_test,y_train,y_test=train_test_split(X,y)

# Model
model=LogisticRegression()

# Pipeline Creation 

In [137]:
# Pipeline
pipe=Pipeline([("Step1",clean_missing_values),("Step2",clean_categorical_treament),("Step3",clean_feature_scaling),("Step4",model)])

In [138]:
# Visulization of pipeline
pipe.fit(X_train,y_train)

# Performance Evaluation

In [139]:
# Performance Evauation 
predict=pipe.predict(X_test)
print("Accuracy of classification is",metrics.accuracy_score(y_test,predict))

Accuracy of classification is 0.7861085556577737


# Thank You 