# SPACESHIP TITANIC PREDICTION

### IMPORTING LIBRARIES

In [None]:
import numpy as np 
import pandas as pd

### READ TRAIN DATASET

In [None]:
# Load the competition's training split and preview its first rows.
train = pd.read_csv(filepath_or_buffer='/kaggle/input/spaceship-titanic/train.csv')
train.head()

# 1. DATA CLEANING

## FOR TRAIN DATASET

In [None]:
# Dimensions of the raw training frame as a (rows, columns) tuple.
train.shape

In [None]:
# Per-column dtype and non-null count summary.
train.info()

### REMOVE UNWANTED COLUMNS

In [None]:
train.drop(['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck','Name'],axis=1,inplace=True)

In [None]:
train.head()

### CHECK FOR NULL VALUES

In [None]:
train.isnull().sum()

In [None]:
train.replace('-','nan')
train.replace('na','nan')

In [None]:
train.isnull().sum()

### FILLING THE NULL VALUES

In [None]:
train['HomePlanet'].fillna(train['HomePlanet'].mode()[0],inplace=True)

In [None]:
train['CryoSleep'].fillna(train['CryoSleep'].mode()[0],inplace=True)

#### Cabin is in the format of deck/num/side so split separately and fill the null values

In [None]:
train[['Deck', 'Num','Side']] = train['Cabin'].str.split('/', expand=True)

In [None]:
train=train.drop(['Cabin'],axis=1)
train.head()

In [None]:
train['Deck'].fillna(train['Deck'].mode()[0],inplace=True)

In [None]:
train['Num'].fillna(train['Num'].mode()[0],inplace=True)

In [None]:
train['Num'] = train['Num'].astype(int)

In [None]:
train['Side'].fillna(train['Side'].mode()[0],inplace=True)

In [None]:
train['Destination'].fillna(train['Destination'].mode()[0],inplace=True)

In [None]:
train['Age'].mean()

In [None]:
train['Age'].median()

In [None]:
train['Age'].fillna(train['Age'].mean(),inplace=True)

In [None]:
train['VIP'].fillna(train['VIP'].mode()[0],inplace=True)

In [None]:
train.isnull().sum()

### CHECK FOR DUPLICATES

In [None]:
# Number of rows that exactly repeat an earlier row.
train.duplicated().sum()

### CHECK SKEWNESS

In [None]:
# Restrict to numeric columns: since pandas 2.0, skew() no longer drops
# non-numeric columns silently and raises on the string columns that are still
# present at this point (PassengerId, HomePlanet, Destination, Deck, Side).
train.skew(numeric_only=True)

### REMOVE SKEWNESS

In [None]:
# Apply a square-root transform to Num, then report the skewness of the
# transformed column.
num_sqrt = np.sqrt(train['Num'])
train['Num'] = num_sqrt
train['Num'].skew()

### REPLACING CATEGORICAL VALUES WITH NUMERICAL VALUES

In [None]:
train['VIP'].replace({True:1,False:0},inplace=True)

In [None]:
train['CryoSleep'].replace({True:1,False:0},inplace=True)

In [None]:
train['HomePlanet'].unique()

In [None]:
train['HomePlanet'].replace({'Europa':0,'Earth':1,'Mars':2},inplace=True)

In [None]:
train['Destination'].unique()

In [None]:
train['Destination'].replace({'TRAPPIST-1e':0,'PSO J318.5-22':1,'55 Cancri e':2},inplace=True)

In [None]:
train['Side'].unique()

In [None]:
train['Side'].replace({'P':1,'S':0},inplace=True)

In [None]:
train['Deck'].unique()

In [None]:
train['Deck'].replace({'A':0,'B':1,'C':2,'D':3,'E':4,'F':5,'G':6,'T':7},inplace=True)

In [None]:
train.head()

#  FIND CORRELATION FOR TRAIN DATASET

In [None]:
# PassengerId is still a string column here; since pandas 2.0, corr() raises
# on non-numeric columns unless they are excluded explicitly.
train.corr(numeric_only=True)

### DROP WEAKLY CORRELATED COLUMNS

In [None]:
train.drop(['VIP',"Num"],axis=1,inplace=True)

In [None]:
train.head()

### SPLIT VALUES INTO x_train and y_train

In [None]:
x_train=train.drop('Transported',axis=1)
y_train=train['Transported']

In [None]:
x_train

In [None]:
y_train

## READ TEST DATASET

In [None]:
# Load the competition's test split and preview its first rows.
test = pd.read_csv(filepath_or_buffer='/kaggle/input/spaceship-titanic/test.csv')
test.head()

## FOR TEST DATASET

In [None]:
# Dimensions of the raw test frame as a (rows, columns) tuple.
test.shape

In [None]:
# Per-column dtype and non-null count summary.
test.info()

### REMOVE UNWANTED COLUMNS

In [None]:
test.drop(['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck','Name','VIP'],axis=1,inplace=True)

In [None]:
test.head()

### CHECK FOR NULL VALUES

In [None]:
test.isnull().sum()

In [None]:
test.replace('-','nan')
test.replace('na','nan')

In [None]:
test.isnull().sum()

### FILLING NULL VALUES

In [None]:
test['HomePlanet'].fillna(test['HomePlanet'].mode()[0],inplace=True)

In [None]:
test['CryoSleep'].fillna(test['CryoSleep'].mode()[0],inplace=True)

#### Cabin is in the format of deck/num/side so split separately and fill the null values

In [None]:
test[['Deck', 'Num','Side']] = test['Cabin'].str.split('/', expand=True)

In [None]:
test.drop(['Cabin'],axis=1,inplace=True)
test.head()

In [None]:
test['Deck'].fillna(test['Deck'].mode()[0],inplace=True)

In [None]:
test.drop(['Num'],axis=1,inplace=True)

In [None]:
test['Side'].fillna(test['Side'].mode()[0],inplace=True)

In [None]:
test['Destination'].fillna(test['Destination'].mode()[0],inplace=True)

In [None]:
test['Age'].mean()

In [None]:
test['Age'].median()

In [None]:
test['Age'].fillna(test['Age'].mean(),inplace=True)

### CHECK FOR SKEWNESS

In [None]:
# Restrict to numeric columns: since pandas 2.0, skew() no longer drops
# non-numeric columns silently and raises on the string columns that are still
# present at this point (PassengerId, HomePlanet, Destination, Deck, Side).
test.skew(numeric_only=True)

### REPLACING CATEGORICAL VALUES WITH NUMERICAL VALUES

In [None]:
test['CryoSleep'].replace({True:1,False:0},inplace=True)
test.head()

In [None]:
test["HomePlanet"].unique()

In [None]:
test['HomePlanet'].replace({'Europa':0,'Earth':1,'Mars':2},inplace=True)

In [None]:
test["Destination"].unique()

In [None]:
test['Destination'].replace({'TRAPPIST-1e':0,'PSO J318.5-22':1,'55 Cancri e':2},inplace=True)

In [None]:
test["Deck"].unique()

In [None]:
test['Deck'].replace({'A':0,'B':1,'C':2,'D':3,'E':4,'F':5,'G':6,'T':7},inplace=True)

In [None]:
test['Side'].unique()

In [None]:
test['Side'].replace({"P":1,'S':0},inplace=True)
test.head()

In [None]:
test.isnull().sum()

### SPLIT DATA FOR x_test

In [None]:
# Select all rows and all columns of test (the slice starts at column 0), so
# x_test carries every remaining test column, PassengerId included.
x_test=test.iloc[:,0:]

# FEATURE ENGINEERING

### STANDARDISATION OF x_train AND x_test

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling statistics from the training features only,
# then apply that same scaling to both the training and the test features.
sc_x = StandardScaler()
sc_x.fit(x_train)
x_train = sc_x.transform(x_train)
x_test = sc_x.transform(x_test)

### TRAIN THE DATA USING SVM ALGORITHM

In [None]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier with a fixed random_state, fitted on
# the scaled training features and the Transported labels.
svm_params = {'kernel': 'rbf', 'random_state': 0}
model = SVC(**svm_params)
model.fit(x_train, y_train)

### PREDICTION ON THE TEST SET

In [None]:
# Despite its name, y_test holds the model's predictions for the scaled test
# features, not ground-truth labels.
y_test=model.predict(x_test)

In [None]:
# Show the predicted values.
y_test

### CONVERTING THE Y_TEST VALUE TO DATA FRAME

In [None]:
data=pd.DataFrame(y_test,columns=['Transported'])

In [None]:
data.head()

In [None]:
data1=pd.DataFrame(test.PassengerId)

In [None]:
data1.head()

In [None]:
result=pd.concat([data1,data],axis=1)
result

### CONVERTING TO CSV FILE

In [None]:
# Write the id/prediction table to disk without the index column.
result.to_csv(path_or_buf='kagglespaceshiptitanic.csv', index=False)