Importing the dependencies

In [438]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler


Importing the data

In [393]:
data = pd.read_csv(r'Datasets\train.csv')
data

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,A/98/P,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,False
8689,9278_01,Earth,True,G/1499/S,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,False
8690,9279_01,Earth,False,G/1500/S,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,True
8691,9280_01,Europa,False,E/608/S,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,False


Data preprocessing

In [394]:
data.isnull().sum()

PassengerId       0
HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
Name            200
Transported       0
dtype: int64

In [395]:
data['HomePlanet'].value_counts()

HomePlanet
Earth     4602
Europa    2131
Mars      1759
Name: count, dtype: int64

In [396]:
data['Destination'].value_counts()

Destination
TRAPPIST-1e      5915
55 Cancri e      1800
PSO J318.5-22     796
Name: count, dtype: int64

In [397]:
# Vectorizing the categorical values
data.replace({'HomePlanet':{'Earth':0,'Europa':1,'Mars':2},'Destination':{'TRAPPIST-1e':0,'55 Cancri e':1,"PSO J318.5-22":2},'CryoSleep':{True:1,False:0},'Transported':{True:1,False:0},'VIP':{True:1,False:0}},inplace=True)

  data.replace({'HomePlanet':{'Earth':0,'Europa':1,'Mars':2},'Destination':{'TRAPPIST-1e':0,'55 Cancri e':1,"PSO J318.5-22":2},'CryoSleep':{True:1,False:0},'Transported':{True:1,False:0},'VIP':{True:1,False:0}},inplace=True)


In [398]:
data

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,1.0,0.0,B/0/P,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,0
1,0002_01,0.0,0.0,F/0/S,0.0,24.0,0.0,109.0,9.0,25.0,549.0,44.0,Juanna Vines,1
2,0003_01,1.0,0.0,A/0/S,0.0,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,0
3,0003_02,1.0,0.0,A/0/S,0.0,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,0
4,0004_01,0.0,0.0,F/1/S,0.0,16.0,0.0,303.0,70.0,151.0,565.0,2.0,Willy Santantines,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,1.0,0.0,A/98/P,1.0,41.0,1.0,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,0
8689,9278_01,0.0,1.0,G/1499/S,2.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,0
8690,9279_01,0.0,0.0,G/1500/S,0.0,26.0,0.0,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,1
8691,9280_01,1.0,0.0,E/608/S,1.0,32.0,0.0,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,0


In [399]:
# Dropping the Acccess Columns
data.drop(columns=["Name","Cabin"],inplace=True)

In [400]:
#Managing the missing values
data['HomePlanet'].fillna(data['HomePlanet'].mode()[0],inplace=True)
data['CryoSleep'].fillna(data['CryoSleep'].mode()[0],inplace=True)
data['Age'].fillna(data["Age"].mean(),inplace=True)
data['Destination'].fillna(data['Destination'].mode()[0],inplace=True)
data['RoomService'].fillna(data['RoomService'].mean(), inplace=True)
data['FoodCourt'].fillna(data['FoodCourt'].mean(), inplace=True)
data['ShoppingMall'].fillna(data['ShoppingMall'].mean(), inplace=True)
data['Spa'].fillna(data['Spa'].mean(), inplace=True)
data['VRDeck'].fillna(data['VRDeck'].mean(), inplace=True)
data['VIP'].fillna(data['VIP'].mean(),inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['HomePlanet'].fillna(data['HomePlanet'].mode()[0],inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['CryoSleep'].fillna(data['CryoSleep'].mode()[0],inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermed

In [401]:
data.isnull().sum()

PassengerId     0
HomePlanet      0
CryoSleep       0
Destination     0
Age             0
VIP             0
RoomService     0
FoodCourt       0
ShoppingMall    0
Spa             0
VRDeck          0
Transported     0
dtype: int64

In [402]:
data

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported
0,0001_01,1.0,0.0,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0002_01,0.0,0.0,0.0,24.0,0.0,109.0,9.0,25.0,549.0,44.0,1
2,0003_01,1.0,0.0,0.0,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,0
3,0003_02,1.0,0.0,0.0,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,0
4,0004_01,0.0,0.0,0.0,16.0,0.0,303.0,70.0,151.0,565.0,2.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,1.0,0.0,1.0,41.0,1.0,0.0,6819.0,0.0,1643.0,74.0,0
8689,9278_01,0.0,1.0,2.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0
8690,9279_01,0.0,0.0,0.0,26.0,0.0,0.0,0.0,1872.0,1.0,0.0,1
8691,9280_01,1.0,0.0,1.0,32.0,0.0,0.0,1049.0,0.0,353.0,3235.0,0


Performing the standard scaler and one hot encoding in it

In [403]:
#Standardizing the data
columns_to_scale = ['HomePlanet', 'CryoSleep', 'Destination', 'Age', 'VIP', 
                    'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

scaler = StandardScaler()


data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])

In [404]:
#One Hot Encoding
columns_to_encode = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']

encoder = OneHotEncoder(sparse_output=False, drop='first')  # drop='first' avoids the dummy variable trap

encoded_columns = encoder.fit_transform(data[columns_to_encode])

encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(columns_to_encode))

data = pd.concat([data.drop(columns_to_encode, axis=1).reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)

In [405]:
data

Unnamed: 0,PassengerId,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_0.4403852871367919,HomePlanet_1.6980296041801994,CryoSleep_1.3646846431219442,Destination_0.9397363457043373,Destination_2.480790496469816,VIP_0.0,VIP_6.531424546184854
0,0001_01,0.709437,-0.340590,-0.287314,-0.290817,-0.276663,-0.269023,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0002_01,-0.336717,-0.175364,-0.281669,-0.248968,0.211505,-0.230194,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0003_01,2.034566,-0.275409,1.955616,-0.290817,5.694289,-0.225782,0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0003_02,0.290975,-0.340590,0.517406,0.330225,2.683471,-0.098708,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0004_01,-0.894666,0.118709,-0.243409,-0.038048,0.225732,-0.267258,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,0.848924,-0.340590,3.989682,-0.290817,1.184286,-0.203720,0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
8689,9278_01,-0.755179,-0.340590,-0.287314,-0.290817,-0.276663,-0.269023,0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
8690,9279_01,-0.197230,-0.340590,-0.287314,2.842851,-0.275774,-0.269023,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8691,9280_01,0.221232,-0.340590,0.370637,-0.290817,0.037223,2.585740,0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


Splitting the data 

In [406]:
X = data.drop(columns=['PassengerId','Transported'])
Y = data['Transported']

In [407]:
print(X)
print(Y)

           Age  RoomService  FoodCourt  ShoppingMall       Spa    VRDeck  \
0     0.709437    -0.340590  -0.287314     -0.290817 -0.276663 -0.269023   
1    -0.336717    -0.175364  -0.281669     -0.248968  0.211505 -0.230194   
2     2.034566    -0.275409   1.955616     -0.290817  5.694289 -0.225782   
3     0.290975    -0.340590   0.517406      0.330225  2.683471 -0.098708   
4    -0.894666     0.118709  -0.243409     -0.038048  0.225732 -0.267258   
...        ...          ...        ...           ...       ...       ...   
8688  0.848924    -0.340590   3.989682     -0.290817  1.184286 -0.203720   
8689 -0.755179    -0.340590  -0.287314     -0.290817 -0.276663 -0.269023   
8690 -0.197230    -0.340590  -0.287314      2.842851 -0.275774 -0.269023   
8691  0.221232    -0.340590   0.370637     -0.290817  0.037223  2.585740   
8692  1.058155    -0.149594   2.653082     -0.290817 -0.276663 -0.258433   

      HomePlanet_0.4403852871367919  HomePlanet_1.6980296041801994  \
0                

Train Test Split

In [408]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [409]:
print(X_test.head())

           Age  RoomService  FoodCourt  ShoppingMall           Spa    VRDeck  \
304  -0.685435     0.291514  -0.068415      0.770479 -2.739958e-01  0.663739   
2697 -0.755179    -0.334527   0.279691     -0.290817 -2.766634e-01 -0.268140   
8424  0.848924    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
1672  0.430462    -0.340590  -0.075315      0.439033 -5.054495e-17 -0.269023   
8458  0.988411    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   

      HomePlanet_0.4403852871367919  HomePlanet_1.6980296041801994  \
304                             0.0                            1.0   
2697                            0.0                            0.0   
8424                            0.0                            0.0   
1672                            0.0                            0.0   
8458                            1.0                            0.0   

      CryoSleep_1.3646846431219442  Destination_0.9397363457043373  \
304                         

Model Implementation


In [410]:
#Logistic Regression
model = LogisticRegression()

In [411]:
model.fit(X_train, Y_train)

In [412]:
model.predict(X_test)

array([0, 1, 1, ..., 0, 1, 0], dtype=int64)

In [439]:
#Random Forest Classifier
random_fmodel = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, Y_train)
predictions = model.predict(X_test)

Accuracy Score

In [448]:
X_test_preddiction = accuracy_score(Y_test, predictions)

In [449]:
print(X_test_preddiction)

0.777458309373203


Kaggle contest test data

In [415]:
print(X_test)

           Age  RoomService  FoodCourt  ShoppingMall           Spa    VRDeck  \
304  -0.685435     0.291514  -0.068415      0.770479 -2.739958e-01  0.663739   
2697 -0.755179    -0.334527   0.279691     -0.290817 -2.766634e-01 -0.268140   
8424  0.848924    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
1672  0.430462    -0.340590  -0.075315      0.439033 -5.054495e-17 -0.269023   
8458  0.988411    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
...        ...          ...        ...           ...           ...       ...   
7175 -0.894666    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
3187 -2.010564    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
1302 -0.824923    -0.308757  -0.287314      0.864221 -4.547241e-02 -0.264610   
5934  0.918668    -0.340590  -0.287314     -0.290817 -2.766634e-01 -0.269023   
6093 -0.824923    -0.340590  -0.287314     -0.290817  1.329225e+00 -0.269023   

      HomePlanet_0.4403852871367919  Ho

In [416]:
Kaggle_test = pd.read_csv(r'Datasets\test.csv')
Kaggle_test

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name
0,0013_01,Earth,True,G/3/S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning
1,0018_01,Earth,False,F/4/S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers
2,0019_01,Europa,True,C/0/S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus
3,0021_01,Europa,False,C/1/S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter
4,0023_01,Earth,False,F/5/S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,9266_02,Earth,True,G/1496/S,TRAPPIST-1e,34.0,False,0.0,0.0,0.0,0.0,0.0,Jeron Peter
4273,9269_01,Earth,False,,TRAPPIST-1e,42.0,False,0.0,847.0,17.0,10.0,144.0,Matty Scheron
4274,9271_01,Mars,True,D/296/P,55 Cancri e,,False,0.0,0.0,0.0,0.0,0.0,Jayrin Pore
4275,9273_01,Europa,False,D/297/P,,,False,0.0,2680.0,0.0,0.0,523.0,Kitakan Conale


In [417]:
Kaggle_test.drop(columns=["Name","Cabin"],inplace=True)

In [418]:
#Managing the missing values
Kaggle_test['HomePlanet'].fillna(Kaggle_test['HomePlanet'].mode()[0],inplace=True)
Kaggle_test['CryoSleep'].fillna(Kaggle_test['CryoSleep'].mode()[0],inplace=True)
Kaggle_test['Age'].fillna(Kaggle_test["Age"].mean(),inplace=True)
Kaggle_test['Destination'].fillna(Kaggle_test['Destination'].mode()[0],inplace=True)
Kaggle_test['RoomService'].fillna(Kaggle_test['RoomService'].mean(), inplace=True)
Kaggle_test['FoodCourt'].fillna(Kaggle_test['FoodCourt'].mean(), inplace=True)
Kaggle_test['ShoppingMall'].fillna(Kaggle_test['ShoppingMall'].mean(), inplace=True)
Kaggle_test['Spa'].fillna(Kaggle_test['Spa'].mean(), inplace=True)
Kaggle_test['VRDeck'].fillna(Kaggle_test['VRDeck'].mean(), inplace=True)
Kaggle_test['VIP'].fillna(Kaggle_test['VIP'].mean(),inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  Kaggle_test['HomePlanet'].fillna(Kaggle_test['HomePlanet'].mode()[0],inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  Kaggle_test['CryoSleep'].fillna(Kaggle_test['CryoSleep'].mode()[0],inplace=True)
  Kaggle_test['CryoSleep'].fillna(Kaggle_test['CryoSleep'].mode()[0]

In [419]:
Kaggle_test.isnull().sum()

PassengerId     0
HomePlanet      0
CryoSleep       0
Destination     0
Age             0
VIP             0
RoomService     0
FoodCourt       0
ShoppingMall    0
Spa             0
VRDeck          0
dtype: int64

In [420]:
Kaggle_test

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck
0,0013_01,Earth,True,TRAPPIST-1e,27.000000,False,0.0,0.0,0.0,0.0,0.0
1,0018_01,Earth,False,TRAPPIST-1e,19.000000,False,0.0,9.0,0.0,2823.0,0.0
2,0019_01,Europa,True,55 Cancri e,31.000000,False,0.0,0.0,0.0,0.0,0.0
3,0021_01,Europa,False,TRAPPIST-1e,38.000000,False,0.0,6652.0,0.0,181.0,585.0
4,0023_01,Earth,False,TRAPPIST-1e,20.000000,False,10.0,0.0,635.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
4272,9266_02,Earth,True,TRAPPIST-1e,34.000000,False,0.0,0.0,0.0,0.0,0.0
4273,9269_01,Earth,False,TRAPPIST-1e,42.000000,False,0.0,847.0,17.0,10.0,144.0
4274,9271_01,Mars,True,55 Cancri e,28.658146,False,0.0,0.0,0.0,0.0,0.0
4275,9273_01,Europa,False,TRAPPIST-1e,28.658146,False,0.0,2680.0,0.0,0.0,523.0


In [421]:
# Vectorizing the categorical values
Kaggle_test.replace({'HomePlanet':{'Earth':0,'Europa':1,'Mars':2},'Destination':{'TRAPPIST-1e':0,'55 Cancri e':1,"PSO J318.5-22":2},'CryoSleep':{True:1,False:0},'VIP':{True:1,False:0}},inplace=True)

  Kaggle_test.replace({'HomePlanet':{'Earth':0,'Europa':1,'Mars':2},'Destination':{'TRAPPIST-1e':0,'55 Cancri e':1,"PSO J318.5-22":2},'CryoSleep':{True:1,False:0},'VIP':{True:1,False:0}},inplace=True)


In [422]:
#Standardizing the data
columns_to_scale = ['HomePlanet', 'CryoSleep', 'Destination', 'Age', 'VIP', 
                    'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

scaler = StandardScaler()


Kaggle_test[columns_to_scale] = scaler.fit_transform(Kaggle_test[columns_to_scale])

In [423]:
#One Hot Encoding
columns_to_encode = ['HomePlanet', 'CryoSleep', 'Destination', 'VIP']

encoder = OneHotEncoder(sparse_output=False, drop='first')  # drop='first' avoids the dummy variable trap

encoded_columns = encoder.fit_transform(Kaggle_test[columns_to_encode])

encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(columns_to_encode))

Kaggle_test = pd.concat([Kaggle_test.drop(columns_to_encode, axis=1).reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)

In [424]:
Kaggle_test

Unnamed: 0,PassengerId,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_0.4117645918640081,HomePlanet_1.6476362826726838,CryoSleep_1.330442678286913,Destination_0.9636054542071728,Destination_2.5129815924192322,VIP_0.0,VIP_7.534923341603184
0,0013_01,-1.182216e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0018_01,-6.886014e-01,-0.364780,-0.285385,-0.319859,2.283008,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0019_01,1.669682e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0021_01,6.660505e-01,-0.364780,4.118523,-0.319859,-0.110576,0.222074,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0023_01,-6.173039e-01,-0.348143,-0.291352,0.825745,-0.274558,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,9266_02,3.808606e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4273,9269_01,9.512404e-01,-0.364780,0.270158,-0.289189,-0.265498,-0.134974,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4274,9271_01,-2.532995e-16,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,1.0,1.0,1.0,0.0,0.0,0.0
4275,9273_01,-2.532995e-16,-0.364780,1.485327,-0.319859,-0.274558,0.171877,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [425]:
Kaggle_test.drop(columns=['PassengerId'],inplace=True)

In [426]:
Kaggle_test

Unnamed: 0,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_0.4117645918640081,HomePlanet_1.6476362826726838,CryoSleep_1.330442678286913,Destination_0.9636054542071728,Destination_2.5129815924192322,VIP_0.0,VIP_7.534923341603184
0,-1.182216e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,-6.886014e-01,-0.364780,-0.285385,-0.319859,2.283008,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.669682e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3,6.660505e-01,-0.364780,4.118523,-0.319859,-0.110576,0.222074,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-6.173039e-01,-0.348143,-0.291352,0.825745,-0.274558,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,3.808606e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4273,9.512404e-01,-0.364780,0.270158,-0.289189,-0.265498,-0.134974,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4274,-2.532995e-16,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,1.0,1.0,1.0,0.0,0.0,0.0
4275,-2.532995e-16,-0.364780,1.485327,-0.319859,-0.274558,0.171877,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [427]:
#renaming the columns
# Current column names
old_column_names = [
    'HomePlanet_0.4117645918640081',
    'HomePlanet_1.6476362826726838', 
    'CryoSleep_1.330442678286913', 
    'Destination_0.9636054542071728', 
    'Destination_2.5129815924192322', 
    'VIP_0.0', 
    'VIP_7.534923341603184'
]

# New column names
new_column_names = [
    'HomePlanet_0.4403852871367919', 
    'HomePlanet_1.6980296041801994', 
    'CryoSleep_1.3646846431219442', 
    'Destination_0.9397363457043373', 
    'Destination_2.480790496469816', 
    'VIP_0.0', 
    'VIP_6.531424546184854'
]

# Create a dictionary for renaming
rename_dict = dict(zip(old_column_names, new_column_names))

# Rename the columns in Kaggle_test DataFrame
Kaggle_test.rename(columns=rename_dict, inplace=True)




In [428]:
Kaggle_test

Unnamed: 0,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_0.4403852871367919,HomePlanet_1.6980296041801994,CryoSleep_1.3646846431219442,Destination_0.9397363457043373,Destination_2.480790496469816,VIP_0.0,VIP_6.531424546184854
0,-1.182216e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,-6.886014e-01,-0.364780,-0.285385,-0.319859,2.283008,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.669682e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3,6.660505e-01,-0.364780,4.118523,-0.319859,-0.110576,0.222074,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-6.173039e-01,-0.348143,-0.291352,0.825745,-0.274558,-0.251561,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4272,3.808606e-01,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4273,9.512404e-01,-0.364780,0.270158,-0.289189,-0.265498,-0.134974,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4274,-2.532995e-16,-0.364780,-0.291352,-0.319859,-0.274558,-0.251561,0.0,1.0,1.0,1.0,0.0,0.0,0.0
4275,-2.532995e-16,-0.364780,1.485327,-0.319859,-0.274558,0.171877,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [429]:
#Passing the features it to the model
Kaggle_test_prediction = model.predict(Kaggle_test)

In [430]:
Kaggle_test_prediction

array([1, 0, 1, ..., 1, 1, 1], dtype=int64)

In [431]:
Kaggle_test_prediction_score = accuracy_score(Kaggle_test_prediction, model.predict(Kaggle_test))
print(Kaggle_test_prediction_score)

1.0


In [432]:
sample = pd.read_csv(r'Datasets\test.csv')

In [433]:
Final_prediction = pd.DataFrame({
    'PassengerId': sample['PassengerId'][:len(Kaggle_test_prediction)],  # Align lengths
    'Transported': Kaggle_test_prediction
})

# Map 0 to False and 1 to True in the Transported column
Final_prediction['Transported'] = Final_prediction['Transported'].map({0: False, 1: True})


In [434]:
Final_prediction

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True
3,0021_01,True
4,0023_01,True
...,...,...
4272,9266_02,True
4273,9269_01,False
4274,9271_01,True
4275,9273_01,True


In [435]:
Final_prediction.to_csv("Datasets/Final_prediction.csv", index=False)