# Start

In [27]:
import pandas as pd

df = pd.read_csv("raw_test.csv")
df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [28]:
df.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

# Modifying features for prediction

## Age

In [29]:
def categorize_age(x):
    if x <= 18:
        return 1
    elif x > 18 and x <= 40:
        return 2
    elif x > 40 and x <= 60:
        return 3
    else:
        return 4

df["Age_categorized"] = df["Age"].apply(categorize_age)

## Sex

In [30]:
df["Sex_encoded"] = df["Sex"].apply(lambda x: 1 if x == "male" else 0)

## Embarked

In [31]:
df["Embarked"] = df["Embarked"].astype("category")
df["Embarked_encoded"] = df["Embarked"].cat.codes

# Fixing NaN values in ``Age``

In [32]:
male_df = df[df["Sex_encoded"] == 1]
female_df = df[df["Sex_encoded"] == 0]

In [33]:
male_df["Age"] = male_df.groupby("Pclass")["Age"].transform(lambda x: x.fillna(round(x.mean(), 0)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  male_df["Age"] = male_df.groupby("Pclass")["Age"].transform(lambda x: x.fillna(round(x.mean(), 0)))


In [34]:
female_df["Age"] = female_df.groupby("Pclass")["Age"].transform(lambda x: x.fillna(round(x.mean(), 0)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_df["Age"] = female_df.groupby("Pclass")["Age"].transform(lambda x: x.fillna(round(x.mean(), 0)))


In [35]:
df_1 = pd.concat([male_df, female_df])
df_1.sort_index(axis=0, inplace=True)

In [37]:
df_1.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_categorized,Sex_encoded,Embarked_encoded
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,2,1,1
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,3,0,2
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,4,1,1
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,2,1,2
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,2,0,2


In [38]:
df_1.isnull().sum()

PassengerId           0
Pclass                0
Name                  0
Sex                   0
Age                   0
SibSp                 0
Parch                 0
Ticket                0
Fare                  1
Cabin               327
Embarked              0
Age_categorized       0
Sex_encoded           0
Embarked_encoded      0
dtype: int64

# Getting required features

In [39]:
x = df_1[["Pclass", "Age_categorized", "SibSp", "Parch", "Sex_encoded", "Embarked_encoded"]]

# Importing model

In [36]:
import pickle as pkl

model = pkl.load(open("model.sav", 'rb'))

# Prediction

In [40]:
predictions = model.predict(x)

In [47]:
df_2 = x
df_2["Predicted survival"] = predictions
df_2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2["Predicted survival"] = predictions


Unnamed: 0,Pclass,Age_categorized,SibSp,Parch,Sex_encoded,Embarked_encoded,Predicted survival
0,3,2,0,0,1,1,0
1,3,3,1,0,0,2,1
2,2,4,0,0,1,1,0
3,3,2,0,0,1,2,0
4,3,2,1,1,0,2,1
