In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
# Load dataset
data = pd.read_csv('Titanic-Dataset.csv')

# Preprocessing: impute missing values.
# The filled column is assigned back instead of calling fillna(inplace=True)
# on data[...]: an in-place call on a column selection is chained assignment,
# which pandas deprecates and which leaves `data` unchanged under Copy-on-Write.
# NOTE(review): the median/mode are computed on the full dataset, i.e. before
# the train/test split below, so test rows contribute to the imputed values.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Encode categorical variables as integer codes. The fitted encoders are kept
# at module level because predict_titanic() reuses them on user input.
le_sex = LabelEncoder()
le_embarked = LabelEncoder()
data['Sex'] = le_sex.fit_transform(data['Sex'])
data['Embarked'] = le_embarked.fit_transform(data['Embarked'])

# Feature matrix / target, then an 80/20 hold-out split with a fixed seed.
X = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision Tree (depth limited to 3)
dt_model = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Random Forest (100 trees)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Accuracy on the held-out test split
print(f"Decision Tree Accuracy: {accuracy_score(y_test, dt_pred)}")
print(f"Random Forest Accuracy: {accuracy_score(y_test, rf_pred)}")

# User Input for Prediction



Decision Tree Accuracy: 0.7988826815642458
Random Forest Accuracy: 0.8212290502793296


In [None]:
def predict_titanic():
    """Prompt for one passenger's features and print both models' predictions.

    Reads passenger class, sex, age, sibling/spouse count, parent/child count,
    fare and port of embarkation from standard input, encodes the two
    categorical answers with the module-level ``le_sex`` / ``le_embarked``
    encoders, and prints the ``dt_model`` and ``rf_model`` predictions.

    Returns:
        None. The predictions are printed, not returned.

    Raises:
        ValueError: if a numeric answer cannot be parsed by ``int``/``float``,
            or if the sex / port answer is not a label the encoders were
            fitted on.
    """
    print("\nEnter the following features for prediction:")
    pclass = int(input("Passenger Class (1, 2, 3): "))
    # Strip whitespace and normalise case so answers such as "Male" or " female"
    # do not fail in LabelEncoder.transform. The prompt and the recorded run use
    # lowercase labels -- assumes the dataset does too; TODO confirm.
    sex = input("Sex (male/female): ").strip().lower()
    sex = le_sex.transform([sex])[0]
    age = float(input("Age: "))
    sibsp = int(input("Number of Siblings/Spouses Aboard: "))
    parch = int(input("Number of Parents/Children Aboard: "))
    fare = float(input("Fare: "))
    # Port codes are prompted (and were entered) as single uppercase letters.
    embarked = input("Port of Embarkation (C, Q, S): ").strip().upper()
    embarked = le_embarked.transform([embarked])[0]

    # The models were fitted on a DataFrame with these column names; passing a
    # DataFrame with the same names and order avoids scikit-learn's
    # feature-name mismatch warning that a bare list would trigger.
    input_data = pd.DataFrame(
        [[pclass, sex, age, sibsp, parch, fare, embarked]],
        columns=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],
    )
    dt_prediction = dt_model.predict(input_data)[0]
    rf_prediction = rf_model.predict(input_data)[0]

    print(f"Decision Tree Prediction: {'Survived' if dt_prediction == 1 else 'Not Survived'}")
    print(f"Random Forest Prediction: {'Survived' if rf_prediction == 1 else 'Not Survived'}")

# Call the prediction function
predict_titanic()


Enter the following features for prediction:
Passenger Class (1, 2, 3): 1
Sex (male/female): male
Age: 35
Number of Siblings/Spouses Aboard: 2
Number of Parents/Children Aboard: 2
Fare: 7.2
Port of Embarkation (C, Q, S): C
Decision Tree Prediction: Not Survived
Random Forest Prediction: Not Survived


# Result: Random Forest gives the best accuracy, about 82% (versus about 80% for the Decision Tree).

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd
import joblib  # For saving the model

# Load dataset
data = pd.read_csv('Titanic-Dataset.csv')

# Preprocessing: impute missing values.
# The filled column is assigned back instead of calling fillna(inplace=True)
# on data[...]: an in-place call on a column selection is chained assignment,
# which pandas deprecates and which leaves `data` unchanged under Copy-on-Write.
# NOTE(review): the median/mode are computed on the full dataset, i.e. before
# the train/test split below, so test rows contribute to the imputed values.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Encode categorical variables as integer codes. The fitted encoders are kept
# at module level: predict_titanic() uses them and they are saved with joblib
# further down in this cell.
le_sex = LabelEncoder()
le_embarked = LabelEncoder()
data['Sex'] = le_sex.fit_transform(data['Sex'])
data['Embarked'] = le_embarked.fit_transform(data['Embarked'])

# Feature matrix / target, then an 80/20 hold-out split with a fixed seed.
X = data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest (100 trees)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Accuracy on the held-out test split
print(f"Random Forest Accuracy: {accuracy_score(y_test, rf_pred)}")

def predict_titanic():
    """Prompt for one passenger's features and print the model predictions.

    Reads passenger class, sex, age, sibling/spouse count, parent/child count,
    fare and port of embarkation from standard input, encodes the two
    categorical answers with the module-level ``le_sex`` / ``le_embarked``
    encoders, and prints the ``rf_model`` prediction. A Decision Tree
    prediction is printed as well when a ``dt_model`` exists in the module
    namespace; this cell itself does not create one.

    Returns:
        None. The predictions are printed, not returned.

    Raises:
        ValueError: if a numeric answer cannot be parsed by ``int``/``float``,
            or if the sex / port answer is not a label the encoders were
            fitted on.
    """
    print("\nEnter the following features for prediction:")
    pclass = int(input("Passenger Class (1, 2, 3): "))
    # Strip whitespace and normalise case so answers such as "Male" or " female"
    # do not fail in LabelEncoder.transform. The prompt and the recorded run use
    # lowercase labels -- assumes the dataset does too; TODO confirm.
    sex = input("Sex (male/female): ").strip().lower()
    sex = le_sex.transform([sex])[0]
    age = float(input("Age: "))
    sibsp = int(input("Number of Siblings/Spouses Aboard: "))
    parch = int(input("Number of Parents/Children Aboard: "))
    fare = float(input("Fare: "))
    # Port codes are prompted (and were entered) as single uppercase letters.
    embarked = input("Port of Embarkation (C, Q, S): ").strip().upper()
    embarked = le_embarked.transform([embarked])[0]

    # The model was fitted on a DataFrame with these column names; passing a
    # DataFrame with the same names and order avoids scikit-learn's
    # feature-name mismatch warning that a bare list would trigger.
    input_data = pd.DataFrame(
        [[pclass, sex, age, sibsp, parch, fare, embarked]],
        columns=['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],
    )

    # dt_model is only defined by an earlier cell. Look it up instead of
    # referencing it directly, so this cell does not raise NameError (after the
    # user has already typed every answer) when it is run on its own.
    decision_tree = globals().get('dt_model')
    if decision_tree is not None:
        dt_prediction = decision_tree.predict(input_data)[0]
    rf_prediction = rf_model.predict(input_data)[0]

    if decision_tree is not None:
        print(f"Decision Tree Prediction: {'Survived' if dt_prediction == 1 else 'Not Survived'}")
    print(f"Random Forest Prediction: {'Survived' if rf_prediction == 1 else 'Not Survived'}")

# Run one interactive prediction before persisting anything.
predict_titanic()

# Persist the trained Random Forest so it can be reloaded without retraining.
joblib.dump(rf_model, 'random_forest_titanic.pkl')
print("Random Forest model saved as 'random_forest_titanic.pkl'.")

# Persist both fitted LabelEncoders (Sex first, then Embarked) alongside the
# model, so the same category-to-integer mapping is available at load time.
for fitted_encoder, target_path in (
    (le_sex, 'label_encoder_sex.pkl'),
    (le_embarked, 'label_encoder_embarked.pkl'),
):
    joblib.dump(fitted_encoder, target_path)
print("Label Encoders saved.")


Random Forest Accuracy: 0.8212290502793296

Enter the following features for prediction:
Passenger Class (1, 2, 3): 1
Sex (male/female): male
Age: 32
Number of Siblings/Spouses Aboard: 2
Number of Parents/Children Aboard: 2
Fare: 7.2
Port of Embarkation (C, Q, S): C
Decision Tree Prediction: Not Survived
Random Forest Prediction: Not Survived
Random Forest model saved as 'random_forest_titanic.pkl'.
Label Encoders saved.
