In [3]:
# Step 1: Import the necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Step 2: Load the Titanic dataset
titanic_data = pd.read_csv('/content/Titanic-Dataset.csv')

# Step 3: Preprocess the data
# Drop unnecessary columns like 'Name', 'Ticket', 'Cabin' (you can keep or drop others based on analysis)
titanic_data = titanic_data.drop(columns=['Name', 'Ticket', 'Cabin', 'Embarked'])

# Fill missing 'Age' values with the median of the 'Age' column
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].median())

# Convert 'Sex' to numerical values (male = 0, female = 1)
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})

# Fill missing 'Fare' values with the median of the 'Fare' column
titanic_data['Fare'] = titanic_data['Fare'].fillna(titanic_data['Fare'].median())

# Step 4: Prepare features and target variable
X = titanic_data.drop(columns=['Survived'])
y = titanic_data['Survived']

# Step 5: Scale the features (important for models like SVM or Logistic Regression)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Step 7: Define the base classifiers
log_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC(probability=True)  # Use probability=True for soft voting
tree_clf = DecisionTreeClassifier()

# Step 8: Define the Voting Classifier
voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('svc', svm_clf), ('dt', tree_clf)], voting='soft')

# Step 9: Train the Voting Classifier
voting_clf.fit(X_train, y_train)

# Step 10: Evaluate the model on the test data
y_pred = voting_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Step 11: Print the accuracy
print(f"Voting Classifier Accuracy: {accuracy:.4f}")



Voting Classifier Accuracy: 0.8284


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load Titanic dataset
titanic_data = pd.read_csv('/content/Titanic-Dataset.csv')

# Step 2: Preprocessing data
# Handle missing data in Age, Embarked, and Fare
# Impute missing values in 'Age', 'Fare', and 'Embarked' columns
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].median())
titanic_data['Fare'] = titanic_data['Fare'].fillna(titanic_data['Fare'].median())
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(titanic_data['Embarked'].mode()[0])


# Convert categorical features to numerical (e.g., 'Embarked', 'Sex')
titanic_data = pd.get_dummies(titanic_data, columns=['Embarked', 'Sex'], drop_first=True)

# Step 3: Split the data into features (X) and target (y)
X = titanic_data.drop(columns=['Survived', 'Name', 'Ticket', 'Cabin'])  # Drop non-useful columns
y = titanic_data['Survived']  # Target variable

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Feature scaling (StandardScaler for numerical values)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Define the base models
log_reg = LogisticRegression(max_iter=1000)  # Logistic Regression
svm = SVC(probability=True)  # Support Vector Machine
decision_tree = DecisionTreeClassifier()  # Decision Tree Classifier

# Step 7: Define the Voting Classifier
# Use the soft voting method, which predicts based on the probability of each class
voting_clf = VotingClassifier(estimators=[
    ('log_reg', log_reg),
    ('svm', svm),
    ('decision_tree', decision_tree)
], voting='soft')  # 'soft' uses predicted probabilities for majority voting

# Step 8: Train the Voting Classifier
voting_clf.fit(X_train, y_train)

# Step 9: Evaluate the Voting Classifier on the test data
y_pred = voting_clf.predict(X_test)

# Calculate the accuracy of the Voting Classifier
voting_accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print(f"Voting Classifier Accuracy: {voting_accuracy:.4f}")

# Step 10: Compare with individual models' accuracy
log_reg.fit(X_train, y_train)
log_reg_accuracy = log_reg.score(X_test, y_test)

svm.fit(X_train, y_train)
svm_accuracy = svm.score(X_test, y_test)

decision_tree.fit(X_train, y_train)
decision_tree_accuracy = decision_tree.score(X_test, y_test)

# Print individual model accuracies for comparison
print(f"Logistic Regression Accuracy: {log_reg_accuracy:.4f}")
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print(f"Decision Tree Accuracy: {decision_tree_accuracy:.4f}")


Voting Classifier Accuracy: 0.7933
Logistic Regression Accuracy: 0.8045
SVM Accuracy: 0.8101
Decision Tree Accuracy: 0.7486
