<a href="https://colab.research.google.com/github/Moksha-nagraj/Marvel_tasks_lvl2/blob/main/Ensemble_Techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


## **Ensemble Techniques**
Ensemble techniques in machine learning combine multiple models to produce predictions that are generally more accurate and robust than those of any individual model. These methods aim to reduce variance, reduce bias, or otherwise improve predictions by leveraging the strengths of several models.

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the two CSV files from the current working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Impute missing values by assigning the filled column back to its frame.
# `frame[col].fillna(..., inplace=True)` operates on a temporary Series: pandas 2.x
# warns about it, and with Copy-on-Write enabled the frame itself is never updated.
# NOTE(review): `test` is imputed with its own medians rather than the training
# medians - confirm this is intended before using `test` for predictions.
train['Age'] = train['Age'].fillna(train['Age'].median())
test['Age'] = test['Age'].fillna(test['Age'].median())
train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode()[0])
test['Fare'] = test['Fare'].fillna(test['Fare'].median())

# Encode sex as an integer flag (male -> 0, female -> 1); any other value maps to NaN.
train['Sex'] = train['Sex'].map({'male': 0, 'female': 1})
test['Sex'] = test['Sex'].map({'male': 0, 'female': 1})

# One-hot encode the port of embarkation, dropping the first level of each frame.
# NOTE(review): the two frames are encoded independently, so their dummy columns
# only line up if both files contain the same set of Embarked values - verify.
train = pd.get_dummies(train, columns=['Embarked'], drop_first=True)
test = pd.get_dummies(test, columns=['Embarked'], drop_first=True)

# Remove the identifier and free-text columns from both frames.
train = train.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
test = test.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Separate the target column from the predictor columns.
y = train['Survived']
X = train.drop(columns='Survived')

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Each model below is fitted on the training split and scored on the validation
# split; `fit` returns the estimator itself, so fitting and predicting are chained.

# Bagging: random forest of 100 trees.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_pred = rf.fit(X_train, y_train).predict(X_val)
print("Random Forest Accuracy:", accuracy_score(y_val, rf_pred))

# Linear baseline: logistic regression with the iteration cap set to 200.
lr = LogisticRegression(
    max_iter=200,
    random_state=42,
)
lr_pred = lr.fit(X_train, y_train).predict(X_val)
print("Logistic Regression Accuracy:", accuracy_score(y_val, lr_pred))

# Boosting: AdaBoost with 100 estimators using the SAMME algorithm.
ada = AdaBoostClassifier(
    n_estimators=100,
    algorithm='SAMME',
    random_state=42,
)
ada_pred = ada.fit(X_train, y_train).predict(X_val)
print("AdaBoost Accuracy:", accuracy_score(y_val, ada_pred))

# Boosting: gradient-boosted trees with 100 stages.
gb = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
)
gb_pred = gb.fit(X_train, y_train).predict(X_val)
print("Gradient Boosting Accuracy:", accuracy_score(y_val, gb_pred))

# Hard-voting ensemble: the predicted class is the majority label across the four
# models. VotingClassifier fits its own clones of the listed estimators.
ensemble = VotingClassifier(
    estimators=[
        ('rf', rf),
        ('lr', lr),
        ('ada', ada),
        ('gb', gb),
    ],
    voting='hard',
)
ensemble_pred = ensemble.fit(X_train, y_train).predict(X_val)
print("Ensemble Model Accuracy:", accuracy_score(y_val, ensemble_pred))

Random Forest Accuracy: 0.7988826815642458
Logistic Regression Accuracy: 0.8100558659217877
AdaBoost Accuracy: 0.7932960893854749
Gradient Boosting Accuracy: 0.8044692737430168
Ensemble Model Accuracy: 0.8100558659217877
