**Task:** Fake News Detection

**Ensemble:**

**Features:** BoW and TF-IDF

**Split:**  80:20

**Base Learners:** Logistic Regression, Naive Bayes, Support Vector Machines (SVM)
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** Combine predictions from Logistic Regression, Naive Bayes, and SVM to classify fake news.

In [None]:
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.svm import SVC
    from sklearn.ensemble import VotingClassifier
    from sklearn.metrics import accuracy_score

In [None]:
    # Load the labelled news corpus: the 'text' column is the model input and
    # the 'label' column is the prediction target.
    data = pd.read_csv('your_data.csv')
    X, y = data['text'], data['label']

In [None]:
    # Bag-of-words representation: one column per vocabulary term, raw counts.
    bow_vectorizer = CountVectorizer()
    X_bow = bow_vectorizer.fit_transform(X)

    # TF-IDF representation built from the same documents.
    tfidf_vectorizer = TfidfVectorizer()
    X_tfidf = tfidf_vectorizer.fit_transform(X)

In [None]:
    # Split the raw documents once (80:20) so that both feature sets share
    # exactly the same train/test rows and labels.
    X_train_text, X_test_text, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit each vectorizer on the training documents only, then merely transform
    # the held-out documents. Fitting on the full corpus would let test-set
    # vocabulary and document frequencies leak into the features and inflate
    # the reported accuracy.
    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train_text)
    X_test_tfidf = tfidf_vectorizer.transform(X_test_text)
    X_train_bow = bow_vectorizer.fit_transform(X_train_text)
    X_test_bow = bow_vectorizer.transform(X_test_text)

In [None]:
    # Base learners for the hard-voting ensemble, all with default settings.
    lr, nb, svm = LogisticRegression(), MultinomialNB(), SVC()

In [None]:
    # Hard voting: every base learner casts one vote and the majority label wins.
    ensemble_hard = VotingClassifier(estimators=[('lr', lr), ('nb', nb), ('svm', svm)], voting='hard')
    ensemble_hard.fit(X_train_tfidf, y_train)

    # Soft voting averages predicted class probabilities, so every base learner
    # must expose predict_proba.
    # LogisticRegression provides predict_proba out of the box and accepts no
    # `probability` argument (passing one raises TypeError); only SVC has to
    # opt in via probability=True.
    lr_soft = LogisticRegression()
    nb_soft = MultinomialNB()  # MultinomialNB inherently provides probabilities
    svm_soft = SVC(probability=True)
    ensemble_soft = VotingClassifier(estimators=[('lr', lr_soft), ('nb', nb_soft), ('svm', svm_soft)], voting='soft')
    ensemble_soft.fit(X_train_tfidf, y_train)

In [None]:
    # Score the hard-voting ensemble on the held-out TF-IDF features.
    y_pred_hard = ensemble_hard.predict(X_test_tfidf)
    accuracy_hard = accuracy_score(y_true=y_test, y_pred=y_pred_hard)

    # Score the soft-voting ensemble on the same held-out features.
    y_pred_soft = ensemble_soft.predict(X_test_tfidf)
    accuracy_soft = accuracy_score(y_true=y_test, y_pred=y_pred_soft)

    # Report both results side by side.
    print(f"Hard Voting Accuracy: {accuracy_hard}")
    print(f"Soft Voting Accuracy: {accuracy_soft}")

**Task:** Blood Cell Image Classification

**Ensemble:**

**Features:** Flatten each image into a 1D pixel vector

**Split:**  80:20

**Base Learners:** RF, KNN, DT
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** RF, KNN, DT combine to classify blood cell images.

In [None]:
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import accuracy_score
    # Add image processing libraries like OpenCV if needed: import cv2

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import cv2  # Make sure you have OpenCV installed: !pip install opencv-python

# Define the path to your image folder (expected layout: one sub-folder per
# class, each containing that class's images)
image_folder = 'path/to/your/image/folder'

# Create lists to store image data and labels
X = []
y = []

# Iterate through class folders. The listings are sorted because os.listdir
# returns entries in arbitrary order; a stable sample order keeps the seeded
# train/test split reproducible across machines.
for class_name in sorted(os.listdir(image_folder)):
    class_folder = os.path.join(image_folder, class_name)
    if not os.path.isdir(class_folder):
        continue  # stray files next to the class folders are not classes
    # Iterate through images in the class folder
    for image_name in sorted(os.listdir(class_folder)):
        image_path = os.path.join(class_folder, image_name)
        # Load the image as single-channel grayscale
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising
            # (e.g. hidden files, non-image files, corrupt images); passing
            # None on to cv2.resize would crash the whole loading loop.
            print(f"Skipping unreadable image: {image_path}")
            continue
        img_resized = cv2.resize(img, (64, 64))  # Example resizing
        img_flat = img_resized.flatten()  # 64x64 pixels -> 4096-element vector
        # Append the image data and label to the lists
        X.append(img_flat)
        y.append(class_name)  # Use folder name as the label

# Convert lists to NumPy arrays
X = np.array(X)
y = np.array(y)


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
    # Base learners for the image ensemble, all with default settings.
    rf, knn, dt = RandomForestClassifier(), KNeighborsClassifier(), DecisionTreeClassifier()

In [None]:
    # Combine the three base learners by majority vote and train them on the
    # flattened training images. fit() returns the fitted ensemble itself.
    ensemble = VotingClassifier(
        estimators=[('rf', rf), ('knn', knn), ('dt', dt)],
        voting='hard',  # Or 'soft'
    ).fit(X_train, y_train)

In [None]:
    # Predict labels for the held-out images and report the share predicted correctly.
    y_pred = ensemble.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"Ensemble Accuracy: {accuracy}")

**Task:** Audio Data Classification (Patient Health)

**Ensemble:**

**Features:** Use MFCC Features

**Split:**  80:20

**Base Learners:** RF, SVM, LR
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** RF, SVM, and LR combine to classify patient audio recordings.

In [None]:
    import librosa
    import librosa.display
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    from sklearn.svm import SVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score

In [None]:
    # The CSV lists one recording per row: a file path and its class label.
    data = pd.read_csv('your_audio_data.csv')
    y = data['label']

    # Build one fixed-length feature vector per recording.
    features = []
    for audio_path in data['audio_path']:
        # Decode the waveform together with its sampling rate
        signal, sr = librosa.load(audio_path)
        # 13 MFCC coefficients per frame
        mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
        # Average each coefficient over the frames so recordings of any
        # length yield a vector of the same size
        mfcc_mean = np.mean(mfccs.T, axis=0)
        features.append(mfcc_mean)

    X = np.array(features)

In [None]:
# Reserve 20% of the recordings for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
    # Base learners for the audio ensemble, all with default settings.
    rf, svm, lr = RandomForestClassifier(), SVC(), LogisticRegression()

In [None]:
    # Majority-vote ensemble over the three base learners.
    # NOTE: switching to voting='soft' is not a one-word change here. Soft voting
    # needs probability estimates from every base learner, and `svm` is created
    # as SVC() without probability=True, so that estimator must be rebuilt with
    # probability=True first.
    ensemble = VotingClassifier(estimators=[('rf', rf), ('svm', svm), ('lr', lr)], voting='hard')  # 'soft' requires SVC(probability=True)
    # Train all base learners on the MFCC training features.
    ensemble.fit(X_train, y_train)

In [None]:
    # Predict labels for the held-out recordings and report the share predicted correctly.
    y_pred = ensemble.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"Ensemble Accuracy: {accuracy}")