**Task:** Fake News Detection

**Ensemble:**

**Features:** BoW and TF-IDF

**Split:**  80:20

**Base Learners:** Logistic Regression, Naive Bayes, Support Vector Machines (SVM)
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** Combine predictions from Logistic Regression, Naive Bayes, and SVM to classify fake news.

In [1]:
# Gerekli kütüphaneler
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Veri yükleme ve işleme
fake_df = pd.read_csv('Fake News Detection Datasets/Fake.csv')
true_df = pd.read_csv('Fake News Detection Datasets/True.csv')

# Etiketleme (Fake:1, True:0)
fake_df['label'] = 1
true_df['label'] = 0

# Veri birleştirme
df = pd.concat([fake_df, true_df])

# Metin temizleme fonksiyonu
def clean_text(text):
    text = text.lower()  # Küçük harfe çevirme
    text = ''.join([c for c in text if c not in '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'])  # Noktalama kaldırma
    return text

df['text'] = df['text'].apply(clean_text)

# Özellik çıkarımı (BoW + TF-IDF)
tfidf = TfidfVectorizer(max_features=5000)
bow = CountVectorizer(max_features=5000)

X_tfidf = tfidf.fit_transform(df['text']).toarray()
X_bow = bow.fit_transform(df['text']).toarray()

# Özellikleri birleştirme
X = pd.concat([pd.DataFrame(X_tfidf), pd.DataFrame(X_bow)], axis=1)
y = df['label']

# Veri bölme (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Temel modeller
lr = LogisticRegression()
nb = MultinomialNB()
svm = SVC(probability=True)

# Ensemble model (Hard Voting)
voting_clf = VotingClassifier(
    estimators=[('lr', lr), ('nb', nb), ('svm', svm)],
    voting='hard')

# Model eğitimi
voting_clf.fit(X_train, y_train)

# Tahmin ve değerlendirme
y_pred = voting_clf.predict(X_test)

print("Doğruluk:", accuracy_score(y_test, y_pred))
print("\nSınıflandırma Raporu:\n", classification_report(y_test, y_pred))

Doğruluk: 0.9939866369710467

Sınıflandırma Raporu:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      4247
           1       1.00      0.99      0.99      4733

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



**Task:** Blood Cell Image Classification

**Ensemble:**

**Features:** Convert into 1D

**Split:**  80:20

**Base Learners:** RF, KNN, DT
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** RF, KNN, DT combine to classify blood cell images.

In [None]:
import os
import cv2  # pip install opencv-python
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

# ----------------------
# 1) LOAD & FLATTEN IMAGES
# ----------------------
# Example directory structure:
#   blood_cell_images/
#       RBC/        (images for RBC label)
#       WBC/        (images for WBC label)
#       Platelets/  (images for Platelets label)
# etc.

data_dir = "blood_cell_images"  # Adjust path as needed
X = []
y = []

# Traverse subfolders (labels)
for label in os.listdir(data_dir):
    label_path = os.path.join(data_dir, label)
    if os.path.isdir(label_path):
        for img_file in os.listdir(label_path):
            if img_file.lower().endswith((".png", ".jpg", ".jpeg")):
                img_path = os.path.join(label_path, img_file)
                img = cv2.imread(img_path)
                if img is None:
                    continue
                # Resize to a consistent size (e.g., 64x64)
                img = cv2.resize(img, (64, 64))
                # Flatten into 1D array
                img_flat = img.flatten()
                X.append(img_flat)
                y.append(label)

X = np.array(X)
y = np.array(y)

# ----------------------
# 2) TRAIN-TEST SPLIT (80:20)
# ----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----------------------
# 3) DEFINE BASE LEARNERS
# ----------------------
rf = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
dt = DecisionTreeClassifier(random_state=42)

# ----------------------
# 4) VOTING CLASSIFIER
# ----------------------
voting_clf = VotingClassifier(
    estimators=[("rf", rf), ("knn", knn), ("dt", dt)],
    voting='hard'  # or 'soft'
)

# ----------------------
# 5) TRAIN & EVALUATE
# ----------------------
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)

print("=== Blood Cell Image Classification ===")
print(classification_report(y_test, y_pred))

**Task:** Audio Data Classification (Patient Health)

**Ensemble:**

**Features:** Use MFCC Features

**Split:**  80:20

**Base Learners:** RF, SVM, LR
Ensemble Method: Voting Classifier (Hard or Soft voting)

**Example:** RF, KNN, DT combine to classify blood cell images.

In [None]:
import os
import numpy as np
import pandas as pd
import librosa  # pip install librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

# ----------------------
# 1) LOAD & EXTRACT MFCC
# ----------------------
audio_dir = "audio_data"  # Adjust path as needed
X = []
y = []

for label in os.listdir(audio_dir):
    label_path = os.path.join(audio_dir, label)
    if os.path.isdir(label_path):
        for file in os.listdir(label_path):
            if file.lower().endswith(".wav"):
                file_path = os.path.join(label_path, file)
                try:
                    signal, sr = librosa.load(file_path, sr=22050)
                    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
                    # Take mean across time axis
                    mfcc_mean = np.mean(mfcc, axis=1)
                    X.append(mfcc_mean)
                    y.append(label)
                except Exception as e:
                    print(f"Could not process {file_path}: {e}")

X = np.array(X)
y = np.array(y)

# ----------------------
# 2) TRAIN-TEST SPLIT (80:20)
# ----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----------------------
# 3) DEFINE BASE LEARNERS
# ----------------------
rf = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
dt = DecisionTreeClassifier(random_state=42)

# ----------------------
# 4) VOTING CLASSIFIER
# ----------------------
voting_clf = VotingClassifier(
    estimators=[("rf", rf), ("knn", knn), ("dt", dt)],
    voting='hard'  # or 'soft'
)

# ----------------------
# 5) TRAIN & EVALUATE
# ----------------------
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)

print("=== Audio Data Classification (Patient Health) ===")
print(classification_report(y_test, y_pred))