<a href="https://colab.research.google.com/github/ThanmananR/Mymachinelearning/blob/main/FakeNewsCoTR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

In [None]:

# Toy corpus: every headline is paired with its ground-truth label
# (1 = Real, 0 = Fake) so the text and its label cannot drift apart.
_headline_label_pairs = [
    ("Government announces new economic reforms", 1),    # Real
    ("Scientists discover water on Mars", 1),            # Real
    ("Aliens seen walking in New York City", 0),         # Fake
    ("Man turns into a werewolf under moonlight", 0),    # Fake
    ("New law passed to improve healthcare access", 1),  # Real
    ("Vaccines help reduce virus spread", 1),            # Real
    ("Bill Gates controls minds with microchips", 0),    # Fake
    ("Cure for cancer found in garden herb", 0),         # Fake
    ("New education policy praised by teachers", 1),     # Real
    ("Elvis Presley found alive in secret island", 0),   # Fake
]

# Unzip into the two parallel lists used by the rest of the notebook.
texts = [headline for headline, _ in _headline_label_pairs]
labels = [label for _, label in _headline_label_pairs]  # 1 = Real, 0 = Fake

In [None]:

# Split the toy corpus into a small labeled seed set and an unlabeled pool.
# Both sets are derived from `texts` / `labels` by index instead of re-typing
# the headline strings, so the corpus has a single source of truth.
labeled_idx = [0, 2, 5, 3]          # Real, Fake, Real, Fake
unlabeled_idx = [1, 6, 4, 7, 8, 9]

# Every headline must land in exactly one of the two sets.
assert sorted(labeled_idx + unlabeled_idx) == list(range(len(texts))), \
    "labeled_idx and unlabeled_idx must partition the corpus"

X_labeled_texts = [texts[i] for i in labeled_idx]
y_labeled = [labels[i] for i in labeled_idx]

# The true labels of the pool are deliberately not carried along: co-training
# has to infer them.
X_unlabeled_texts = [texts[i] for i in unlabeled_idx]

In [None]:
# Build the two feature "views" used for co-training:
#   view 1 -> raw term counts (CountVectorizer)
#   view 2 -> TF-IDF weights  (TfidfVectorizer)
# Each vectorizer learns its vocabulary from the labeled texts only and then
# projects the unlabeled texts onto that same vocabulary.
vectorizer1, vectorizer2 = CountVectorizer(), TfidfVectorizer()

# View 1: term counts
X1_labeled = vectorizer1.fit_transform(X_labeled_texts)
X1_unlabeled = vectorizer1.transform(X_unlabeled_texts)

# View 2: TF-IDF
X2_labeled = vectorizer2.fit_transform(X_labeled_texts)
X2_unlabeled = vectorizer2.transform(X_unlabeled_texts)

In [None]:
# One Naive Bayes classifier per feature view:
# model1 is paired with the count view, model2 with the TF-IDF view.
model1, model2 = MultinomialNB(), MultinomialNB()

In [None]:
# Co-training process
#
# Each round:
#   1. fit one classifier per view on the current labeled set,
#   2. score the unlabeled pool with both classifiers,
#   3. move every sample on which both classifiers are confident AND agree
#      from the unlabeled pool into the labeled set (pseudo-labeling).
#
# Note: this cell updates X1_labeled / X2_labeled / y_labeled and the
# unlabeled pools in place; re-run the cells that build them before
# re-running this one to start again from the seed set.
MAX_ROUNDS = 3               # upper bound on co-training iterations
CONFIDENCE_THRESHOLD = 0.9   # minimum class probability required from BOTH models


def _as_dense(matrix):
    """Return `matrix` as a dense ndarray, whether it is scipy-sparse or already dense."""
    return matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)


for round_num in range(MAX_ROUNDS):
    print(f"\n🔁 Round {round_num + 1}")
    model1.fit(X1_labeled, y_labeled)
    model2.fit(X2_labeled, y_labeled)

    # Nothing left to pseudo-label; the models above are already fit on
    # everything we have, so stop before scoring an empty pool.
    if X1_unlabeled.shape[0] == 0:
        print("🔁 No unlabeled samples left.")
        break

    probs1 = model1.predict_proba(X1_unlabeled)
    probs2 = model2.predict_proba(X2_unlabeled)

    # A sample is accepted only if both views are confident and predict the
    # same class.
    confident_mask = (
        (probs1.max(axis=1) > CONFIDENCE_THRESHOLD)
        & (probs2.max(axis=1) > CONFIDENCE_THRESHOLD)
        & (probs1.argmax(axis=1) == probs2.argmax(axis=1))
    )
    confident_idx = np.flatnonzero(confident_mask)

    if confident_idx.size == 0:
        print("🔁 No confident samples this round.")
        break

    X1_new = X1_unlabeled[confident_idx]
    X2_new = X2_unlabeled[confident_idx]
    # Both models agree on these rows, so model1's prediction is the shared label.
    y_new = model1.predict(X1_new)

    # Each view's training matrix is extended with that SAME view's features
    # (count rows go to X1, TF-IDF rows go to X2). _as_dense keeps this
    # working on later rounds, when the labeled matrices are already ndarrays.
    X1_labeled = np.vstack([_as_dense(X1_labeled), _as_dense(X1_new)])
    X2_labeled = np.vstack([_as_dense(X2_labeled), _as_dense(X2_new)])
    y_labeled = np.concatenate([y_labeled, y_new])

    # Drop the newly labeled rows from the unlabeled pool.
    remaining = np.flatnonzero(~confident_mask)
    X1_unlabeled = X1_unlabeled[remaining]
    X2_unlabeled = X2_unlabeled[remaining]

    print(f"✅ Added {len(confident_idx)} new samples.")
else:
    # The loop used all its rounds without hitting a `break`, so the samples
    # added in the last round have not been seen by the models yet: refit once
    # so the final models reflect the full labeled set.
    model1.fit(X1_labeled, y_labeled)
    model2.fit(X2_labeled, y_labeled)


🔁 Round 1
🔁 No confident samples this round.


In [None]:
# Sanity-check the count-view classifier (model1) on headlines it has never seen
test_texts = [
    "President announces plans for clean energy",
    "Time traveler arrested in 2030 by FBI",
    "Doctors recommend more vegetables in diet",
    "Alien spaceship seen flying over White House",
]

# Project the new headlines onto vectorizer1's vocabulary (the view model1
# was trained on), then classify them.
X1_test = vectorizer1.transform(test_texts)
y_pred = model1.predict(X1_test)

# Report one "<headline> => REAL|FAKE" line per test headline.
print("\n🔍 Predictions on new data:")
for pred, text in zip(y_pred, test_texts):
    print(f"{text} => {'REAL' if pred == 1 else 'FAKE'}")


🔍 Predictions on new data:
President announces plans for clean energy => REAL
Time traveler arrested in 2030 by FBI => FAKE
Doctors recommend more vegetables in diet => FAKE
Alien spaceship seen flying over White House => FAKE
