<a href="https://colab.research.google.com/github/Amit-sheikh/Amit-sheikh/blob/main/mamba_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================================
# 📦 SECTION 0: INSTALL LIBRARIES
# ===============================================================
# Notebook shell escapes: install the PyTorch stack quietly (-q).
!pip install torch torchvision torchaudio -q
# Hugging Face tooling (models, datasets, training helpers, metrics) plus scikit-learn.
!pip install transformers datasets accelerate evaluate scikit-learn -q
# NOTE(review): the imports below use the standard-library `zipfile` module, not
# `zipfile36`; this install looks unnecessary — confirm before removing.
!pip install zipfile36 -q

# NOTE(review): no exit-status check precedes this message, so it is printed
# whether or not the installs above actually succeeded.
print("✅ All libraries installed successfully!")


# ===============================================================
# 📂 SECTION 1: IMPORT LIBRARIES
# ===============================================================
import inspect
import os
import zipfile

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline


# ===============================================================
# 📦 SECTION 2: EXTRACT ZIP FILE
# ===============================================================
# Path of the uploaded archive; change it here if your file has a different name.
zip_path = "/content/News-_dataset.zip"
# Directory that receives the unpacked archive contents.
extract_folder = "/content/dataset"

# Open the archive read-only and unpack every member; the context manager
# closes the file handle even if extraction fails.
with zipfile.ZipFile(zip_path, mode="r") as archive:
    archive.extractall(path=extract_folder)

print("✅ Dataset extracted successfully!")


# ===============================================================
# 📑 SECTION 3: LOAD DATA (assumes 'True.csv' and 'Fake.csv' are present)
# ===============================================================
# Read the two source files from the extraction folder.
true_df = pd.read_csv(os.path.join(extract_folder, "True.csv"))
fake_df = pd.read_csv(os.path.join(extract_folder, "Fake.csv"))

# Label convention: rows from True.csv get 1, rows from Fake.csv get 0.
true_df['label'] = 1
fake_df['label'] = 0

# Merge and shuffle. The shuffle is seeded so that the (also seeded) train/test
# split downstream yields the same partition on every run.
df = (
    pd.concat([true_df, fake_df], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# .copy() gives an independent frame so that adding a column below does not
# trigger pandas' SettingWithCopyWarning.
df = df[['title', 'text', 'label']].copy()
# A missing title or text would make the concatenation NaN and later break
# tokenization, so missing values are treated as empty strings.
df['content'] = (
    df['title'].fillna("").astype(str) + " " + df['text'].fillna("").astype(str)
)
df = df[['content', 'label']]

print(df.head())


# ===============================================================
# 🧩 SECTION 4: TRAIN-TEST SPLIT
# ===============================================================
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# After the split each frame carries a non-contiguous index. Without
# preserve_index=False, from_pandas would store it as an extra
# `__index_level_0__` column in the dataset.
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)


# ===============================================================
# 🧠 SECTION 5: LOAD MAMBA MODEL
# ===============================================================
# 1.4B-parameter Mamba checkpoint in the Transformers-compatible ("-hf") format.
# NOTE(review): the un-suffixed "state-spaces/mamba-1.4b" repo appears to hold raw
# mamba_ssm weights without the tokenizer files / `model_type` entry that the
# Auto* loaders need — confirm the "-hf" conversion is the intended checkpoint.
# NOTE(review): verify that the installed transformers version provides a
# sequence-classification head for Mamba, and that a model of this size fits
# the available GPU memory for full fine-tuning.
model_name = "state-spaces/mamba-1.4b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Tokenization below pads to a fixed length, which requires a pad token; fall
# back to the EOS token when the tokenizer ships without one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Keep the model config in sync so padded positions can be identified in
# batched classification.
if getattr(model.config, "pad_token_id", None) is None:
    model.config.pad_token_id = tokenizer.pad_token_id


# ===============================================================
# 🔤 SECTION 6: TOKENIZE DATA
# ===============================================================
def tokenize_function(examples):
    """Tokenize the ``content`` field of a batch into fixed-length inputs.

    Args:
        examples: Batch mapping whose ``"content"`` entry is handed to the
            module-level ``tokenizer``.

    Returns:
        The tokenizer output, padded and truncated to 256 tokens.
    """
    tokenizer_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 256,
    }
    return tokenizer(examples["content"], **tokenizer_options)

# Tokenize both splits batch-wise, training split first.
train_tokenized, test_tokenized = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, test_dataset)
)


# ===============================================================
# ⚙️ SECTION 7: TRAINING SETUP
# ===============================================================
# Accuracy metric object, loaded once and reused for every evaluation pass.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy from the predictions/labels pair passed by the Trainer.

    Args:
        eval_pred: Two-element pair ``(logits, labels)``. ``logits`` may also
            be a tuple/list when the model returns extra outputs.

    Returns:
        The result of ``metric.compute`` for the arg-max class predictions.
    """
    logits, labels = eval_pred
    # Models that return additional outputs hand over a tuple; by convention
    # the first element holds the logits (verify for this model).
    if isinstance(logits, (tuple, list)):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old spelling is rejected by recent ones. The install step
# does not pin a version, so pick whichever keyword the installed
# TrainingArguments accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# Evaluate and checkpoint once per epoch; small batches keep memory use down.
training_args = TrainingArguments(
    output_dir="/content/results",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="/content/logs",
    push_to_hub=False,
    **{_eval_strategy_key: "epoch"},
)

# Newer transformers releases take the tokenizer via `processing_class` and
# deprecate the `tokenizer` keyword; choose the name the installed Trainer
# accepts so the cell works on both old and new versions.
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# Wire the model, arguments, tokenized splits and metric callback together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    compute_metrics=compute_metrics,
    **{_tokenizer_key: tokenizer},
)


# ===============================================================
# 🚀 SECTION 8: TRAIN MODEL
# ===============================================================
# Run fine-tuning with the settings held by `training_args`.
trainer.train()


# ===============================================================
# 🧾 SECTION 9: EVALUATE MODEL
# ===============================================================
# Evaluate on the eval split given to the Trainer and print the returned metrics.
eval_results = trainer.evaluate()
print("✅ Evaluation Results:", eval_results)


# ===============================================================
# 🧠 SECTION 10: TEST WITH EXAMPLE
# ===============================================================
# Single made-up headline used as a smoke test for the fine-tuned model.
example_text = "Breaking news: Scientists find life on Mars!"
# Wrap the in-memory model and tokenizer in a text-classification pipeline.
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)
print(classifier(example_text))
