## Install Dependencies

In [None]:
!pip install -U fastai pandas

In [None]:
!pip install numpy==1.24.4 --force-reinstall
import os
os.kill(os.getpid(), 9)  # Force-restart runtime (required after reinstall)

In [None]:
# Colab only: mount Google Drive at /content/drive. The fine-tuning cell
# exports the trained learner to a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Set up Learner

In [10]:
from pathlib import Path

# Directory (relative to the working directory) that holds model checkpoints
# and the exported learner.
model_dir = Path('Models')
# Create it up front; exist_ok=True makes this a no-op when it already exists,
# so later saves/exports into Models/ cannot fail on a fresh checkout.
model_dir.mkdir(parents=True, exist_ok=True)

In [1]:
import ssl
import urllib.request

# SECURITY: this swaps the default HTTPS context factory for the unverified
# one, i.e. TLS certificate verification is disabled for every HTTPS request
# made through the default context in this process.
# NOTE(review): presumably a workaround for certificate errors while
# downloading pretrained weights — confirm, and prefer fixing the local CA
# bundle over disabling verification.
ssl._create_default_https_context = ssl._create_unverified_context


In [2]:
from fastai.text.all import *

# Load dataset
df = pd.read_csv("Data/processed_ai_vs_human.csv")

# Use only the first half of the data here; the second half of the same CSV
# is sampled later in the notebook as a held-out evaluation set.
df = df.iloc[:len(df)//2]

# Preview Data
print(df.head())
df = df.dropna(subset=["text", "generated"])  # safeguard, there aren't any missing values

# Create DataLoaders object for classification
dls = TextDataLoaders.from_df(
    df,
    text_col='text',
    label_col='generated',
    is_lm=False,                  # This is for classification, not language modeling
    valid_pct=0.15,               # Split for validation
    seed=42,                      # Fix the random train/validation split so re-runs are reproducible
    bs=4,                         # Batch size
    seq_len=72
)

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      text  \
0  Cars Cars around since became famous 1900s Henry Ford created built first ModelT Cars played major role every day lives since people starting question limiting car usage would good thing limiting use cars might good thing like matter article German Suburb Life Goes Without Cars Elizabeth Rosenthal states automobiles linchpin suburbs middle class families either Shanghai Chicago tend make 

In [11]:
# Build the text classifier on the AWD-LSTM architecture, tracking accuracy
# and F1 as metrics.
learn = text_classifier_learner(
    dls,
    arch=AWD_LSTM,
    metrics=[accuracy, F1Score()],
    drop_mult=0.5,  # dropout multiplier (regularization strength)
)

# Point the learner's checkpoint directory at the notebook-level model_dir.
learn.model_dir = model_dir

## Fine-tune (done on a VM)

In [None]:
from fastai.callback.tracker import SaveModelCallback

# Checkpoint callback: monitors 'accuracy' (higher is better) and writes the
# best weights under the name 'best_model'.
best_model_cb = SaveModelCallback(monitor='accuracy', comp=np.greater, fname='best_model')

# Fine-tune the model
learn.fine_tune(1, cbs=[best_model_cb])

# Evaluate
learn.show_results()

# Export the learner to the mounted Google Drive folder
learn.export('/content/drive/MyDrive/162/LSTM_text_classifier.pkl')

In [13]:
# Restore the 'best_model' checkpoint (the fname used by the SaveModelCallback
# in the fine-tuning cell), then export the learner to Models/ for inference.
# The load must come first so the export contains the restored weights.
learn.load('best_model')
learn.export('Models/LSTM_text_classifier.pkl')

## Load Fine-tuned Model

In [None]:
# Load the model
# NOTE: load_learner unpickles the file, which can execute arbitrary code —
# only load exports you created yourself or otherwise trust.
learn = load_learner('Models/LSTM_text_classifier.pkl')

# Use it to make a prediction on a single sample text.
# (The original `df[]` was a syntax error; take the first row's text instead.)
text = df["text"].iloc[0]
pred_class, pred_idx, probs = learn.predict(text)

print(f"Predicted class: {pred_class} (AI: 1, Human: 0)")
print(f"Probability distribution: {probs}")

If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Learner.load` instead.")


Predicted class: 1 (AI: 1, Human: 0)
Probability distribution: tensor([0.0223, 0.9777])


## Testing

Get Data

In [None]:
import json

def get_dev_data(file):
    """Load a JSONL dev set into a DataFrame of labelled texts.

    Each line of ``file`` is parsed as a JSON object with a ``human_text``
    and a ``machine_text`` field. Every valid line contributes two rows: the
    human text with label 0 and the machine text with label 1.

    A line that is not valid JSON, or that lacks either field, is reported
    and skipped as a whole, so human/machine rows always come in pairs.

    Args:
        file: Path to the ``.jsonl`` file to read.

    Returns:
        pandas.DataFrame with the columns ``text`` and ``label``. The columns
        are present even when no valid line was found.
    """
    entries = []
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
                # Read both fields before appending anything, so a record
                # missing one of them cannot leave an unpaired row behind.
                human_text = data['human_text']
                machine_text = data['machine_text']
            except Exception as e:
                # Best-effort loading: report the bad line and keep going.
                print(f"Skipping malformed line: {e}")
                continue
            entries.append({"text": human_text, "label": 0})
            entries.append({"text": machine_text, "label": 1})

    # Fix the column set so an empty result still has 'text' and 'label'.
    df = pd.DataFrame(entries, columns=["text", "label"])
    return df

# Load the four dev sets in one pass; the names on the left line up, in
# order, with the files listed below.
arvix_GPT, arvix_cohere, reddit_GPT, reddit_cohere = map(
    get_dev_data,
    [
        'Data/arxiv_chatGPT.jsonl',
        'Data/arxiv_cohere.jsonl',
        'Data/reddit_chatGPT.jsonl',
        'Data/reddit_cohere.jsonl',
    ],
)

In [119]:
# Held-out evaluation data: keep only the second half of the CSV (the
# DataLoaders cell uses the first half).
our_data = pd.read_csv("Data/processed_ai_vs_human.csv")
our_data = our_data.iloc[len(our_data)//2:]

# Draw a reproducible 1000-row sample and mirror 'generated' into a 'label'
# column, which is the column generate_metrics reads.
original_data_sample = (
    our_data
    .sample(n=1000, random_state=42)
    .reset_index(drop=True)
    .assign(label=lambda frame: frame['generated'])
)

Get Predictions from Each Dev Set

In [106]:
# DataLoader from dev DataFrame
test_dl = dls.test_dl(arvix_GPT["text"].tolist(), bs=64)

# Get predictions in batch
arvix_GPT_predictions = learn.get_preds(dl=test_dl)


In [122]:
# DataLoader from dev DataFrame
test_dl = dls.test_dl(arvix_cohere["text"].tolist(), bs=64)

# Get predictions in batch
arvix_cohere_predictions = learn.get_preds(dl=test_dl)

In [130]:
# DataLoader from dev DataFrame
test_dl = dls.test_dl(reddit_GPT["text"].tolist(), bs=64)

# Get predictions in batch
reddit_GPT_predictions = learn.get_preds(dl=test_dl)

In [131]:
# DataLoader from dev DataFrame
test_dl = dls.test_dl(reddit_cohere["text"].tolist(), bs=64)

# Get predictions in batch
reddit_cohere_predictions = learn.get_preds(dl=test_dl)

In [None]:
# DataLoader from dev DataFrame
test_dl = dls.test_dl(our_data_sample["text"].tolist(), bs=64)

# Get predictions in batch
original_predictions = learn.get_preds(dl=test_dl)

Measure Accuracy of Predictions

In [108]:
from sklearn.metrics import accuracy_score, classification_report
def generate_metrics(truth, dev_preds, threshold):
    """Print accuracy and a per-class report for thresholded predictions.

    Args:
        truth: Object whose ``'label'`` entry holds the ground-truth labels;
            the report names label 0 'Human' and label 1 'AI'.
        dev_preds: Sequence whose first element is indexed as ``[:, 1]`` to
            obtain the score for the AI class.
        threshold: A sample is predicted as AI (1) when its AI score is
            greater than or equal to this value, otherwise Human (0).

    Returns:
        None. The accuracy and the classification report are printed.
    """
    # Column 1 of the first prediction element is the AI-class score.
    ai_scores = dev_preds[0][:, 1]
    predicted = (ai_scores >= threshold).int().numpy()

    # Evaluate: compute both results before printing anything.
    acc_value = accuracy_score(truth['label'], predicted)
    report_text = classification_report(
        truth['label'],
        predicted,
        target_names=['Human', 'AI'],
    )

    print(f"Accuracy: {acc_value:.4f}")
    print(report_text)

In [117]:
# arvix_GPT dev set: a text is labelled AI only when its AI score is >= 0.95.
generate_metrics(truth=arvix_GPT, dev_preds=arvix_GPT_predictions, threshold=0.95)

Accuracy: 0.7065
              precision    recall  f1-score   support

       Human       0.79      0.56      0.66      3000
          AI       0.66      0.85      0.74      3000

    accuracy                           0.71      6000
   macro avg       0.72      0.71      0.70      6000
weighted avg       0.72      0.71      0.70      6000



In [129]:
# arvix_cohere dev set: a text is labelled AI only when its AI score is >= 0.95.
generate_metrics(truth=arvix_cohere, dev_preds=arvix_cohere_predictions, threshold=0.95)

Accuracy: 0.5412
              precision    recall  f1-score   support

       Human       0.54      0.56      0.55      3000
          AI       0.54      0.52      0.53      3000

    accuracy                           0.54      6000
   macro avg       0.54      0.54      0.54      6000
weighted avg       0.54      0.54      0.54      6000



In [137]:
# reddit_GPT dev set: note the lower cut-off here, AI when the AI score is >= 0.80.
generate_metrics(truth=reddit_GPT, dev_preds=reddit_GPT_predictions, threshold=0.80)

Accuracy: 0.6818
              precision    recall  f1-score   support

       Human       0.69      0.67      0.68      3000
          AI       0.68      0.69      0.68      3000

    accuracy                           0.68      6000
   macro avg       0.68      0.68      0.68      6000
weighted avg       0.68      0.68      0.68      6000



In [146]:
# reddit_cohere dev set: a text is labelled AI only when its AI score is >= 0.95.
generate_metrics(truth=reddit_cohere, dev_preds=reddit_cohere_predictions, threshold=0.95)

Accuracy: 0.8138
              precision    recall  f1-score   support

       Human       0.75      0.93      0.83      3000
          AI       0.91      0.69      0.79      3000

    accuracy                           0.81      6000
   macro avg       0.83      0.81      0.81      6000
weighted avg       0.83      0.81      0.81      6000



In [121]:
# Held-out sample of the original dataset, evaluated at a 0.5 cut-off.
generate_metrics(truth=original_data_sample, dev_preds=original_predictions, threshold=0.5)

Accuracy: 0.9520
              precision    recall  f1-score   support

       Human       0.96      0.97      0.96       670
          AI       0.94      0.91      0.93       330

    accuracy                           0.95      1000
   macro avg       0.95      0.94      0.95      1000
weighted avg       0.95      0.95      0.95      1000

