In [None]:
# MIT License Copyright (c) 2024 Luis Axel Núñez Quintana
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Text classification with DistilBERT

## 0.- Dependencies

### 0.1- Downloads

In [None]:
import os

In [None]:
if not os.path.exists('demo_utils.py'):
  !wget https://raw.githubusercontent.com/LuisAxel/AI-Generated-Text-Catcher/main/demo_utils.py
if not os.path.exists('vocab_distilbert_writings.bin'):
  !wget https://github.com/LuisAxel/AI-Generated-Text-Catcher/raw/main/models/vocab_distilbert_writings.bin
if not os.path.exists('pytorch_distilbert_writings.bin'):
  !wget https://github.com/LuisAxel/AI-Generated-Text-Catcher/raw/main/models/pytorch_distilbert_writings.bin

### 0.2- Libraries

In [None]:
from demo_utils import load_model_and_tokenizer, DistilBERTClass, test_essay
import torch
import warnings
# Ignore every warning raised from here on (applies to the whole kernel session).
# NOTE(review): presumably meant to keep the demo output uncluttered — confirm no useful warnings are hidden.
warnings.simplefilter('ignore')

## 1.- Loading model and tokenizer

In [None]:
# Local file names of the tokenizer vocabulary and the model weights fetched in section 0.1.
vocab_file, model_file = (
    'vocab_distilbert_writings.bin',
    'pytorch_distilbert_writings.bin',
)

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cpu


In [None]:
# Restore the classifier and its tokenizer from the downloaded files.
model, tokenizer = load_model_and_tokenizer(device, model_file, vocab_file)
# Move the model to the selected device and switch it to evaluation mode.
# Both calls return the module itself (torch.nn.Module API), so the cell still
# ends by displaying the model architecture.
model.to(device).eval()

DistilBERTClass(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(in

## 2.- Using the model

Paste the text to be tested into the following cell, between the """**triple quotes**""".

In [None]:
# Sample text to classify; replace the content between the triple quotes to test another writing.
# The spelling and grammar are intentionally left exactly as written: this string is the input that gets scored.
essay = """They Kicked the boy out of his band and since it was the only thing he had as a job he was forced to look for a job in a school but he had no other speciality tan music so he had to pretend to be a math/sience teacher So he could work as a "teacher”. One day he saw that his students played instruments so well, and that's where it started all Would he prefer to teach them math/sience or music? After posing as a teacher and seeng how well his students played music, he decided to descover each one ability, literally this kids can play rock music.Maybe he thought “If they kicked me out of of the band I was in, why don't I make my own band"? Or maybe he just did it because music was his passion. But he has to continue keeping the secret were they going to find out what he did? what would happen if they did? The students were confused out first, but then they started to like it and with their imagination and ideas they create some Incredible things, each one has incredible qualities that can be seen during the movie, that "teacher" was crazy!! and everything is going well until... of course there has to be a problem, I think he didn't fully think about what parents say, or what EVERYONE Would say when they discovered what he was hiding. But when they demonstrated their talent a stage after secretly entering to on a a competition, the parents realized their children talent and had no problems."""
# Echo the string as the cell output so the reader can verify what will be scored.
essay

'They Kicked the boy out of his band and since it was the only thing he had as a job he was forced to look for a job in a school but he had no other speciality tan music so he had to pretend to be a math/sience teacher So he could work as a "teacher”. One day he saw that his students played instruments so well, and that\'s where it started all Would he prefer to teach them math/sience or music? After posing as a teacher and seeng how well his students played music, he decided to descover each one ability, literally this kids can play rock music.Maybe he thought “If they kicked me out of of the band I was in, why don\'t I make my own band"? Or maybe he just did it because music was his passion. But he has to continue keeping the secret were they going to find out what he did? what would happen if they did? The students were confused out first, but then they started to like it and with their imagination and ideas they create some Incredible things, each one has incredible qualities that 

In [None]:
# Score the essay with the loaded model and tokenizer.
# NOTE(review): the literal 20 is passed straight to test_essay; it looks like a
# chunk/segment size, but confirm its meaning in demo_utils.
pred = test_essay(essay, model, tokenizer, 20, device)
# Unwrap each returned tensor into a plain Python number for a readable printout.
print([score.item() for score in pred])

[0.987672746181488, 0.4002665877342224, 0.9660235047340393, 0.9878237247467041, 0.9740121960639954, 0.977770984172821, 0.9882457256317139, 0.920466423034668, 0.9891541600227356, 0.4550679624080658, 0.9806810021400452, 0.9547611474990845, 0.4412694573402405, 0.543067991733551]


In [None]:
# Collapse the individual scores in `pred` into a single mean score.
average_prediction = sum(pred) / len(pred)
# Express the mean as a percentage; the report labels it "Authentic" and its
# complement (100 - percentage) "Generated".
percentage = average_prediction * 100
print(f"Predicted percentages:\nAuthentic:{percentage:.2f}% Generated: {100 - percentage:.2f}%")

Predicted percentages:
Authentic:82.62% Generated: 17.38%
