In [14]:
import pandas as pd
import os
from pathlib import Path

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [15]:
local_dir = Path(os.getcwd())
kaggle_dir = Path('kaggle/input/')

In [16]:
notebook_dir = local_dir

### Load data

In [17]:
data_dir = notebook_dir / 'llm-detect-ai-generated-text'
test_essays = pd.read_csv(data_dir / 'test_essays.csv')

### Make Predictions

In [18]:
MODEL_ARCHITECTURE = 'microsoft/deberta-v3-xsmall'
INPUT_LENGTH = 1024
EPOCHS = 5
BATCH_SIZE = 8

In [19]:
def predict_on_cpu(text, tokenizer, model, max_length):
    # Ensure the model is in evaluation mode and on CPU
    model.eval()
    model.to('cpu')

    inputs = tokenizer(text, return_tensors='pt', max_length=max_length, truncation=True, padding='max_length')
    inputs = {k: v.cpu() for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    
    return probabilities

In [20]:
model = AutoModelForSequenceClassification.from_pretrained('models/test_model')
tokenizer = AutoTokenizer.from_pretrained('models/deberta_tokenizer')
for i, row in test_essays.iterrows():
    text_to_predict = row['text']
    probabilities = predict_on_cpu(text_to_predict, tokenizer, model, INPUT_LENGTH)
    test_essays.loc[i, 'generated'] = probabilities[0, 1].item()

In [23]:
text = """In a world where the hum of engines and the honk of car horns have become the background noise to our daily lives, it's time we confront the question: What if we let go of the steering wheel? Let's face it, cars, as convenient as they are, spell a trilogy of trouble‚Äîpollution, expense, and health concerns. It's high time we put our foot down, not on the gas pedal, but on the brakes of our car-centric culture.

Take Vauban, Germany, for instance, a beacon of car-less suburban life where residents' quality of life hasn't diminished; it's flourished. "When I had a car I was always tense. I'm much happier this way," says Heidrun Walter, a resident of Vauban. The simplicity of walking and cycling replaces hours of snailing in traffic and the mental stress that accompanies it. And it isn't just a single utopian example; cities worldwide are increasingly adopting similar "smart planning" approaches to build communities less dependent on cars. These methods not only promise a cleaner, quieter environment but bring back the joy of community that's often lost in the roars of engines.

Now, look at Paris, with its surreal sights often shrouded in smog so thick it chokes the romance right out of the air. The city's recent partial driving ban, while drastic, dramatically reduced congestion by 60 percent and, more importantly, cleared the pollution for a time. It's a stark reminder that the air we breathe is precious and points to the need for sustainable solutions. Even the USA, the land that practically added wheels to the American Dream, is facing a shift. Young Americans are straying away from the driver's seat, and studies suggest we might have hit the peak of our car culture.

But what does all this mean for the future? It means imagining a world with fewer parking garages and more green spaces, where the money saved on car maintenance fuels local economies or education instead. It means cleaner air and a chance for our planet to take a much-needed breath. It tells a tale of a society that puts wellness‚Äîboth environmental and personal‚Äîabove convenience.

To wrap it up, envisioning a less car-dependent world is not about stripping away freedom; it's about rediscovering it in the shared spaces between our destinations. It's about making choices that ensure our health, the environment's safety, and ultimately, the security of our wallets. The evidence is in front of us, and the road ahead is clear. Detaching from automobile dependency can lead to thriving communities, healthier lifestyles, and economies with room to grow. Let's shift gears from a car-fueled present to a sustainable, connected future. Consider this: the next time you reach for your car keys, maybe, just maybe, it's worth considering the alternative."""
predict_on_cpu(text, tokenizer, model, INPUT_LENGTH)

tensor([[4.2788e-04, 9.9957e-01]])

In [22]:
submission_df = test_essays[['id', 'generated']]
submission_df

Unnamed: 0,id,generated
0,0000aaaa,0.995194
1,1111bbbb,0.998916
2,2222cccc,0.997219
