# Predictions

## Imports

In [1]:
import joblib
import numpy as np
import pandas as pd
import json
import wandb

## Functions

In [2]:
def load_models():    
    with open('../models/modelv2.pkl', 'rb') as model_file:
        model = joblib.load(model_file)

    # Carregar o vectorizer
    with open('../models/vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = joblib.load(vectorizer_file)
    return model,vectorizer

In [3]:
def predict(text):
    model, vectorizer = load_models()

    X = vectorizer.transform([text])

    if hasattr(X, "toarray"):  # Verifica se o método 'toarray' exist
        X = X.toarray()

    prediction = model.predict(X)
    return prediction


## Predicting

In [4]:
test = pd.read_csv('../data/preprocessed/test.csv')

In [5]:
test.head()

Unnamed: 0,text,label,clean_text,prompt
0,""" What's wrong with the Electoral College?"" To...",0,I write this letter in favor to inform you tha...,
1,"Hey, guys! So, for this essay, I had to resear...",1,"So, basically, praise is like this super power...",
2,Seeking multiple opinions when asking for advi...,1,"Secondly, discussing your dilemma with more th...","When people ask for advice, they sometimes tal..."
3,That some schools offer the option for student...,0,Online classes. This is one of the option for ...,Task: \n\n1. Research the advantages and disad...
4,Some people may view car limitations as bad or...,0,Pollution is a major problem in big cities lik...,


In [6]:
predictions = []
test_data = test.sample(n=500)
for text in test_data['clean_text']:
    predictions.append(predict(text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72

In [7]:
comparision_data = test_data[:500]
comparision_data["predictions"] = [np.round(pred) for pred in predictions]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comparision_data["predictions"] = [np.round(pred) for pred in predictions]


In [18]:
comparision_data.head()

Unnamed: 0,text,label,clean_text,prompt,predictions
3825,"Hey there! So, you wanna know about this whol...",1,"So, you know how sometimes you're like, ""Ugh, ...",Task: \n\nInvestigate the potential benefits o...,[[1.0]]
2274,I think driverless cars would be a very cool t...,0,"But your paying $500,000 for some thing that d...",,[[0.0]]
4110,Students should be able to design their own Su...,0,Students had to do the work that teachers desi...,,[[0.0]]
1595,Online classes have been a hot topic in educat...,1,One of the biggest advantages of online classe...,,[[1.0]]
102,The author brings up many great ideas and fact...,0,I dont think we should travel to venus my reas...,,[[0.0]]


## Weights and Biases

In [11]:
with open("../wandb.json", "r") as f:
    params = json.load(f)

In [12]:
!wandb login

wandb: Currently logged in as: pedro_miguel (pedro_miguel-universidade-federal-do-rio-grande-do-norte). Use `wandb login --relogin` to force relogin


In [13]:
run = wandb.init(project=params["WANDB_PROJECT"])

[34m[1mwandb[0m: Currently logged in as: [33mpedro_miguel[0m ([33mpedro_miguel-universidade-federal-do-rio-grande-do-norte[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [15]:
predictions_table = wandb.Table(dataframe=comparision_data)

In [16]:
run.log({"Predictions Table": predictions_table})

In [17]:
run.finish()