In [None]:
!pip install transformers
!pip install tensorflow
!pip install json
!pip install ijson
!pip install datasets
!pip install evaluate

In [None]:
from transformers import LongformerTokenizer, TFBertForSequenceClassification, TFLongformerForSequenceClassification
import tensorflow as tf
import json
import ijson
import evaluate
import numpy as np
from transformers.keras_callbacks import KerasMetricCallback

In [None]:
# Tokenizer and classifier are both loaded from the same pretrained checkpoint.
checkpoint = 'allenai/longformer-base-4096'
tokenizer = LongformerTokenizer.from_pretrained(checkpoint)

# Two-class label scheme; label2id is built as the inverse of id2label.
id2label = {0: "Neteisingas", 1: "Teisingas"}
label2id = {name: idx for idx, name in id2label.items()}

model = TFLongformerForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

NEW DATASET CREATION

In [None]:
def create_dataset_from_json(json_path, tokenizer, batch_size, max_length=None, shuffle=True):
    """Build a batched tf.data.Dataset of tokenized examples from a JSON file.

    The file must contain a JSON array of objects, each with an 'input' entry
    (the text to tokenize) and a 'label' entry (an integer class id).

    Args:
        json_path: Path to the JSON file.
        tokenizer: Callable tokenizer returning 'input_ids' and 'attention_mask'.
        batch_size: Number of examples per batch.
        max_length: Optional truncation length in tokens. None keeps the
            tokenizer's own default limit (the previous behaviour).
        shuffle: Whether to shuffle the examples before batching. Defaults to
            True (the previous behaviour); pass False for evaluation splits
            that should be read in file order.

    Returns:
        A dataset yielding ({'input_ids', 'attention_mask'}, label) batches,
        padded to the longest example in each batch.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform locale.
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    def gen():
        for item in data:
            # One example is tokenized per call, so padding='longest' adds no
            # padding here; the actual padding happens in padded_batch below.
            tokenized_input = tokenizer(
                item['input'],
                truncation=True,
                max_length=max_length,
                padding='longest',
                return_tensors='tf'
            )
            yield {'input_ids': tokenized_input['input_ids'][0],
                   'attention_mask': tokenized_input['attention_mask'][0]}, item['label']

    full_dataset = tf.data.Dataset.from_generator(
        gen,
        output_signature=(
            {'input_ids': tf.TensorSpec(shape=(None,), dtype=tf.int32),
             'attention_mask': tf.TensorSpec(shape=(None,), dtype=tf.int32)},
            tf.TensorSpec(shape=(), dtype=tf.int32)
        )
    )

    if shuffle:
        # shuffle() rejects a buffer size of 0, so guard against an empty file.
        full_dataset = full_dataset.shuffle(buffer_size=max(len(data), 1))

    # Pad input_ids with the tokenizer's pad token rather than the default 0,
    # which may be a real vocabulary token. Masks are padded with 0.
    pad_token_id = getattr(tokenizer, 'pad_token_id', None)
    if pad_token_id is None:
        pad_token_id = 0
    padding_values = (
        {'input_ids': tf.constant(pad_token_id, dtype=tf.int32),
         'attention_mask': tf.constant(0, dtype=tf.int32)},
        tf.constant(0, dtype=tf.int32),
    )
    full_dataset = full_dataset.padded_batch(batch_size, padding_values=padding_values)

    return full_dataset

In [None]:
batch_size = 16

# One JSON file per split; datasets are built in train, test, val order.
# NOTE(review): these paths use 'Utils' while later cells use 'utils' — confirm which is correct.
split_paths = {
    'train': './drive/MyDrive/Utils/train_data.json',
    'test': './drive/MyDrive/Utils/test_data.json',
    'val': './drive/MyDrive/Utils/val_data.json',
}
train_dataset_1024, test_dataset_1024, val_dataset_1024 = (
    create_dataset_from_json(path, tokenizer, batch_size)
    for path in split_paths.values()
)

In [None]:
num_epochs = 5
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)


def _labels_and_class_ids(y_true, y_pred):
    """Flatten the labels and reduce per-class scores to predicted class ids."""
    return tf.reshape(y_true, [-1]), tf.argmax(y_pred, axis=-1)


class LogitsPrecision(tf.keras.metrics.Precision):
    """Precision for class 1, computed from per-class scores and integer labels.

    The stock Precision metric expects predictions in [0, 1] with the same
    shape as the labels, so it cannot consume (batch, num_labels) scores
    together with sparse integer labels.
    """

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true, y_pred = _labels_and_class_ids(y_true, y_pred)
        return super(LogitsPrecision, self).update_state(
            y_true, y_pred, sample_weight=sample_weight
        )


class LogitsRecall(tf.keras.metrics.Recall):
    """Recall for class 1, computed from per-class scores and integer labels."""

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true, y_pred = _labels_and_class_ids(y_true, y_pred)
        return super(LogitsRecall, self).update_state(
            y_true, y_pred, sample_weight=sample_weight
        )


sparce_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
precision = LogitsPrecision(name='precision')
recall = LogitsRecall(name='recall')

# No loss is passed to compile(), exactly as before.
model.compile(
              optimizer=optimizer,
              metrics=[sparce_categorical_accuracy, precision, recall]
              )

In [None]:
# Fine-tune on the training split and validate on the validation split after each epoch.
model.fit(train_dataset_1024, validation_data=val_dataset_1024, epochs=num_epochs, verbose=1)

In [None]:
# Evaluate the fine-tuned model on the dataset built from test_data.json.
model.evaluate(test_dataset_1024)

In [None]:
# Persist the fine-tuned model to Drive in two forms:
#   - model.save(...)            -> Keras save, read back by load_model() in the next cell
#   - model.save_pretrained(...) -> Hugging Face format, read back by from_pretrained() later
model_save_path = './drive/MyDrive/saved_longformer_model'
model_save_path_hugging = './drive/MyDrive/saved_longformer_model_hugging'
model.save(model_save_path)
model.save_pretrained(model_save_path_hugging)

In [None]:
from tensorflow.keras.models import load_model

# Reload the Keras save written by model.save() above.
# NOTE(review): the following cell rebinds `model` via from_pretrained(), so this
# load looks redundant when the notebook is run top to bottom — confirm it is still needed.
model = load_model(model_save_path)

In [None]:
# Reload the classifier from the Hugging Face save directory
# (same path string as model_save_path_hugging in the save cell).
model_path = './drive/MyDrive/saved_longformer_model_hugging'
model = TFLongformerForSequenceClassification.from_pretrained(model_path)

In [None]:
# The reloaded model needs compiling again before evaluate()/fit() can be called.
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
sparce_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# These two metric objects are created but not handed to compile() below.
precision = tf.keras.metrics.Precision(name='precision')
recall = tf.keras.metrics.Recall(name='recall')

model.compile(optimizer=optimizer, metrics=[sparce_categorical_accuracy])

In [None]:
# Build a batched dataset from complex-data.json with the same tokenizer and batch size as before.
# NOTE(review): this path uses 'utils' while the train/test/val paths use 'Utils' — confirm.
full_dataset_complex = create_dataset_from_json('./drive/MyDrive/utils/complex-data.json', tokenizer, batch_size)

In [None]:
# Evaluate the reloaded model on the whole complex dataset before any further fine-tuning.
model.evaluate(full_dataset_complex)

In [None]:
# Split the complex dataset 70% / 15% / 15% into train / validation / test.
# Sizes count elements of full_dataset_complex, which is already batched,
# so take()/skip() operate on whole batches.
# NOTE(review): 23 is hard-coded — confirm it matches the number of batches in the dataset.
total_size = 23
train_size = int(0.70 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

train_dataset_complex = full_dataset_complex.take(train_size)

# Everything after the training portion is held out. The original code referred
# to an undefined `test_dataset` here, which raised NameError.
holdout_dataset_complex = full_dataset_complex.skip(train_size)
val_dataset_complex = holdout_dataset_complex.skip(test_size)
test_dataset_complex = holdout_dataset_complex.take(test_size)

# NOTE(review): create_dataset_from_json shuffles the data, and tf.data reshuffles
# on each iteration by default, so these take()/skip() splits may not stay
# disjoint across epochs — confirm.

In [None]:
# Continue fine-tuning on the complex training split, validating on its validation split.
model.fit(train_dataset_complex, validation_data=val_dataset_complex, epochs=num_epochs, verbose=1)

In [None]:
# Evaluate on the test portion of the complex dataset.
model.evaluate(test_dataset_complex)

FEEDBACK GENERATION

In [None]:
!pip install openai

In [None]:
# Mount Google Drive so files under ./drive/MyDrive are reachable.
# NOTE(review): earlier cells already read from ./drive/MyDrive; on a fresh kernel
# this mount would have to run before them — confirm the intended execution order.
from google.colab import drive
drive.mount('/content/drive')

CHAT GPT

In [None]:
from openai import OpenAI

In [None]:
import os
from getpass import getpass

# Never keep the API key in the notebook source: read it from the environment,
# and prompt for it interactively when the variable is not set.
api_key = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')
client = OpenAI(api_key=api_key)

# Instruction prompt sent ahead of every request; read as UTF-8 explicitly so
# decoding does not depend on the platform locale.
file_path = './drive/MyDrive/utils/GPTPrompt.txt'
with open(file_path, 'r', encoding='utf-8') as file:
  prompt = file.read().strip()

def get_gpt_response(user_input):
    """Send the stored prompt plus `user_input` to gpt-3.5-turbo and return the reply text.

    Both the prompt and the user input are sent as "user" messages. Any
    exception raised while building or reading the request is printed and
    None is returned instead.
    """
    try:
        create_completion = client.chat.completions.create
        completion = create_completion(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": prompt},
                {"role": "user", "content": user_input},
            ],
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print(f"An error occurred: {err}")
        return None

In [None]:
# Load the raw complex examples (a JSON array of records) for feedback generation.
# UTF-8 is given explicitly so decoding does not depend on the platform locale.
json_path = './drive/MyDrive/utils/complex-data.json'
with open(json_path, 'r', encoding='utf-8') as file:
        complex_inputs = json.load(file)

In [None]:
def get_user_request(data):
    """Turn one dataset record into the payload sent to the feedback model.

    Every "[CLS]" marker is removed from data['input'] and every "</s>" is
    replaced by "#". The record's label is passed through unchanged.

    Returns:
        A dict with 'user_request' (the cleaned text) and 'evaluation'
        (the value of data['label']).
    """
    cleaned_text = data['input'].replace("[CLS]", "").replace("</s>", "#")
    return {'user_request': cleaned_text, 'evaluation': data['label']}

In [None]:
# Collect GPT feedback for every complex example, then write all records to one JSON file.
to_be_saved = []
for complex_input in complex_inputs:
    final = get_user_request(complex_input)
    # The request text is the record's string form taken before feedback is attached.
    response = get_gpt_response(str(final))
    print(response)
    final.update(feedback=response)
    to_be_saved.append(final)

filename = 'gptFeedback.json'
with open(filename, 'w') as file:
    file.write(json.dumps(to_be_saved, indent=4))

GEMINI

In [None]:
!pip install --upgrade google-cloud-aiplatform

In [None]:
from google.colab import auth
from vertexai.preview.generative_models import GenerativeModel
import vertexai
# Authenticate the Colab user, then initialise Vertex AI for the given project.
# NOTE(review): the project id is hard-coded — confirm it is meant to be committed.
auth.authenticate_user()
vertexai.init(project='mythical-zodiac-422612-h3')

In [None]:
# Gemini model used for feedback generation.
gemini = GenerativeModel("gemini-1.0-pro")

# The original cell also built `user_query` from an undefined `false_input`,
# which raised NameError on a fresh kernel. The value was never used, so the
# line is dropped.

# Same instruction prompt as the GPT run; read as UTF-8 explicitly so decoding
# does not depend on the platform locale.
file_path = './drive/MyDrive/utils/GPTPrompt.txt'
with open(file_path, 'r', encoding='utf-8') as file:
  prompt = file.read().strip()

In [None]:
def get_gemini_response(user_input):
    """Send the stored prompt plus `user_input` to Gemini and return the reply text.

    Mirrors get_gpt_response: any exception raised by the request or while
    reading the reply text is printed and None is returned, so one failed
    example does not abort the run before the results are written.
    """
    try:
        return gemini.generate_content([prompt, user_input]).text
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


# Collect Gemini feedback for every complex example, then write all records to one JSON file.
to_be_saved = []
for complex_input in complex_inputs:
  final = get_user_request(complex_input)
  # The request text is the record's string form taken before feedback is attached.
  response_text = get_gemini_response(str(final))
  print(response_text)
  final['feedback'] = response_text
  to_be_saved.append(final)

filename = 'geminiFeedback.json'
with open(filename, 'w') as file:
    json.dump(to_be_saved, file, indent=4)