In [1]:
api_key="AIzaSyD2kihDZELKb2JAk-uXri-GI2lhL2GuR34"

In [3]:
import google.generativeai as genai
from datasets import load_dataset ,Dataset ,DatasetDict

import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import time
import random
import re
import copy
from tqdm.auto import tqdm

In [4]:
data_files={"data":"dataset.json"}
raw_dataset=load_dataset("json" ,data_files=data_files)
raw_dataset

Generating data split: 0 examples [00:00, ? examples/s]

DatasetDict({
    data: Dataset({
        features: ['paper_id', 'text', 'label'],
        num_rows: 150
    })
})

In [5]:
train_data = [item for item in raw_dataset['data'] if item['paper_id'].startswith('R')]
test_data = [item for item in raw_dataset['data'] if item['paper_id'].startswith('P')]

train_dataset = Dataset.from_dict({
    'paper_id': [item['paper_id'] for item in train_data],
    'text': [item['text'] for item in train_data],
    'label': [item['label'] for item in train_data]
})

test_dataset = Dataset.from_dict({
    'paper_id': [item['paper_id'] for item in test_data],
    'text': [item['text'] for item in test_data],
})

final_raw_dataset = DatasetDict({
    'reference': train_dataset.shuffle(seed=42),
    'papers': test_dataset.shuffle(seed=42),
})
final_raw_dataset
final_raw_dataset

DatasetDict({
    reference: Dataset({
        features: ['paper_id', 'text', 'label'],
        num_rows: 15
    })
    papers: Dataset({
        features: ['paper_id', 'text'],
        num_rows: 135
    })
})

## Task 1

In [6]:
raw_dataset_for_task1 = copy.deepcopy(final_raw_dataset)

def label_transform(example):
    example['label'] = 0 if example['label'] == 'Non-Publishable' else 1
    return example

raw_dataset_for_task1['reference']=raw_dataset_for_task1['reference'].map(label_transform)
raw_dataset_for_task1

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

DatasetDict({
    reference: Dataset({
        features: ['paper_id', 'text', 'label'],
        num_rows: 15
    })
    papers: Dataset({
        features: ['paper_id', 'text'],
        num_rows: 135
    })
})

In [None]:
raw_dataset_for_task1['reference']['label'][0]

1

In [None]:
def generate_optimized_prompt(model_id, user_prompt):
 genai.configure(api_key=api_key)
 model=genai.GenerativeModel(model_name=model_id)
 response=model.generate_content(user_prompt)
 time.sleep(5)
 return response.text

In [None]:
def score_paper(paper_text, sub_task, model_id, max_retries=5, delay=5):

    prompt = f"""
    Sub-task: {sub_task}

    Evaluate the following research papers for content text for its {sub_task} on a scale of 1 to 10 (You can include fractional too).
    Just give scores. No need for explanation.

    Paper Text:
      {paper_text}

    Please provide a score on a scale from 1 to 10, with no further explanation or text.
    Just the score:
    """

    attempt = 0
    while attempt < max_retries:
        try:

            response = generate_optimized_prompt(model_id, prompt)


            if response is None:
                print(f"Attempt {attempt + 1}: No response received for subtask: {sub_task}. Retrying...")
                attempt += 1
                time.sleep(delay)
                continue


            if "error" in response:
                print(f"Attempt {attempt + 1}: Error in API response: {response['error']}. Retrying...")
                attempt += 1
                time.sleep(delay)
                continue


            match = re.search(r"(\d+(\.\d+)?)", response)
            if match:
                score = float(match.group(1))
                return score


            print(f"Attempt {attempt + 1}: Could not extract a score from response: {response}. Retrying...")
            attempt += 1
            time.sleep(delay)
        except Exception as e:
            print(f"Attempt {attempt + 1}: Exception occurred: {e}. Retrying...")
            attempt += 1
            time.sleep(delay)


    print(f"All {max_retries} attempts failed for subtask: {sub_task}. Returning -1.")
    return -1


In [None]:
def generate_scores(paper_text, model_id="gemini-1.5-flash"):
  scores={}
  criteria_dict={
      "Originality" : """"Criteria :
                          - How novel is the approach presented in this paper?
                          - Does the paper introduce new methods or significantly improve existing ones?
                          - Can you identify any gaps or limitations in the related work section?""" ,
      "Methodology" : """Criteria :
                          - How well-structured and transparent are the experimental design, data collection methods, and analysis procedures?
                          - Are the results accurately interpreted and supported by the data?
                          - Can you identify any potential biases or methodological flaws?""",
      "Clarity and Concision" : """Criteria :
                                    - How clear and concise is the writing style throughout the paper?
                                    - Are the figures, tables, and illustrations effectively used to communicate complex information?
                                    - Can you easily follow the logical flow of the arguments presented in the paper?""",
      "Impact" : """Criteria :
                      - How significant are the implications of the findings for practice, policy, or future research?
                      - Does the paper address a pressing problem or gap in the field?
                      - Can you identify any potential applications or extensions of the research?""",
      "Novel application or extension" : """Criteria :
                      - How effectively does the paper apply existing methods to a new domain, problem, or dataset?
                      - Does the paper extend current techniques in meaningful ways?
                      - Can you identify any potential limitations or areas for future work?
      """

      }
  for sub_task,criteria in criteria_dict.items():
    score_response = score_paper(paper_text ,sub_task, model_id)
    scores[sub_task]=score_response

  print(scores)
  return scores

In [None]:
model_id="gemini-1.5-flash"

In [None]:
data_scores=[]
for paper_text in tqdm(raw_dataset_for_task1['reference']['text']):
    scores=generate_scores(paper_text,model_id)
    data_scores.append(scores)

  0%|          | 0/15 [00:00<?, ?it/s]

{'Originality': 7.5, 'Methodology': 8.5, 'Clarity and Concision': 7.5, 'Impact': 8.5, 'Novel application or extension': 8.5}
{'Originality': 8.5, 'Methodology': 8.5, 'Clarity and Concision': 6.5, 'Impact': 8.5, 'Novel application or extension': 7.5}
{'Originality': 8.5, 'Methodology': 8.5, 'Clarity and Concision': 7.5, 'Impact': 8.5, 'Novel application or extension': 8.5}
{'Originality': 8.5, 'Methodology': 8.5, 'Clarity and Concision': 6.5, 'Impact': 9.5, 'Novel application or extension': 8.5}
{'Originality': 7.5, 'Methodology': 8.5, 'Clarity and Concision': 6.5, 'Impact': 8.5, 'Novel application or extension': 8.5}
{'Originality': 3.5, 'Methodology': 8.5, 'Clarity and Concision': 3.5, 'Impact': 8.5, 'Novel application or extension': 8.5}
{'Originality': 3.5, 'Methodology': 3.5, 'Clarity and Concision': 3.5, 'Impact': 3.5, 'Novel application or extension': 8.5}
{'Originality': 1.0, 'Methodology': 1.0, 'Clarity and Concision': 1.0, 'Impact': 1.0, 'Novel application or extension': 1.0}


In [None]:
data_scores

[{'Originality': 7.5,
  'Methodology': 8.5,
  'Clarity and Concision': 6.5,
  'Impact': 8.5,
  'Novel application or extension': 8.5},
 {'Originality': 8.5,
  'Methodology': 8.5,
  'Clarity and Concision': 6.5,
  'Impact': 8.5,
  'Novel application or extension': 7.5},
 {'Originality': 8.5,
  'Methodology': 8.5,
  'Clarity and Concision': 7.5,
  'Impact': 9.5,
  'Novel application or extension': 8.5},
 {'Originality': 8.5,
  'Methodology': 8.5,
  'Clarity and Concision': 6.5,
  'Impact': 9.5,
  'Novel application or extension': 8.5},
 {'Originality': 7.5,
  'Methodology': 8.5,
  'Clarity and Concision': 6.5,
  'Impact': 9.5,
  'Novel application or extension': 8.5},
 {'Originality': 3.5,
  'Methodology': 8.5,
  'Clarity and Concision': 3.5,
  'Impact': 8.5,
  'Novel application or extension': 8.5},
 {'Originality': 3.5,
  'Methodology': 3.5,
  'Clarity and Concision': 4.5,
  'Impact': 3.5,
  'Novel application or extension': 8.5},
 {'Originality': 1.0,
  'Methodology': 1.0,
  'Clarity 

In [None]:
import pandas as pd
import numpy as np

reference_dataset=raw_dataset_for_task1['reference']


training_data_scores = {
    "paper_id": reference_dataset['paper_id'],
    "is_publishable": np.zeros((len(reference_dataset['text']),), dtype=bool),
    "Originality": np.zeros((len(reference_dataset['text']),)),
    "Methodology": np.zeros((len(reference_dataset['text']),)),
    "Clarity and Concision": np.zeros((len(reference_dataset['text']),)),
    "Impact": np.zeros((len(reference_dataset['text']),)),
    "Novel application or extension": np.zeros((len(reference_dataset['text']),))
}

for i in range(len(reference_dataset['text'])):
    training_data_scores['paper_id'][i] = reference_dataset['paper_id'][i]
    training_data_scores['is_publishable'][i] = reference_dataset['label'][i]
    for sub_task,score in data_scores[i].items():
        training_data_scores[sub_task][i] = score

training_data_df = pd.DataFrame(training_data_scores)
print(training_data_df)

   paper_id  is_publishable  Originality  Methodology  Clarity and Concision  \
0      R012            True          7.5          8.5                    7.5   
1      R013            True          8.5          8.5                    6.5   
2      R007            True          8.5          8.5                    7.5   
3      R015            True          8.5          8.5                    6.5   
4      R011            True          7.5          8.5                    6.5   
5      R005           False          3.5          8.5                    3.5   
6      R004           False          3.5          3.5                    3.5   
7      R001           False          1.0          1.0                    1.0   
8      R008            True          7.5          9.5                    7.5   
9      R014            True          8.5          7.5                    7.5   
10     R009            True          7.5          8.5                    6.5   
11     R002           False          1.0

In [None]:
df=training_data_df

In [None]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['is_publishable'])
df['is_publishable'] = encoded_labels
df.head(1)

Unnamed: 0,paper_id,is_publishable,Originality,Methodology,Clarity and Concision,Impact,Novel application or extension
0,R012,1,7.5,8.5,7.5,8.5,8.5


In [None]:
df2=df

In [None]:
df2.shape

(15, 7)

In [None]:
X = df2.drop(columns=['paper_id','is_publishable'])
y = df2['is_publishable']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42)

rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Confusion Matrix:
[[1 0]
 [0 2]]


In [None]:
from joblib import dump

dump(rf_classifier, 'decisionclassifier.joblib')

['decisionclassifier.joblib']

In [None]:
raw_dataset_for_task1['papers']

Dataset({
    features: ['paper_id', 'text'],
    num_rows: 135
})

In [None]:
data_scores_unlabel=[]
for paper_text in tqdm(raw_dataset_for_task1['papers']['text']):
    scores=generate_scores(paper_text,model_id)
    data_scores_unlabel.append(scores)

  0%|          | 0/135 [00:00<?, ?it/s]

{'Originality': 7.5, 'Methodology': 9.5, 'Clarity and Concision': 6.5, 'Impact': 9.5, 'Novel application or extension': 8.5}
{'Originality': 3.0, 'Methodology': 3.5, 'Clarity and Concision': 3.5, 'Impact': 7.5, 'Novel application or extension': 8.5}
{'Originality': 6.5, 'Methodology': 3.5, 'Clarity and Concision': 6.5, 'Impact': 7.5, 'Novel application or extension': 3.0}
{'Originality': 7.5, 'Methodology': 7.5, 'Clarity and Concision': 6.5, 'Impact': 8.5, 'Novel application or extension': 7.5}
{'Originality': 2.5, 'Methodology': 7.5, 'Clarity and Concision': 3.5, 'Impact': 7.5, 'Novel application or extension': 7.5}
{'Originality': 7.5, 'Methodology': 7.5, 'Clarity and Concision': 7.5, 'Impact': 7.5, 'Novel application or extension': 7.5}
{'Originality': 7.5, 'Methodology': 3.5, 'Clarity and Concision': 3.5, 'Impact': 7.5, 'Novel application or extension': 9.5}
{'Originality': 3.5, 'Methodology': 3.5, 'Clarity and Concision': 4.5, 'Impact': 7.5, 'Novel application or extension': 3.5}


In [None]:
reference_dataset_unlabel=raw_dataset_for_task1['papers']


unlabeled_data_scores = {
    "paper_id": reference_dataset_unlabel['paper_id'],
    "Originality": np.zeros((len(reference_dataset_unlabel['text']),)),
    "Methodology": np.zeros((len(reference_dataset_unlabel['text']),)),
    "Clarity and Concision": np.zeros((len(reference_dataset_unlabel['text']),)),
    "Impact": np.zeros((len(reference_dataset_unlabel['text']),)),
    "Novel application or extension": np.zeros((len(reference_dataset_unlabel['text']),))
}

for i in range(len(reference_dataset_unlabel['text'])):
    unlabeled_data_scores['paper_id'][i] = reference_dataset_unlabel['paper_id'][i]
    for sub_task,score in data_scores_unlabel[i].items():
        unlabeled_data_scores[sub_task][i] = score

unlabeled_data_df = pd.DataFrame(unlabeled_data_scores)
unlabeled_data_df.head(10)

Unnamed: 0,paper_id,Originality,Methodology,Clarity and Concision,Impact,Novel application or extension
0,P128,7.5,9.5,6.5,9.5,8.5
1,P132,3.0,3.5,3.5,7.5,8.5
2,P085,6.5,3.5,6.5,7.5,3.0
3,P120,7.5,7.5,6.5,8.5,7.5
4,P107,2.5,7.5,3.5,7.5,7.5
5,P065,7.5,7.5,7.5,7.5,7.5
6,P078,7.5,3.5,3.5,7.5,9.5
7,P035,3.5,3.5,4.5,7.5,3.5
8,P111,8.5,8.5,7.5,9.5,9.5
9,P123,8.5,9.5,6.5,9.5,8.5


In [None]:
unlabeled_data_df.shape

(135, 6)

In [None]:
df4= unlabeled_data_df.drop(columns=['paper_id'])

In [None]:
y_pred_unlabeled=rf_classifier.predict(df4)
unlabeled_data_df['is_publishable_pred']=y_pred_unlabeled


In [None]:
count_0 = np.count_nonzero(y_pred_unlabeled == 0)
count_1 = np.count_nonzero(y_pred_unlabeled == 1)
print(f"Number of publishible papers predicted {count_1}")
print(f"Number of non-publishible papers predicted {count_0}")

Number of publishible papers predicted 88
Number of non-publishible papers predicted 47


In [None]:
unlabeled_data_df.head(10)

Unnamed: 0,paper_id,Originality,Methodology,Clarity and Concision,Impact,Novel application or extension,is_publishable_pred
0,P128,7.5,9.5,6.5,9.5,8.5,1
1,P132,3.0,3.5,3.5,7.5,8.5,0
2,P085,6.5,3.5,6.5,7.5,3.0,1
3,P120,7.5,7.5,6.5,8.5,7.5,1
4,P107,2.5,7.5,3.5,7.5,7.5,0
5,P065,7.5,7.5,7.5,7.5,7.5,1
6,P078,7.5,3.5,3.5,7.5,9.5,0
7,P035,3.5,3.5,4.5,7.5,3.5,0
8,P111,8.5,8.5,7.5,9.5,9.5,1
9,P123,8.5,9.5,6.5,9.5,8.5,1


In [None]:
results_df=unlabeled_data_df[['paper_id','is_publishable_pred']]

In [None]:
path_saving="/content/drive/My Drive/Colab Notebooks"
unlabeled_data_df.to_csv(path_saving+"/unlabeled_data_df.csv",index=False)
results_df.to_csv(path_saving+"/results_df.csv",index=False)