# Loading Google Drive and fetching Data

In [141]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# annotation CSVs read further down (under /content/drive/MyDrive/...) are
# reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [142]:
import pandas as pd
import numpy as np
import json
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa

In [143]:
# Every annotation export lives in the same Drive folder; keeping that folder
# in one constant makes relocating the data a one-line change instead of five.
ANNOTATIONS_DIR="/content/drive/MyDrive/Annotations"

# CV annotations, one CSV per annotator (the "choice" column is read later).
CV_Pranav=pd.read_csv(f"{ANNOTATIONS_DIR}/CV_253.csv")
CV_Soham=pd.read_csv(f"{ANNOTATIONS_DIR}/CV_Soham.csv")
CV_Vansh=pd.read_csv(f"{ANNOTATIONS_DIR}/CV_Vansh.csv")

# NLP annotations, one CSV per annotator (the "label" column is read later).
NLP_Pranav=pd.read_csv(f"{ANNOTATIONS_DIR}/NLP_253.csv")
NLP_Soham=pd.read_csv(f"{ANNOTATIONS_DIR}/soham_annotations.csv")

# Categories

In [144]:
# Label inventory for the NLP annotations. preprocess() converts a label to its
# position in this list (cats.index), so the order fixes the integer codes:
# "NOUN" -> 0 ... "X" -> 13. "X" is also the label preprocess() falls back to
# when only one annotator tagged a token.
cats = [
    "NOUN", "PROPN", "VERB", "ADJ", "ADV",
    "ADP", "PRON", "DET", "CONJ", "PART",
    "PRON_WH", "PART_NEG", "NUM", "X",
]

# Preprocessing of Data

In [145]:
def _token_labels(raw):
  """Decode one annotation cell into a {stripped token text: first label} dict."""
  # Only the first entry of "labels" is kept; surrounding double quotes on the
  # label are removed. A token text that occurs twice keeps its last label.
  return {
    span["text"].strip(): span["labels"][0].strip('"')
    for span in json.loads(raw)
  }


def preprocess(data1,data2,cats,missing_label="X"):
  """Align two annotators' token labels and encode them as category indices.

  Each element of ``data1``/``data2`` is a JSON string that decodes to a list
  of objects with a ``"text"`` field and a ``"labels"`` list. Rows are paired
  by position (``data1[k]`` with ``data2[k]``) and, within a row, tokens are
  paired by their whitespace-stripped text.

  Args:
    data1: JSON strings from the first annotator, one per row.
    data2: JSON strings from the second annotator, indexed in step with
      ``data1``; entries beyond ``len(data1)`` are not read.
    cats: Ordered label names; a label's position in this list is its code.
    missing_label: Label assigned to the annotator who did not tag a token
      that the other annotator tagged. Defaults to ``"X"``.

  Returns:
    ``[codes1, codes2]``: two equal-length lists of indices into ``cats``.
    For every row, tokens tagged by the first annotator come first (in their
    order), followed by tokens only the second annotator tagged.

  Raises:
    ValueError: If a label (or ``missing_label``, when needed) is not in
      ``cats``.
    IndexError: If ``data2`` has fewer rows than ``data1``.

  Note:
    Tokens are keyed by text, so a token text repeated inside one row
    contributes a single entry (its last label) per annotator.
  """
  codes1=[]
  codes2=[]
  for row in range(len(data1)):
    labels1=_token_labels(data1[row])
    labels2=_token_labels(data2[row])
    # Tokens the first annotator tagged: take the second annotator's label if
    # present (removing it so it is not counted again below), else the fallback.
    for word,label1 in labels1.items():
      label2=labels2.pop(word,missing_label)
      codes1.append(cats.index(label1))
      codes2.append(cats.index(label2))
    # Whatever is left was tagged by the second annotator only.
    for label2 in labels2.values():
      codes1.append(cats.index(missing_label))
      codes2.append(cats.index(label2))
  return [codes1,codes2]



# NLP Cohen's Kappa

In [146]:
# Encode both NLP annotators' labels as two aligned lists of category indices.
prep_data=preprocess(
  NLP_Pranav["label"].tolist(),
  NLP_Soham["label"].tolist(),
  cats,
)
# Header, then each rater's name followed by that rater's codes, one per line.
print("Rater Data:","Pranav:",prep_data[0],"Soham:",prep_data[1],sep="\n")


Rater Data:
Pranav:
[0, 0, 0, 1, 1, 1, 2, 2, 2, 4, 5, 5, 6, 6, 12, 8, 13, 13, 13, 13, 13, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 8, 13, 13, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 5, 5, 5, 5, 5, 8, 12, 13, 13, 13, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5, 8, 12, 13, 13, 13, 13, 13, 13, 0, 0, 1, 1, 1, 2, 2, 2, 2, 5, 5, 12, 13, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 5, 5, 5, 5, 6, 6, 8, 13, 0, 1, 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 13, 0, 0, 0, 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 13, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 6, 6, 6, 13, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 5, 5, 6, 6, 13, 13, 13, 13, 13, 13, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 5, 5, 5, 7, 8, 12, 12, 5, 13, 13, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 3, 3, 5, 5, 5, 5, 5, 6, 7, 2, 13, 13, 13, 13, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 0, 9, 13, 13, 13, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 5, 5, 5, 5, 5, 5, 2, 5, 13, 13, 13, 0, 1, 1, 1, 1, 1, 2, 3, 5, 5, 12, 13, 13, 13, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 5, 5, 5, 5, 5, 

Cohen's Kappa

In [147]:
# prep_data holds exactly the two raters' code lists, so it unpacks into the
# two rating arguments of cohen_kappa_score.
print("Cohen's Kappa",cohen_kappa_score(*prep_data))

Cohen's Kappa 0.6338404688747817


# CV Fleiss' Kappa

In [148]:
def cv_prep(df1,df2,df3,positive_label="Truck"):
  """Tally, per item, how many of three raters chose ``positive_label``.

  Args:
    df1: First rater's choices, indexable by ``0..len(df1)-1``.
    df2: Second rater's choices, indexed in step with ``df1``.
    df3: Third rater's choices, indexed in step with ``df1``.
    positive_label: Choice counted in the first column; any other value
      counts toward the second column. Defaults to ``"Truck"``.

  Returns:
    A list with one ``[positive_count, other_count]`` pair per item of
    ``df1``; each pair sums to 3.

  Raises:
    IndexError: If ``df2`` or ``df3`` is shorter than ``df1`` (for list
      inputs).
  """
  raters=(df1,df2,df3)
  counts=[]
  # The item count is taken from df1; extra entries in df2/df3 are not read.
  for item in range(len(df1)):
    positive_votes=sum(1 for rater in raters if rater[item]==positive_label)
    counts.append([positive_votes,len(raters)-positive_votes])
  return counts

In [149]:
# Pull each CV annotator's "choice" column out as a plain Python list.
prep_Pranav,prep_Soham,prep_Vansh=(
  frame["choice"].tolist() for frame in (CV_Pranav,CV_Soham,CV_Vansh)
)

In [150]:
# Per-image vote counts: one [truck_count, no_truck_count] row per item.
prep_list = cv_prep(prep_Pranav, prep_Soham, prep_Vansh)
# fleiss_kappa is given the counts as a 2-D array (rows = items, columns = categories).
prep_arr = np.asarray(prep_list)
f_kappa = fleiss_kappa(prep_arr, method="fleiss")

In [151]:
# Show the Fleiss' kappa computed for the three CV annotators.
print(f_kappa)

0.8382749326145548


# Summary



1. Pre-Processed the annotated data of NLP. The data was annotated by Pranav Jigar Thakkar and Soham Srivastva.
2. Used the Function "cohen_kappa_score" to calculate Cohen's Kappa of NLP data.
3. Got a Cohen's Kappa of 0.6338404688747817, which signifies good agreement between the two annotators' labels and a relatively consistent labeling process.
4. Pre-Processed the annotated data of CV dataset. The data was annotated by Pranav Jigar Thakkar, Soham Srivastva and Vansh.
5. Used the Function "fleiss_kappa" from "statsmodels.stats.inter_rater" to calculate Fleiss Kappa of CV data.
6. Got a Fleiss' Kappa of 0.8382749326145548, which suggests strong agreement among the three annotators' labels and a consistent and reliable labeling process across the dataset.




