In [2]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import glob

In [5]:
# 1. load your full data
df = pd.read_csv('../data/training.csv')

# 2. take a reproducible random sample of 100 rows
df100 = df.sample(n=100, random_state=42).reset_index(drop=True)

# 3. drop the existing label column
df100_nolabel = df100[["id","sentence"]]
df100_nolabel["label"] = ""        # ← empty column for your teammates

# 4. split into four 25-row chunks
chunks = np.array_split(df100_nolabel, 4)

# 5. write each chunk
for i, chunk in enumerate(chunks, start=1):
    chunk.to_csv(f"group_member_{i}_to_label.csv", index=False)

# Now you have:
#   group_member_1_to_label.csv
#   group_member_2_to_label.csv
#   group_member_3_to_label.csv
#   group_member_4_to_label.csv
#
# Each file contains only “id” and “sentence” and exactly 25 rows.

  return bound(*args, **kwds)


In [4]:
# AFTER your teammates label their 25 sentences, you’ll get back four CSVs of form:
#   id,sentence,label
# with 25 rows each. To merge them back into one “100-row with labels” CSV:
# pick up the 4 returned files
files = glob.glob("group_member_*_to_label.csv")

# read & concatenate
labeled = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)

# lowercase the labels
labeled["label"] = labeled["label"].str.lower().str.strip()

# rename the teammates’ “label” column to “pred_label”
labeled = labeled.rename(columns={"label": "pred_label"})

# now save
labeled.to_csv("100_labeled_by_team.csv", index=False)

In [9]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1. Load the “100_labeled_by_team.csv” (your teammates’ labels + id, sentence)
pred = pd.read_csv("100_labeled_by_team.csv")  
#    columns: id, sentence, pred_label

# 2. Load the original data (which has the true “label”)
true = pd.read_csv("../data/training.csv")[["id","label"]]

# 3. Merge on “id” to align predictions and ground-truth
df = pred.merge(true, on="id", how="left")
#    df now has columns: id, sentence, pred_label, label

# 4. Compute simple accuracy
acc = accuracy_score(df["label"], df["pred_label"])
print(f"Accuracy: {acc:.2%}")
# 5. (Optional) Get precision/recall/F1 per class
print("\nClassification Report:")
print(classification_report(df["label"], df["pred_label"], zero_division=0))

# 6. (Optional) Confusion matrix
cm = confusion_matrix(df["label"], df["pred_label"], labels=["negative","neutral","positive"])
cm_df = pd.DataFrame(cm, index=["true_neg","true_neu","true_pos"], 
                         columns=["pred_neg","pred_neu","pred_pos"])
print("\nConfusion Matrix:")
print(cm_df)

Accuracy: 62.00%

Classification Report:
              precision    recall  f1-score   support

    negative       0.46      0.71      0.56        17
     neutral       0.71      0.62      0.66        55
    positive       0.62      0.57      0.59        28

    accuracy                           0.62       100
   macro avg       0.60      0.63      0.60       100
weighted avg       0.64      0.62      0.62       100


Confusion Matrix:
          pred_neg  pred_neu  pred_pos
true_neg        12         5         0
true_neu        11        34        10
true_pos         3         9        16


In [18]:
print(df)

       id                                           sentence pred_label  \
0   84457                                       she blurted.   negative   
1   33315               $10 is the minimum tip they ask for.   negative   
2   95755            Once arriving home, we opened our food.    neutral   
3   99353  Please take my word for this: I eat some grimy...   positive   
4   23628  I call him at 9:48 and hey said they were on t...    neutral   
..    ...                                                ...        ...   
95  64678  I'll make sure to walk a block south over to S...   positive   
96  21666  I will admit that the food is all presented ve...   positive   
97  82393  They kind of reminded me of the ones served at...    neutral   
98  29891  A friend and I decided to come here for dinner...   positive   
99  64529                           My truck didnt start up.   negative   

       label  
0    neutral  
1    neutral  
2    neutral  
3   positive  
4    neutral  
..       