<a href="https://colab.research.google.com/github/Ishan1440/Survey-Response-Quality-Analysis/blob/main/Response_Analysis_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Mount Google Drive so that files under /content/drive (the survey CSV and
# the model checkpoints this notebook references by path) are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the survey CSV on the mounted Drive; read by pd.read_csv below.
sample_filepath = "/content/drive/MyDrive/sampleData_a.csv"

In [None]:
# Load the survey CSV into a DataFrame; every later cell derives from `df`.
df = pd.read_csv(sample_filepath)
# df.head()

In [None]:
# Treat every column except the first two and the last one as an open-ended
# (OE) question column, and keep just those columns in `df_oe`.
# NOTE(review): the selection is purely positional. It assumes the CSV layout is
# [2 leading metadata columns, OE questions..., 1 trailing flag column] --
# confirm whenever the export format changes.
oe_cols = df.columns[2:-1]
df_oe = df[oe_cols]
# df_oe.head()
oe_cols

Index(['Q16A. What is the most important thing you LIKE about the shown concept}?     This can include anything you would want kept for sure or aspects that might drive you to buy or try it…       Please type a detailed response in the space below',
       'Q16B. What is the most important thing you DISLIKE about the shown concept}?    This can include general concerns, annoyances, or any aspects of the product that need fixed for this to be more appealing to you...     Please type a detailed response in the space below.',
       'Q18_1 What specific product that you are currently using would the shown product replace?\n Please type in ONE specific brand or product per space provided.',
       'Q18_2 What specific product that you are currently using would the shown concept replace?\n Please type in ONE specific brand or product per space provided.',
       'Q18_3 What specific product that you are currently using would the shown concept replace?\n Please type in ONE specific brand or 

## Computing weights for the open-ended (OE) questions

In [None]:
import google.generativeai as genai
from google.colab import userdata
# Configure the Gemini client with the API key read from the Colab secret
# named 'gem_key1', so the key itself is never written into the notebook.
genai.configure(api_key=userdata.get('gem_key1'))

In [None]:
def evaluate_question_importance(questions):
    """Ask Gemini to rate how much each open-ended question matters for response quality.

    Parameters:
    questions: The survey questions to rate. The value is interpolated into the
        prompt as-is. When it is a sized, non-string collection, the number of
        returned weights is checked against its length.

    Returns:
    list[float]: The weights parsed from the model reply, in the order given.

    Raises:
    ValueError: If the reply contains no JSON list, the list holds non-numeric
        entries, or its length differs from the number of questions.
    """
    # Function-scope imports keep this cell runnable on its own.
    import json
    import re

    prompt = f"""
    You are a survey expert tasked with evaluating the importance of open-ended questions.
    Consider the following questions:
    {questions}

    Assign a weight to each question based on its potential impact on the overall quality of survey responses.
    A higher weight indicates a greater impact on quality.
    Consider factors such as the depth of thought required, the richness of information provided, and the potential for revealing insights.

    Between 0 and 1, where 1 means the question is highly effective in determining response quality and 0 means it provides no value, how would you rate its importance?
    Provide the weights as a JSON list of floating point numbers of precision upto 3 , where each number corresponds to the weight of the question in the same order as provided.
    Dont provide any chain of reasoning, also dont want any output formatting and carefully weigh the question based on the above criteria as a floating point number upto 3 decimal places at least.
    """

    model = genai.GenerativeModel('gemini-1.5-pro')
    response = model.generate_content(prompt)
    raw_reply = response.text.strip()

    # The reply is untrusted model output, so it is parsed as JSON data and
    # never evaluated as Python. Searching for the first [...] span tolerates
    # markdown code fences or stray prose around the list.
    list_match = re.search(r'\[.*?\]', raw_reply, flags=re.DOTALL)
    if list_match is None:
        raise ValueError(f"No JSON list found in model response: {raw_reply!r}")
    try:
        parsed = json.loads(list_match.group(0))
    except json.JSONDecodeError as exc:
        raise ValueError(f"Model response is not a valid JSON list: {raw_reply!r}") from exc

    # bool is a subclass of int, so it is excluded explicitly.
    if not all(isinstance(w, (int, float)) and not isinstance(w, bool) for w in parsed):
        raise ValueError(f"Model response contains non-numeric weights: {parsed!r}")
    weights = [float(w) for w in parsed]

    # Weights are consumed positionally by callers, so a wrong count is an error.
    if not isinstance(questions, str):
        try:
            expected_count = len(questions)
        except TypeError:
            expected_count = None
        if expected_count is not None and len(weights) != expected_count:
            raise ValueError(
                f"Expected {expected_count} weights but the model returned {len(weights)}"
            )

    return weights

In [None]:
# Ask the LLM for one weight per open-ended question. A plain list is passed so
# the prompt contains just the question texts rather than the pandas Index repr.
weights = evaluate_question_importance(list(df_oe.columns))
# The weights are indexed by column position later on, so the count must match.
assert len(weights) == len(df_oe.columns), "expected one weight per open-ended question"
weights

[0.9, 0.85, 0.6, 0.6, 0.6]

In [None]:
# r = number of rows and c = number of open-ended question columns in df_oe;
# both are used as loop bounds by the scoring cell.
r = len(df_oe.index)
c = len(df_oe.columns)

In [None]:
from transformers import pipeline

# Load the fine-tuned text-classification checkpoint from Drive as a pipeline.
# The scoring cell calls it with "<question> [SEP] <answer>" strings and reads
# the returned label/score pair to derive the QA relevance value.
classifier = pipeline("text-classification", model="/content/drive/MyDrive/fine-tuned-model")

Device set to use cuda:0


In [None]:
# import torch
# from transformers import BertTokenizer, BertForSequenceClassification

# # Load model and tokenizer
# model_path = "/content/drive/MyDrive/superficiality_model"
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # Load base tokenizer
# model = BertForSequenceClassification.from_pretrained(model_path).to("cuda")

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load model and tokenizer
model_path = "/content/drive/MyDrive/superficiality_model"
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # Load base tokenizer

# Use the GPU when one is attached, otherwise fall back to CPU instead of
# crashing on a hard-coded "cuda" device.
model_device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model with a single output logit (num_labels=1).
# NOTE(review): ignore_mismatched_sizes=True suppresses the error raised when the
# checkpoint's classifier head has a different shape; in that case the head is
# freshly initialised and the scores would be meaningless. Confirm the
# checkpoint was saved with num_labels=1.
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=1, ignore_mismatched_sizes=True).to(model_device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
def predict_superficiality(question: str, answer: str) -> float:
    """Score a question/answer pair with the module-level superficiality model.

    The pair is tokenized as a two-segment input, run through `model` without
    gradient tracking, and the single output logit is squashed with a sigmoid.

    Parameters:
    question (str): The survey question text.
    answer (str): The respondent's answer to that question.

    Returns:
    float: Sigmoid of the model's logit, rounded to 4 decimal places.
    """
    # Follow the model to whichever device it was loaded on rather than
    # assuming "cuda", so the function also works on a CPU-only runtime.
    target_device = model.device

    # A single pair needs no padding; padding to 512 tokens only added masked
    # positions that cost compute without contributing to the prediction.
    # Truncation still caps over-long inputs at the 512-token limit.
    inputs = tokenizer(
        question,
        answer,
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(target_device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # .item() requires exactly one logit, which matches a num_labels=1 head.
    score = torch.sigmoid(outputs.logits).item()
    return round(score, 4)

In [None]:
def evaluate_plagscore(text):
    """Ask Gemini for a plagiarism score for `text`.

    Parameters:
    text: The text to evaluate; it is interpolated into the prompt as-is.

    Returns:
    float | None: A score in [0, 1] when the reply contains exactly one usable
    number in that range; None when the reply is missing, unparseable,
    ambiguous, or out of range.
    """
    # Function-scope import keeps this cell runnable on its own.
    import re

    prompt = f"""
    You are an AI assistant that evaluates the plagiarism score of a given text.
    Analyze the text and provide a plagiarism score as a floating-point number between 0 and 1, where:
    - 0.0 means completely original.
    - 1.0 means fully plagiarized.

    Text: {text}

    Provide the score **only as a floating-point number**.
    """

    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)

    # NOTE(review): the `.text` accessor is expected to raise ValueError when the
    # reply has no text part (e.g. a blocked response) -- confirm against the
    # google-generativeai docs. Treat that like any other unusable reply.
    try:
        raw_reply = response.text.strip()
    except ValueError:
        return None

    # Drop markdown emphasis, backticks and quotes the model may wrap around the number.
    cleaned = raw_reply.strip("*`\"' \t\r\n")

    try:
        plag_score = float(cleaned)
    except ValueError:
        # Fall back to a number embedded in a short phrase ("Score: 0.8"), but
        # only when it is the sole number, so the pick is never a guess.
        numbers = re.findall(r'(?<![\w.])[-+]?\d*\.?\d+(?!\w|\.\d)', cleaned)
        if len(numbers) != 1:
            return None
        plag_score = float(numbers[0])

    # Enforce the documented 0-1 contract; this comparison also rejects NaN.
    if not (0.0 <= plag_score <= 1.0):
        return None

    return plag_score

In [None]:
import re

In [None]:
def detect_ai_content(text):
    """
    Heuristically estimate how likely a text response is AI-generated.

    The score starts from a length-based prior and is then nudged up by signs
    of polished writing (capitalised sentences, closing punctuation, balanced
    sentiment, formal transitions, commas/semicolons) and nudged down by signs
    of casual writing (slang, ALL-CAPS words, repeated !/? marks).

    Parameters:
    text (str): Text response to analyze

    Returns:
    float: Score clamped to 0-1 where 0 = likely human, 1 = likely AI.
           Non-string or blank input yields 0.0.
    """
    if not isinstance(text, str):
        return 0.0
    body = text.strip()
    if not body:
        return 0.0
    lowered = body.lower()

    # --- Length prior: longer answers start with a higher score ---
    n_words = len(body.split())
    if n_words > 30:
        score = 0.4
    elif n_words > 15:
        score = 0.3
    elif n_words > 5:
        score = 0.2
    else:
        score = 0.1

    # --- Polished-writing signals ---
    fragments = [piece.strip() for piece in re.split(r'[.!?]+', body)]
    fragments = [piece for piece in fragments if piece]
    if fragments:
        # Share of sentence fragments that open with an uppercase letter.
        capitalised = len([piece for piece in fragments if piece[0].isupper()])
        score += 0.1 * (capitalised / len(fragments))

    if re.search(r'[.!?]$', body) is not None:
        score += 0.05

    # --- Casual-writing signals (each capped) ---
    slang_patterns = (
        r'\b[uU][rR]\b',  # "ur" instead of "your"
        r'\bcuz\b',        # "cuz" instead of "because"
        r'\bidk\b',        # "idk"
        r'\blol\b',        # "lol"
        r'\btbh\b',        # "tbh"
        r'\bomg\b',        # "omg"
    )
    slang_hits = len([pat for pat in slang_patterns if re.search(pat, lowered)])
    score -= min(0.2, slang_hits * 0.05)

    shouted_words = re.findall(r'\b[A-Z]{3,}\b', body)
    score -= min(0.15, len(shouted_words) * 0.05)

    punctuation_runs = re.findall(r'[!?]{2,}', body)
    score -= min(0.1, len(punctuation_runs) * 0.05)

    # --- AI-leaning signals ---
    praise_terms = ('good', 'great', 'best', 'excellent', 'love', 'reliable', 'recommend')
    complaint_terms = ('bad', 'expensive', 'issues', 'problem', 'poor', 'sucks', 'terrible')
    mentions_praise = any(term in lowered for term in praise_terms)
    mentions_complaint = any(term in lowered for term in complaint_terms)
    if mentions_praise and mentions_complaint:
        score += 0.1  # both sides mentioned in one answer

    connectors = ('however', 'therefore', 'furthermore', 'additionally', 'moreover')
    connector_hits = len([term for term in connectors if term in lowered])
    score += min(0.15, connector_hits * 0.05)

    # Commas and semicolons each add a small bonus for sentence complexity.
    for mark in (",", ";"):
        if mark in body:
            score += 0.05

    # Clamp into the 0-1 range.
    if score <= 0.0:
        return 0.0
    if score >= 1.0:
        return 1.0
    return score

In [None]:
# result_df = pd.DataFrame(columns=['Copy-Paste', 'AI-Generated', 'Superficial score', 'QA relevance', 'Repetitiveness score'])
# Aggregate per-respondent quality scores over the open-ended answers.
#
# For each row, every answered question contributes three weighted values:
#   * 'Plag score'        - detect_ai_content() heuristic
#   * 'Superficial score' - predict_superficiality(), first two questions only
#   * 'QA relevance'      - classifier() probability that the answer is sensical
# Outputs: `result_df` (one row per respondent, same row labels 0..r-1) and
# `col_analysis` (row positions where no question was answered).

# Fail before the expensive loop if a weight is missing for any question.
assert len(weights) >= c, "need one weight per open-ended question"

score_records = []
col_analysis = []
for row in range(r):
  CP, SS, QA = 0, 0, 0
  answered_wt = 0
  for col in range(c):
    res = df_oe.iloc[row, col]
    # Blank CSV cells load as NaN; treat them like the "Unanswered" sentinel
    # instead of crashing further down on res.split().
    if pd.isna(res) or res == "Unanswered":
      continue
    res = str(res)
    answered_wt += 1

    qa_result = classifier(f"{oe_cols[col]} [SEP] {res}")
    label = qa_result[0]["label"]
    score = qa_result[0]["score"]
    if label == "LABEL_1":
      qa = score  # Sensical answer
    else:
      qa = 1 - score  # Nonsensical answer

    if col < 2:
      ss = predict_superficiality(oe_cols[col], res)
      # Linear rescale sending 0.48 -> 0.2 and 0.75 -> 0.9.
      # NOTE(review): 0.48 / 0.75 are presumably the observed range of the raw
      # model output -- confirm; values outside it leave the [0.2, 0.9] band.
      ss = (ss - 0.48)*(0.7)/(0.75-0.48) + 0.2
    else:
      ss = 0

    cp = detect_ai_content(res)
    # NOTE(review): this rescale depends on the last column of `df` (the label
    # column), so the resulting score is not independent of that label.
    if df.iloc[row, -1] == 0 and len(res.split(' ')) > 10:
      cp = (cp - 0.1)/(0.6-0.1)

    cp, ss, qa = cp * weights[col], ss * weights[col], qa * weights[col]
    CP += cp
    SS += ss
    QA += qa

  if answered_wt > 0:
    # NOTE(review): SS is divided by the count of ALL answered questions although
    # only the first two contribute to it, and CP / QA are not normalised at
    # all. Kept as-is; confirm this is intended.
    SS /= answered_wt
  else:
    col_analysis.append(row)

  score_records.append([CP, SS, QA])

# Build the frame once instead of growing it row by row with .loc.
result_df = pd.DataFrame(
  score_records,
  columns=['Plag score', 'Superficial score', 'QA relevance'],
  index=range(r),
)

In [None]:
# Show the rows recorded in col_analysis, i.e. respondents for whom the scoring
# loop found no answered open-ended question.
df1 = df.iloc[col_analysis]
df1

Unnamed: 0,Unique ID,Response Duration,Q16A. What is the most important thing you LIKE about the shown concept}? This can include anything you would want kept for sure or aspects that might drive you to buy or try it… Please type a detailed response in the space below,"Q16B. What is the most important thing you DISLIKE about the shown concept}? This can include general concerns, annoyances, or any aspects of the product that need fixed for this to be more appealing to you... Please type a detailed response in the space below.",Q18_1 What specific product that you are currently using would the shown product replace?\n Please type in ONE specific brand or product per space provided.,Q18_2 What specific product that you are currently using would the shown concept replace?\n Please type in ONE specific brand or product per space provided.,Q18_3 What specific product that you are currently using would the shown concept replace?\n Please type in ONE specific brand or product per space provided.,OE_Quality_Flag
98,153,16.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,0
147,225,21.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,1
151,230,10.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,0
186,276,9.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,0
721,1031,14.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,0
1127,1575,14.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,0
1129,1579,9.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,1
2360,2660,11.0,Unanswered,Unanswered,Unanswered,Unanswered,Unanswered,1


In [None]:
# Remove the rows listed in col_analysis (no answered open-ended question) from
# both the scores and the source frame so the two stay row-aligned.
# errors="ignore" keeps the cell re-runnable: on a second run the labels are
# already gone and a plain drop() would raise KeyError.
result_df = result_df.drop(col_analysis, errors="ignore")
df = df.drop(col_analysis, errors="ignore")
result_df.head(15)

In [None]:
# Copy the first two columns of `df` to the front of the score table and its
# last column to the end. Series are aligned to result_df by row label.
# The membership checks keep the cell re-runnable: DataFrame.insert raises
# ValueError when the column already exists.
for i in [0, 1]:
  col = df.columns[i]
  if col not in result_df.columns:
    result_df.insert(i, col, df[col])

flag_col = df.columns[-1]
if flag_col not in result_df.columns:
  # Append at the current end rather than at a hard-coded position.
  result_df.insert(len(result_df.columns), flag_col, df.iloc[:, -1])
result_df.head()

Unnamed: 0,Unique ID,Response Duration,Plag score,Superficial score,QA relevance,OE_Quality_Flag
0,3,10.0,0.175,0.743633,1.106384,0
1,5,13.0,0.8875,0.110979,1.810645,0
2,6,10.0,0.61,0.234869,0.897222,0
3,8,10.0,0.415,0.195515,2.843587,0
4,10,12.0,0.71,0.297127,2.905401,0


In [None]:
# Write the score table to the current working directory; index=False leaves
# the DataFrame row labels out of the file.
result_df.to_csv('sampleResult.csv', index = False)