In [None]:
import torch
from transformers import BertTokenizer, BertForQuestionAnswering, BertModel
import pandas as pd
import getpass
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.nn.functional import cosine_similarity

In [None]:
# Load the question bank: one row per question, with the expected answer
# keywords held in the 'Keyword 1' .. 'Keyword 4' columns.
df = pd.read_csv("/content/drive/MyDrive/Subjective_Ques.csv")

In [None]:
# Preview the first rows of the question bank to confirm it loaded as expected.
df.head()

Unnamed: 0,Question,Keyword 1,Keyword 2,Keyword 3,Keyword 4
0,What is the importance of data preprocessing i...,preprocessing,cleaning,normalization,transformation
1,Explain the difference between classification ...,classification,regression,discrete,continuous
2,How does principal component analysis (PCA) wo...,pca,dimensionality,variance,eigenvalues
3,What are the different types of activation fun...,activation,relu,sigmoid,tanh
4,Describe the bias-variance tradeoff in machine...,bias,variance,overfitting,underfitting


In [None]:
# Load the student roster; student_login() authenticates against its
# 'Login ID' and 'Password' columns.
students_df = pd.read_csv("/content/drive/MyDrive/Kerala_Students_Dataset.csv")

In [None]:
import torch
import pandas as pd
import getpass
from transformers import BertTokenizer, BertModel

# In-memory exam state shared by the helper functions in this cell.
responses = {}  # student ID -> list of that student's answers, in question order
results = []  # one summary dict per evaluated student

# Tokenizer and encoder are loaded from the same pre-trained checkpoint.
MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)

def get_bert_embedding(text):
    """Return a mean-pooled BERT embedding for ``text``.

    The text is lower-cased and stripped first. If nothing remains, a zero
    tensor of shape (1, 768) is returned and the model is not called.
    Otherwise the text is tokenized (truncated to at most 512 tokens), run
    through the module-level ``model`` without gradient tracking, and the
    ``last_hidden_state`` is averaged over dimension 1.
    """
    cleaned = text.lower().strip()
    if len(cleaned) == 0:
        # Nothing to encode: hand back an all-zero placeholder vector.
        return torch.zeros((1, 768))

    encoded = tokenizer(
        cleaned,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        model_output = model(**encoded)

    hidden_states = model_output.last_hidden_state
    return hidden_states.mean(dim=1)  # average pooling over dimension 1

def count_keywords(student_answer, keyword_list):
    """Count how many keywords occur in the student's answer.

    Matching is a case-insensitive substring test: both the answer and each
    keyword are lower-cased and stripped before comparison.

    Args:
        student_answer: The answer text to search.
        keyword_list: Iterable of keyword strings.

    Returns:
        The number of non-blank keywords found in the answer.
    """
    normalized_answer = student_answer.lower().strip()
    matched = 0
    for keyword in keyword_list:
        needle = keyword.lower().strip()
        # A blank keyword (empty or whitespace-only cell) must not count:
        # the empty string is a substring of every answer, so it would be
        # reported as a match even for an empty response.
        if needle and needle in normalized_answer:
            matched += 1
    return matched

def calculate_similarity(student_answer, keyword_list):
    """Score an answer against its keywords on a 0-1 scale.

    Returns 0.0 when there are no keywords or the answer is blank. If at
    least one keyword is found by ``count_keywords``, the score is the
    fraction of keywords found. Only when no keyword is found does the
    function fall back to BERT: it averages the cosine similarity between
    the answer embedding and each keyword embedding, with negative
    similarities clipped to zero.
    """
    if not (keyword_list and student_answer.strip()):
        return 0.0  # nothing to compare against, or nothing was answered

    n_keywords = len(keyword_list)

    hits = count_keywords(student_answer, keyword_list)
    if hits > 0:
        # Literal keyword presence takes priority over embedding similarity.
        return hits / n_keywords

    answer_vector = get_bert_embedding(student_answer)
    clipped_scores = (
        max(
            0,
            torch.nn.functional.cosine_similarity(
                answer_vector, get_bert_embedding(kw.lower().strip())
            ).item(),
        )
        for kw in keyword_list
    )
    return sum(clipped_scores) / n_keywords  # mean over all keywords

def assign_marks(keyword_count):
    """Convert a matched-keyword count into marks out of 10.

    Four or more matches earn 10 marks, three earn 8, two earn 6 and one
    earns 4. Any other count receives the minimum of 2 marks.
    """
    if keyword_count >= 4:
        return 10

    # Exact-count tiers below the full-marks threshold, highest first.
    for tier_count, tier_marks in ((3, 8), (2, 6), (1, 4)):
        if keyword_count == tier_count:
            return tier_marks

    return 2  # minimum marks for an attempt

def student_login():
    """Prompt for a student ID and password and check them against ``students_df``.

    The ID is read with ``input`` and the password with ``getpass.getpass``.
    A login succeeds when at least one roster row has a 'Login ID' and a
    'Password' that both equal the typed values once converted to text.

    Returns:
        The entered student ID (surrounding whitespace removed) on success,
        otherwise ``None``.
    """
    user_id = input("Enter your Student ID: ").strip()
    password = getpass.getpass("Enter your Password: ")

    id_column = students_df['Login ID']
    password_column = students_df['Password']

    # Rows with a missing ID or password can never authenticate. Without this
    # mask, astype(str) would turn a missing value into the literal text "nan".
    has_credentials = id_column.notna() & password_column.notna()

    # Compare both columns as text. read_csv infers an integer dtype for an
    # all-digit column, and comparing integers with the typed string is False
    # for every row, which would lock out every student.
    # NOTE(review): a numeric password column that also contains missing
    # values is read as float ("1234.0") and still will not match — confirm
    # the roster is loaded with text columns if that can occur.
    id_matches = id_column.astype(str).str.strip() == user_id
    password_matches = password_column.astype(str) == password

    # NOTE(review): passwords are compared in plain text against the roster.
    if (has_credentials & id_matches & password_matches).any():
        print("Login Successful!")
        return user_id

    print("Invalid login credentials!")
    return None

def attend_exam(student_id):
    """Ask every question in ``df`` and record the student's answers.

    Any answers previously stored for ``student_id`` in ``responses`` are
    replaced. Each answer is stripped of surrounding whitespace and stored
    in the order the questions are asked.
    """
    print("\nAnswer the following questions:")

    collected = []
    responses[student_id] = collected  # registered up front, filled below

    for idx, question_row in df.iterrows():
        prompt = f"Q{idx+1}: {question_row['Question']}\nYour Answer: "
        collected.append(input(prompt).strip())

def evaluate_responses():
    """Mark one student's stored answers by keyword matching and record the result.

    Prompts for a student ID, then scores each stored answer against the
    keywords of the question at the same position in ``df``: the number of
    matched keywords (``count_keywords``) is converted to marks
    (``assign_marks``). A blank answer earns 0 marks. The per-answer marks
    and the overall total and grade are printed, and a summary dict is
    stored in ``results``. Evaluating the same student again replaces the
    earlier summary instead of adding a second row.

    If no responses are stored for the ID, a message is printed and
    ``results`` is left untouched.
    """
    typed_id = input("Enter Student ID to evaluate: ")
    # Prefer the ID exactly as typed; otherwise tolerate stray whitespace.
    student_id = typed_id if typed_id in responses else typed_id.strip()

    if student_id not in responses:
        print("No responses found for this student.")
        return

    keyword_columns = ['Keyword 1', 'Keyword 2', 'Keyword 3', 'Keyword 4']
    total_marks = 0

    for i, ans in enumerate(responses[student_id]):
        # Answers are stored in question order, so position i selects the
        # matching question row; missing keyword cells are dropped.
        key_list = df.iloc[i][keyword_columns].dropna().tolist()

        if ans.strip():
            keyword_count = count_keywords(ans, key_list)
            marks = assign_marks(keyword_count)
        else:
            # A blank answer is not an attempt, so it must not receive the
            # minimum "attempt" marks that assign_marks gives for 0 matches.
            keyword_count, marks = 0, 0

        total_marks += marks
        print(f"Marks for Answer {i+1}: {marks} (Keywords Matched: {keyword_count})")

    # NOTE(review): assign_grade uses absolute thresholds (90/80/70/60), which
    # behave as percentages only when the paper has exactly 10 questions of
    # 10 marks each — confirm against the question bank.
    grade = assign_grade(total_marks)
    print(f"Total Marks: {total_marks}, Grade: {grade}")

    record = {
        "Student ID": student_id,
        "Answers": list(responses[student_id]),
        "Total Marks": total_marks,
        "Grade": grade
    }

    # Keep one row per student: overwrite an earlier evaluation if present.
    for position, previous in enumerate(results):
        if previous.get("Student ID") == student_id:
            results[position] = record
            break
    else:
        results.append(record)

def assign_grade(marks):
    """Translate total marks into a letter grade.

    The grade is the first band whose lower bound the marks reach:
    90 -> 'A+', 80 -> 'A', 70 -> 'B', 60 -> 'C'. Anything below 60 is 'F'.
    """
    grade_floors = (
        (90, 'A+'),
        (80, 'A'),
        (70, 'B'),
        (60, 'C'),
    )
    # Bands are listed from highest to lowest, so the first hit wins.
    for floor, letter in grade_floors:
        if marks >= floor:
            return letter
    return 'F'

def save_results():
    """Write the accumulated evaluation results to 'Student_Results.csv'.

    Each entry of the module-level ``results`` list becomes one row. When
    there are no results, a file containing only the header row is written
    so that it can still be read back with ``pd.read_csv``.
    """
    result_columns = ["Student ID", "Answers", "Total Marks", "Grade"]

    if results:
        results_frame = pd.DataFrame(results)
    else:
        # An empty DataFrame without columns is written as a header-less
        # file, which pd.read_csv rejects; keep the schema explicit instead.
        results_frame = pd.DataFrame(columns=result_columns)

    # A distinct local name avoids shadowing the question-bank frame `df`.
    results_frame.to_csv('Student_Results.csv', index=False)
    print("Results saved successfully!")

# Main execution
student_id = student_login()
if student_id:
    attend_exam(student_id)
    print("\nExam Submitted! Awaiting evaluation.")

    # Evaluate and persist only after a successful login. `responses` and
    # `results` are reset at the top of this cell, so after a failed login
    # there is nothing to evaluate and save_results() would only overwrite
    # Student_Results.csv with an empty result set.
    evaluate_responses()
    save_results()


Enter your Student ID: FYUGP2025100001
Enter your Password: ··········
Login Successful!

Answer the following questions:
Q1: What is the importance of data preprocessing in machine learning?
Your Answer: it is the Preprocessing, Cleaning, Normalization and Transformation
Q2: Explain the difference between classification and regression with examples.
Your Answer: the Classification and Regression is discrete and Continuous
Q3: How does principal component analysis (PCA) work, and when should it be used?
Your Answer: PCA is the Dimensionality of Variance and Eigenvalues
Q4: What are the different types of activation functions in neural networks?
Your Answer: the Activation of ReLU is Sigmoid and Tanh
Q5: Describe the bias-variance tradeoff in machine learning.
Your Answer: the Bias and Variance is Overfitting and Underfitting
Q6: What are hyperparameters in machine learning, and how do they differ from model parameters?
Your Answer: Hyperparameters are the Parameters it Learning rate an

In [None]:
# Reload the results CSV written by save_results() to check what was persisted.
# NOTE(review): save_results() writes to the relative path 'Student_Results.csv';
# this absolute path assumes the working directory is /content — confirm.
data = pd.read_csv("/content/Student_Results.csv")

In [None]:
# Preview the saved results (one row per evaluated student).
data.head()

Unnamed: 0,Student ID,Answers,Total Marks,Grade
0,FYUGP2025100001,"['it is the Preprocessing, Cleaning, Normaliza...",86,A
