In [6]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Named constants for the values that drive labelling and reproducibility.
HARD_LENGTH_THRESHOLD = 100  # questions longer than this many characters are labelled 'hard'
RANDOM_SEED = 42             # shared by the train/test split and the classifier

# Load the question bank and keep only rows that are complete in every column used below.
file_path = '/content/CSE_Dataset.csv'
dataset = pd.read_csv(file_path)
dataset = dataset[['Question', 'Option 1', 'Option 2', 'Option 3', 'Option 4', 'Answer', 'Answer.1']].dropna()

# Feature engineering: the character length of the question text is the proxy for difficulty.
# The vectorized .str accessor replaces a per-row Python call.
dataset['Question_Length'] = dataset['Question'].str.len()

# Binarize difficulty: strictly longer than the threshold -> 'hard', otherwise 'easy'.
dataset['Difficulty'] = (
    dataset['Question_Length']
    .gt(HARD_LENGTH_THRESHOLD)
    .map({True: 'hard', False: 'easy'})
)

# The model sees only the raw question text; the label is derived purely from its length,
# so the reported accuracy measures how well bag-of-words counts recover that length rule.
X = dataset['Question']
y = dataset['Difficulty']

# Hold out 20% of the questions for evaluation (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

# Convert text into token-count vectors. The vocabulary is fitted on the training
# split only, and the test split is transformed with that same vocabulary.
vectorizer = CountVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)

# Train the classifier. Seeding it makes the printed accuracy and the difficulty tags
# shown in the quiz reproducible across runs, consistent with the seeded split above.
model = RandomForestClassifier(random_state=RANDOM_SEED)
model.fit(X_train_vect, y_train)

# Evaluate on the held-out questions.
y_pred = model.predict(X_test_vect)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Function to run the quiz
def run_quiz(num_questions=5):
    """Run an interactive multiple-choice quiz on the console.

    Draws random rows from the module-level ``dataset``, tags each question
    with the difficulty predicted by the module-level ``model`` and
    ``vectorizer``, and reads the user's answer with ``input``. A question is
    shown again until one of A/B/C/D is entered. Afterwards the score and a
    feedback message are printed.

    Args:
        num_questions: Number of questions to ask. Capped at the number of
            rows available in ``dataset``. Defaults to 5.

    Returns:
        None. All results are written to standard output.
    """
    valid_options = ('A', 'B', 'C', 'D')

    # Cap the sample size: DataFrame.sample raises ValueError when asked for
    # more rows than exist (sampling without replacement).
    total = min(num_questions, len(dataset))
    if total <= 0:
        print("No questions available.")
        return

    questions = dataset.sample(total).reset_index(drop=True)
    score = 0

    for i, row in questions.iterrows():
        # Per-question values are computed once, not on every retry.
        difficulty_prediction = model.predict(vectorizer.transform([row['Question']]))[0]
        # Normalize the stored key the same way as the user's input so stray
        # whitespace or lowercase letters in the data cannot cause a false "Wrong".
        correct_option = str(row['Answer.1']).strip().upper()

        while True:
            print(f"Q{i+1} [{difficulty_prediction}]: {row['Question']}")
            print(f"A. {row['Option 1']}")
            print(f"B. {row['Option 2']}")
            print(f"C. {row['Option 3']}")
            print(f"D. {row['Option 4']}")

            answer = input("Your answer (A/B/C/D): ").strip().upper()

            if answer not in valid_options:
                # Invalid input: tell the user and show the same question again.
                print(f"Invalid option '{answer}'. Please choose A, B, C, or D.\n")
                continue

            if answer == correct_option:
                print("Correct!\n")
                score += 1
            else:
                print(f"Wrong. The correct answer is {correct_option}.\n")
            break  # a valid answer was given; move on to the next question

    # Report against the number of questions actually asked.
    print(f"Your total score is: {score} out of {total}")

    # Feedback bands are proportional to the quiz size (all correct, >= 3/5, >= 1/5).
    # For the default 5 questions they are exactly 5, 3-4, 1-2 and 0.
    # Integer arithmetic avoids float rounding at the band edges.
    if score == total:
        print("Excellent! You nailed it.")
    elif score * 5 >= total * 3:
        print("Better luck next time. You're getting there!")
    elif score * 5 >= total:
        print("You have to work harder. Keep practicing.")
    else:
        print("Phadle Bhai")

# Run the quiz only when this code is executed directly (as a script or in a
# notebook cell, where __name__ is "__main__"), not when it is imported:
# run_quiz() blocks waiting on input().
if __name__ == "__main__":
    run_quiz()


Model Accuracy: 80.95%
Q1 [hard]: Which machine learning algorithm is used for recommendation systems and can predict the preferences of users based on their past behavior?
A. Collaborative Filtering
B. Content-Based Filtering
C. Matrix Factorization
D. All of the above
Your answer (A/B/C/D): A
Wrong. The correct answer is D.

Q2 [easy]: Which of the following is not a type of search algorithm?
A. Binary search
B. Linear search
C. Depth-first search
D. None of the above
Your answer (A/B/C/D): F
Invalid option 'F'. Please choose A, B, C, or D.

Q2 [easy]: Which of the following is not a type of search algorithm?
A. Binary search
B. Linear search
C. Depth-first search
D. None of the above
Your answer (A/B/C/D): A
Wrong. The correct answer is D.

Q3 [easy]: Which of the following is not a valid access specifier in Java?
A. public
B. private
C. protected
D. internal
Your answer (A/B/C/D): V
Invalid option 'V'. Please choose A, B, C, or D.

Q3 [easy]: Which of the following is not a valid a