<a href="https://colab.research.google.com/github/InduYadav689/Personalized-Student-Recommendations/blob/main/ai_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the quiz question bank and the historical quiz results from the Colab workspace
quiz_data, hist_data = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/Cleaned_Quiz_Data.csv', '/content/cleaned_histdata.csv')
)

# Keep a five-row preview of each frame and show both as the cell output
quiz_data_head, hist_data_head = quiz_data.head(), hist_data.head()

quiz_data_head, hist_data_head


(   Question ID                                        Description  \
 0         1827  The tissue which has free surface that faces e...   
 1         1828  Epithelial tissue is distinguished from connec...   
 2         1829  The ciliated columnar epithelial cells in huma...   
 3         1830                The squamous epithelium is found in   
 4         1831  The kind of epithelium which forms the inner w...   
 
                                  Topic                   Correct Answer  \
 0  structural organisation in animals                 Epithelial tissue   
 1  structural organisation in animals                 basement membrane   
 2  structural organisation in animals   bronchioles and fallopian tubes   
 3  structural organisation in animals                 air sacs of lungs   
 4  structural organisation in animals               squamous epithelium   
 
                                              Options  
 0  Muscular tissue, Fluid connective tissue, Epit...  
 1  larg

In [None]:
# Summarise the history per (user_id, quiz_topic) pair:
# score and accuracy are averaged, the answer/question counters are totalled
user_performance = (
    hist_data
    .groupby(['user_id', 'quiz_topic'])
    .agg({
        'score': 'mean',
        'accuracy': 'mean',
        'correct_answers': 'sum',
        'incorrect_answers': 'sum',
        'questions_count': 'sum',
    })
    .reset_index()
)

# Correct answers as a percentage of the summed questions_count for that user and topic
user_performance['correct_percentage'] = (
    user_performance['correct_answers']
    .div(user_performance['questions_count'])
    .mul(100)
)

# Weak areas are the rows whose correct_percentage falls under the 50% threshold
weak_areas = user_performance[user_performance['correct_percentage'].lt(50)]

weak_areas.head()


Unnamed: 0,user_id,quiz_topic,score,accuracy,correct_answers,incorrect_answers,questions_count,correct_percentage
0,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,Structural Organisation in Animals,32.0,80.0,8,2,128,6.25
1,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Body Fluids and Circulation,86.666667,72.333333,65,24,300,21.666667
3,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Human Reproduction,40.0,38.0,10,16,89,11.235955
4,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Reproductive Health,52.0,43.0,13,17,55,23.636364
5,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Respiration and Gas Exchange,24.0,66.0,6,3,100,6.0


In [None]:
# Pair each weak (user, topic) row with every quiz question filed under the same topic name;
# the inner join keeps only topics that appear in both frames
recommendations = weak_areas.merge(
    quiz_data,
    how='inner',
    left_on='quiz_topic',
    right_on='Topic',
)

# Keep only the fields needed to present a recommendation
personalized_recommendations = recommendations[
    ['user_id', 'quiz_topic', 'Question ID', 'Description', 'Correct Answer', 'Options']
]

# Preview the first few personalized recommendations
personalized_recommendations.head()


Unnamed: 0,user_id,quiz_topic,Question ID,Description,Correct Answer,Options


In [None]:
# Collect the distinct topic labels used by each dataset so they can be compared side by side
quiz_topics = quiz_data.loc[:, 'Topic'].unique()
hist_topics = hist_data.loc[:, 'quiz_topic'].unique()

quiz_topics, hist_topics


(array(['structural organisation in animals '], dtype=object),
 array(['Body Fluids and Circulation', 'Body Fluids and Circulation ',
        'Human Reproduction', 'principles of inheritance and variation ',
        'microbes in human welfare', 'reproductive health ',
        'human health and disease ', 'Reproductive Health',
        'Respiration and Gas Exchange',
        'Structural Organisation in Animals'], dtype=object))

In [None]:
# Standardize topic names (trim surrounding whitespace, lowercase) so that spellings
# such as 'Reproductive Health' and 'reproductive health ' refer to the same topic
quiz_data['Topic'] = quiz_data['Topic'].str.strip().str.lower()
hist_data['quiz_topic'] = hist_data['quiz_topic'].str.strip().str.lower()

# Re-aggregate from the cleaned history rather than only relabelling the old aggregate:
# a groupby on the raw spellings yields one row per spelling, so relabelling alone would
# leave several rows for the same (user, topic) pair, each holding only part of the data
user_performance = hist_data.groupby(['user_id', 'quiz_topic']).agg({
    'score': 'mean',
    'accuracy': 'mean',
    'correct_answers': 'sum',
    'incorrect_answers': 'sum',
    'questions_count': 'sum'
}).reset_index()

# Percentage of correct answers per topic for each user, now over the merged groups
user_performance['correct_percentage'] = (user_performance['correct_answers'] / user_performance['questions_count']) * 100

# Re-identify weak areas after cleaning (correct_percentage below the 50% threshold)
weak_areas = user_performance[user_performance['correct_percentage'] < 50]

# Merge again to get personalized recommendations
recommendations = pd.merge(weak_areas, quiz_data, left_on='quiz_topic', right_on='Topic', how='inner')
personalized_recommendations = recommendations[['user_id', 'quiz_topic', 'Question ID', 'Description', 'Correct Answer', 'Options']]

# Display the first few personalized recommendations
personalized_recommendations.head()


Unnamed: 0,user_id,quiz_topic,Question ID,Description,Correct Answer,Options
0,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,1827,The tissue which has free surface that faces e...,Epithelial tissue,"Muscular tissue, Fluid connective tissue, Epit..."
1,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,1828,Epithelial tissue is distinguished from connec...,basement membrane,"large extracellular matrix, contractibility, a..."
2,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,1829,The ciliated columnar epithelial cells in huma...,bronchioles and fallopian tubes,"Eustachian tube and stomach lining, bronchiole..."
3,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,1830,The squamous epithelium is found in,air sacs of lungs,"stomach, intestine, trachea, air sacs of lungs"
4,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,1831,The kind of epithelium which forms the inner w...,squamous epithelium,"cuboidal epithelium, columnar epithelium, cili..."


In [None]:
# Step 1: Rank the rows of user_performance by a blended performance figure

# Blend the two metrics into one number: accuracy carries 70% of the weight, score 30%
user_performance['performance_score'] = (
    user_performance['accuracy'].mul(0.7) + user_performance['score'].mul(0.3)
)

# Highest performance_score gets rank 1; tied rows share the smallest rank of their group
user_performance['rank'] = user_performance['performance_score'].rank(
    method='min',
    ascending=False,
)

# Step 2: Generate personalized feedback based on performance
def generate_feedback(row):
    """Return the feedback message for one performance row.

    Parameters
    ----------
    row : mapping-like
        Must provide 'performance_score'; 'quiz_topic' is read only when the
        score is below 85.

    Returns
    -------
    str
        A praise message for scores of 85 and above, a "Good job" message
        naming the topic for scores from 60 up to (but not including) 85,
        and an encouragement message naming the topic otherwise.
    """
    performance = row['performance_score']

    # Top band: no topic is mentioned
    if performance >= 85:
        return "Excellent work! Keep up the great performance. Continue practicing to maintain your top rank."

    # Middle band: the >= 85 case has already returned, so this covers 60 <= score < 85
    if performance >= 60:
        return f"Good job! To improve further, focus on the topic '{row['quiz_topic']}' where you can boost your score."

    # Everything else, including scores that fail both comparisons
    return f"You have potential! Focus on improving in '{row['quiz_topic']}'. Consider revisiting related quizzes to strengthen your understanding."

# Build one feedback message per row of user_performance
user_performance['feedback'] = user_performance.apply(generate_feedback, axis='columns')

# Show the ten best-ranked rows together with their feedback
(
    user_performance
    .sort_values(by='rank')
    .loc[:, ['user_id', 'quiz_topic', 'performance_score', 'rank', 'feedback']]
    .head(10)
)


Unnamed: 0,user_id,quiz_topic,performance_score,rank,feedback
6,YcDFSO4ZukTJnnFMgRNVwZTE4j42,human health and disease,98.7,1.0,Excellent work! Keep up the great performance....
7,YcDFSO4ZukTJnnFMgRNVwZTE4j42,microbes in human welfare,92.8,2.0,Excellent work! Keep up the great performance....
9,YcDFSO4ZukTJnnFMgRNVwZTE4j42,reproductive health,80.0,3.0,"Good job! To improve further, focus on the top..."
1,YcDFSO4ZukTJnnFMgRNVwZTE4j42,body fluids and circulation,76.633333,4.0,"Good job! To improve further, focus on the top..."
2,YcDFSO4ZukTJnnFMgRNVwZTE4j42,body fluids and circulation,72.4,5.0,"Good job! To improve further, focus on the top..."
0,7ZXdz3zHuNcdg9agb5YpaOGLQqw2,structural organisation in animals,65.6,6.0,"Good job! To improve further, focus on the top..."
5,YcDFSO4ZukTJnnFMgRNVwZTE4j42,respiration and gas exchange,53.4,7.0,You have potential! Focus on improving in 'res...
4,YcDFSO4ZukTJnnFMgRNVwZTE4j42,reproductive health,45.7,8.0,You have potential! Focus on improving in 'rep...
3,YcDFSO4ZukTJnnFMgRNVwZTE4j42,human reproduction,38.6,9.0,You have potential! Focus on improving in 'hum...
8,YcDFSO4ZukTJnnFMgRNVwZTE4j42,principles of inheritance and variation,24.6,10.0,You have potential! Focus on improving in 'pri...


In [None]:
# Parse 'submitted_at' into datetimes so it can be filtered by date;
# values that cannot be parsed become NaT instead of raising
hist_data['submitted_at'] = hist_data['submitted_at'].pipe(pd.to_datetime, errors='coerce')

# Function to filter data based on time and provide rank & feedback
def get_feedback_by_time(start_date, end_date, data=None):
    """Rank (user, topic) pairs and build feedback for quizzes submitted in a date range.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Inclusive bounds on 'submitted_at'. A bound without timezone information
        is interpreted in the timezone of the 'submitted_at' column (when that
        column is timezone-aware). An ``end_date`` that falls exactly on midnight
        (for example a plain 'YYYY-MM-DD' string) covers that entire day;
        comparing against midnight directly would silently drop every quiz
        submitted during the end date itself.
    data : pandas.DataFrame, optional
        Quiz history to analyse. Defaults to the notebook-level ``hist_data``.
        Needs the columns 'submitted_at', 'user_id', 'quiz_topic', 'score',
        'accuracy', 'correct_answers', 'incorrect_answers' and 'questions_count'.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame or str
        A frame with the columns user_id, quiz_topic, performance_score, rank and
        feedback, sorted by rank; or the message
        "No quiz data found in the given time range." when no row matches.
    """
    source = hist_data if data is None else data

    # Parse locally (a no-op for an already converted column) so the function
    # does not rely on another cell having converted 'submitted_at' first
    submitted = pd.to_datetime(source['submitted_at'], errors='coerce')
    series_tz = submitted.dt.tz

    def as_bound(value):
        # Express the bound in the column's timezone so the comparison is well defined
        bound = pd.Timestamp(value)
        if series_tz is not None:
            if bound.tzinfo is None:
                bound = bound.tz_localize(series_tz)
            else:
                bound = bound.tz_convert(series_tz)
        return bound

    start = as_bound(start_date)
    end = as_bound(end_date)

    if end == end.normalize():
        # Date-only end bound: include the whole end day
        before_end = submitted < end + pd.DateOffset(days=1)
    else:
        before_end = submitted <= end

    # Filter historical data within the time range
    filtered_data = source[(submitted >= start) & before_end]

    if filtered_data.empty:
        return "No quiz data found in the given time range."

    # Group on a normalized topic label so that differently cased or padded
    # spellings of one topic are aggregated into a single row
    filtered_data = filtered_data.assign(
        quiz_topic=filtered_data['quiz_topic'].str.strip().str.lower()
    )

    # Recalculate user performance based on filtered data
    user_perf = filtered_data.groupby(['user_id', 'quiz_topic']).agg({
        'score': 'mean',
        'accuracy': 'mean',
        'correct_answers': 'sum',
        'incorrect_answers': 'sum',
        'questions_count': 'sum'
    }).reset_index()

    # 70% weight on accuracy, 30% on score; best performance_score gets rank 1
    user_perf['performance_score'] = (0.7 * user_perf['accuracy']) + (0.3 * user_perf['score'])
    user_perf['rank'] = user_perf['performance_score'].rank(ascending=False, method='min')

    # Generate personalized feedback
    def feedback(row):
        if row['performance_score'] >= 85:
            return "Excellent work! Keep up the great performance."
        elif 60 <= row['performance_score'] < 85:
            return f"Good job! To improve further, focus on '{row['quiz_topic']}'."
        else:
            return f"You have potential! Focus on improving in '{row['quiz_topic']}'."

    user_perf['feedback'] = user_perf.apply(feedback, axis=1)

    return user_perf[['user_id', 'quiz_topic', 'performance_score', 'rank', 'feedback']].sort_values(by='rank')

# Example run: Replace with your desired dates (format: 'YYYY-MM-DD')
#get_feedback_by_time('2023-01-01', '2023-06-30').head()


In [None]:
# Find the earliest and latest submission timestamps present in the history
date_range = hist_data['submitted_at'].aggregate(['min', 'max'])
date_range


Unnamed: 0,submitted_at
min,2024-12-11 20:36:44.822000+05:30
max,2025-01-17 15:51:29.859000+05:30


In [None]:
# Call the feedback function with the first and last submission dates found in the data
# and preview the top rows of the ranking
get_feedback_by_time(start_date='2024-12-11', end_date='2025-01-17').head(5)


Unnamed: 0,user_id,quiz_topic,performance_score,rank,feedback
5,YcDFSO4ZukTJnnFMgRNVwZTE4j42,human health and disease,98.7,1.0,Excellent work! Keep up the great performance.
6,YcDFSO4ZukTJnnFMgRNVwZTE4j42,microbes in human welfare,92.8,2.0,Excellent work! Keep up the great performance.
8,YcDFSO4ZukTJnnFMgRNVwZTE4j42,reproductive health,80.0,3.0,"Good job! To improve further, focus on 'reprod..."
0,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Body Fluids and Circulation,67.25,4.0,"Good job! To improve further, focus on 'Body F..."
1,YcDFSO4ZukTJnnFMgRNVwZTE4j42,Body Fluids and Circulation,59.8,5.0,You have potential! Focus on improving in 'Bod...
