In [12]:
from data_gathering import historical_quiz_data, historical_quiz_df, combined_df, current_quiz_df, current_quiz_data
import pandas as pd



In [13]:
print(historical_quiz_df)

        id  quiz_id                       user_id  \
0   336497       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
1   336448        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
2   333330       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
3   333242        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
4   329504       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
5   328488       57  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
6   328414        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
7   321514       20  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
8   320963       24  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
9   320916       18  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
10  315179       25  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
11  315081       18  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
12  257774       58  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
13  195808       50  YcDFSO4ZukTJnnFMgRNVwZTE4j42   

                     submitted_at                     created_at  \
0   2025-01-17T15:30:18.027+05:30  2025-01-17T15:30:18.044+05:30   
1   2025-01-17T15:17:44.042+05:30  2025-01-17T15:17:44.056+05:30   


In [14]:
#to analyze trends based on topic column we must extract it from the nested dict quiz
historical_quiz_df['topic'] = historical_quiz_df['quiz'].apply(lambda x: x['topic'])
historical_quiz_df['total_questions'] = historical_quiz_df['quiz'].apply(lambda x: x['questions_count'])

#convert % to a numeric typr for easier calcualtion
historical_quiz_df['accuracy'] = historical_quiz_df['accuracy'].apply(lambda x: float(str(x).replace('%', '')) if isinstance(x, str) else x)


#calculate trends in historical data

#make sure to handle non numeric or missing values so it doesnt intefere with the calcualtion

numeric_columns = ['score', 'accuracy', 'speed', 'correct_answers', 'incorrect_answers', 'total_questions']
for column in numeric_columns:
    historical_quiz_df[column] = pd.to_numeric(historical_quiz_df[column], errors='coerce')


trends = historical_quiz_df.groupby('topic').agg({
    'score':'mean',
    'accuracy':'mean',
    'speed':'mean',
    'correct_answers':'mean',
    'incorrect_answers':'mean',
    'total_questions':'mean'
    }).reset_index()

trends.rename(columns={
    'score':'avg_score',
    'accuracy': 'avg_accuracy',
    'speed': 'avg_speed',
    'correct_answers': 'avg_correct_answers',
    'incorrect_answers': 'avg_incorrect_answers',
    'total_questions':'avg_total_questions'
    }, inplace=True)

print("performance trends by topic:")
print(trends)

trends.to_csv('trends_data.csv')

performance trends by topic:
                                      topic   avg_score  avg_accuracy  \
0               Body Fluids and Circulation   86.666667     72.333333   
1              Body Fluids and Circulation    54.666667     80.000000   
2                        Human Reproduction   40.000000     38.000000   
3                       Reproductive Health   52.000000     43.000000   
4              Respiration and Gas Exchange   24.000000     66.000000   
5                 human health and disease   112.000000     93.000000   
6                 microbes in human welfare   76.000000    100.000000   
7  principles of inheritance and variation    12.000000     30.000000   
8                      reproductive health    52.000000     92.000000   

    avg_speed  avg_correct_answers  avg_incorrect_answers  avg_total_questions  
0   98.666667            21.666667               8.000000                100.0  
1   92.666667            13.666667               3.333333                 23.0

In [15]:
# we must compare the recent one to previous trends 

def compare_recent_to_trends(recent_submission, trends):
    # extract the topic of the recent submission
    recent_topic = recent_submission['quiz']['topic']
    
    # find the corresponding row in the trends dataFrame
    topic_trend = trends[trends['topic'] == recent_topic]
    
    if topic_trend.empty:
        print(f"No historical data available for topic: {recent_topic}")
        return None
    
    # create a comparison dictionary
    comparison = {
        'topic': recent_topic,
        'recent_score': recent_submission['score'],
        'avg_score': topic_trend['avg_score'].values[0],
        'recent_accuracy': float(recent_submission['accuracy'].replace('%', '')),
        'avg_accuracy': topic_trend['avg_accuracy'].values[0],
        'recent_speed': recent_submission['speed'],
        'avg_speed': topic_trend['avg_speed'].values[0],
        'recent_correct_answers': recent_submission['correct_answers'],
        'avg_correct_answers': topic_trend['avg_correct_answers'].values[0],
        'recent_incorrect_answers': recent_submission['incorrect_answers'],
        'avg_incorrect_answers': topic_trend['avg_incorrect_answers'].values[0],
    }
    
    return comparison

# compare recent quiz submission to historical trends
comparison_result = compare_recent_to_trends(current_quiz_df, trends)

# display the comparison
print("\nComparison of Recent Submission with Historical Trends:")
if comparison_result:
    for key, value in comparison_result.items():
        print(f"{key}: {value}")


No historical data available for topic: Structural Organisation in Animals

Comparison of Recent Submission with Historical Trends:


In [16]:
# Check for non-numeric values in accuracy
print(historical_quiz_df['accuracy'].unique())
print(trends['avg_accuracy'].unique())


[ 90. 100.  96.  31.  38.  50.  30.  93.  84.  43.  66.]
[ 72.33333333  80.          38.          43.          66.
  93.         100.          30.          92.        ]


In [17]:
# suggest areas of improvement based on comparison
def suggest_improvements(comparison):
    if not comparison:
        return None
    
    suggestions = []
    
    # analyze performance and provide feedback
    if comparison['recent_score'] < comparison['avg_score']:
        suggestions.append(f"Try to improve your score. Recent: {comparison['recent_score']}, Avg: {comparison['avg_score']}")
    
    if comparison['recent_accuracy'] < comparison['avg_accuracy']:
        suggestions.append(f"Focus on improving accuracy. Recent: {comparison['recent_accuracy']}%, Avg: {comparison['avg_accuracy']}%")
    
    if comparison['recent_speed'] < comparison['avg_speed']:
        suggestions.append(f"Work on increasing speed. Recent: {comparison['recent_speed']}, Avg: {comparison['avg_speed']}")
    
    if comparison['recent_correct_answers'] < comparison['avg_correct_answers']:
        suggestions.append(f"Practice more to increase the number of correct answers. Recent: {comparison['recent_correct_answers']}, Avg: {comparison['avg_correct_answers']}")
    
    if comparison['recent_incorrect_answers'] > comparison['avg_incorrect_answers']:
        suggestions.append(f"Try to reduce mistakes. Recent: {comparison['recent_incorrect_answers']}, Avg: {comparison['avg_incorrect_answers']}")
    
    return suggestions

# generate suggestions for improvement
improvement_suggestions = suggest_improvements(comparison_result)

# display improvement suggestions
print("\nSuggestions for Improvement:")
if improvement_suggestions:
    for suggestion in improvement_suggestions:
        print(f"- {suggestion}")
else:
    print("No suggestions needed. Keep up the good work!")



Suggestions for Improvement:
No suggestions needed. Keep up the good work!


In [18]:
def generate_recommendations(comparison_result):
    if not comparison_result:
        return {"message": "No data available for recommendations."}

    recommendations = {}
    weak_topics = []
    strong_topics = []

    # analyze weak and strong points
    if comparison_result['recent_score'] < comparison_result['avg_score']:
        weak_topics.append({
            "topic": comparison_result['topic'],
            "reason": f"Recent score ({comparison_result['recent_score']}) is below the average ({comparison_result['avg_score']}).",
            "action": "Revise topic fundamentals and attempt related MCQs."
        })

    if comparison_result['recent_accuracy'] < comparison_result['avg_accuracy']:
        weak_topics.append({
            "topic": comparison_result['topic'],
            "reason": f"Accuracy ({comparison_result['recent_accuracy']}%) is below the average ({comparison_result['avg_accuracy']}%).",
            "action": "Review incorrect answers and focus on weak question types."
        })

    if comparison_result['recent_speed'] > comparison_result['avg_speed']:
        weak_topics.append({
            "topic": comparison_result['topic'],
            "reason": f"Speed ({comparison_result['recent_speed']}) is slower than the average ({comparison_result['avg_speed']}).",
            "action": "Practice timed quizzes to improve response speed."
        })

    # strong topics
    if comparison_result['recent_score'] >= comparison_result['avg_score']:
        strong_topics.append({
            "topic": comparison_result['topic'],
            "reason": f"Consistent or above-average score ({comparison_result['recent_score']}).",
            "action": "Maintain focus on this topic to keep improving."
        })

    recommendations['weak_topics'] = weak_topics
    recommendations['strong_topics'] = strong_topics

    return recommendations


In [19]:
def define_student_persona(historical_data):
    total_quizzes = len(historical_data)
    avg_score = historical_data['score'].mean()
    avg_accuracy = historical_data['accuracy'].mean()

    # label by behavior
    if avg_score > 80 and avg_accuracy > 90:
        persona = "Top Performer"
        description = "Consistently excels with high scores and accuracy."
    elif avg_score > 60 and avg_accuracy > 70:
        persona = "Strategic Learner"
        description = "Steady performer with room for improvement in specific areas."
    elif avg_score < 50 or avg_accuracy < 50:
        persona = "Needs Improvement"
        description = "Struggles with performance consistency. Needs to work on fundamentals."

    # identify dominant subject
    dominant_topic = historical_data.groupby('topic')['score'].mean().idxmax()

    return {
        "persona": persona,
        "description": description,
        "dominant_topic": dominant_topic,
        "avg_score": avg_score,
        "avg_accuracy": avg_accuracy
    }


In [20]:
import matplotlib.pyplot as plt
import seaborn as sns

def visualize_performance(historical_quiz_df, trends):
    # average score by topic
    plt.figure(figsize=(10, 6))
    sns.barplot(data=trends, x='topic', y='avg_score', palette='coolwarm')
    plt.xticks(rotation=45, ha='right')
    plt.title('Average Score by Topic')
    plt.xlabel('Topic')
    plt.ylabel('Average Score')
    plt.tight_layout()
    plt.show()

    # trends in performance over time
    plt.figure(figsize=(10, 6))
    sns.lineplot(data=historical_quiz_df, x='date', y='score', marker='o', label='Score')
    sns.lineplot(data=historical_quiz_df, x='date', y='accuracy', marker='o', label='Accuracy (%)')
    plt.title('Performance Trends Over Time')
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.legend()
    plt.tight_layout()
    plt.show()


In [21]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pandas as pd
import logging

logging.basicConfig(level=logging.DEBUG)

# FastAPI app instance
app = FastAPI()

# Historical trends (example data)
trends = pd.DataFrame({
    "topic": ["Body Fluids and Circulation", "Respiration and Gas Exchange"],
    "avg_score": [85, 70],
    "avg_accuracy": [80.0, 75.0],
    "avg_speed": [10, 15],
    "avg_correct_answers": [20, 18],
    "avg_incorrect_answers": [5, 7],
})

# Pydantic models
class Quiz(BaseModel):
    topic: str

class QuizSubmission(BaseModel):
    quiz: Quiz
    score: float
    accuracy: str
    speed: int
    correct_answers: int
    incorrect_answers: int

@app.post("/analyze")
def analyze_performance(data: QuizSubmission):
    logging.debug(f"Received data: {data}")
    
    try:
        # Extract and clean recent submission
        recent_topic = data.quiz.topic
        recent_accuracy = float(data.accuracy.replace('%', '').strip())

        # Find the corresponding topic in trends
        topic_trend = trends[trends["topic"] == recent_topic]
        if topic_trend.empty:
            raise HTTPException(status_code=404, detail=f"No historical data available for topic: {recent_topic}")

        # Compare with trends
        comparison = {
            "topic": recent_topic,
            "recent_score": data.score,
            "avg_score": topic_trend["avg_score"].values[0],
            "recent_accuracy": recent_accuracy,
            "avg_accuracy": topic_trend["avg_accuracy"].values[0],
            "recent_speed": data.speed,
            "avg_speed": topic_trend["avg_speed"].values[0],
            "recent_correct_answers": data.correct_answers,
            "avg_correct_answers": topic_trend["avg_correct_answers"].values[0],
            "recent_incorrect_answers": data.incorrect_answers,
            "avg_incorrect_answers": topic_trend["avg_incorrect_answers"].values[0],
        }

        return {"comparison": comparison}

    except Exception as e:
        logging.error(f"Error occurred: {e}")
        raise HTTPException(status_code=500, detail=str(e))


In [22]:
#from uvicorn import Config, Server

#config = Config(app=app, host="127.0.0.1", port=8000)
#server = Server(config)
#await server.serve()