In [3]:
import pandas as pd


def load_data(file_path):
    """Read the CSV located at ``file_path`` and return it as a DataFrame.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(file_path)


def preprocess_data(data):
    """Clean the raw quiz data and keep only the columns used for analysis.

    The ``accuracy`` column is converted from a percentage (for example
    ``"80 %"``) to a fraction (``0.8``). The caller's DataFrame is left
    untouched; all changes are made on a copy.

    Args:
        data: Raw DataFrame. Must contain ``id``, ``user_id``, ``quiz_id``,
            ``accuracy``, ``score``, ``speed`` and ``quiz__questions_count``.

    Returns:
        A new DataFrame with the required columns, followed by ``topic`` and
        ``difficulty_level`` when the input has them.

    Raises:
        KeyError: If any required column is missing.
    """
    required_columns = ['id', 'user_id', 'quiz_id', 'accuracy', 'score', 'speed', 'quiz__questions_count']
    # create_recommendations() looks for these two columns, so keep them
    # whenever the dataset provides them instead of always stripping them.
    optional_columns = ['topic', 'difficulty_level']

    missing = [column for column in required_columns if column not in data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    kept_columns = required_columns + [column for column in optional_columns if column in data.columns]
    # Work on a copy so the conversion below never mutates the caller's frame.
    processed_data = data[kept_columns].copy()

    accuracy = processed_data['accuracy']
    if not pd.api.types.is_numeric_dtype(accuracy):
        # Textual percentages such as "80 %": drop the suffix before parsing.
        accuracy = accuracy.str.rstrip(' %').astype(float)
    # A column that was already parsed as numbers (no "%" in the file) is
    # treated as percent values too, matching the division applied to strings.
    processed_data['accuracy'] = accuracy / 100

    return processed_data


def analyze_performance(data):
    """Summarise quiz results per user and across the whole dataset.

    Args:
        data: DataFrame with ``user_id``, ``quiz_id``, ``accuracy``,
            ``score`` and ``speed`` columns.

    Returns:
        A ``(user_performance, overall_performance)`` tuple:
        ``user_performance`` is a DataFrame with one row per ``user_id``
        holding the quiz count and the mean accuracy, score and speed;
        ``overall_performance`` is a dict with the dataset-wide means.
    """
    # Output column name -> (source column, aggregation) for the per-user table.
    per_user_spec = {
        'total_quizzes': ('quiz_id', 'count'),
        'avg_accuracy': ('accuracy', 'mean'),
        'avg_score': ('score', 'mean'),
        'avg_speed': ('speed', 'mean'),
    }
    grouped = data.groupby('user_id')
    user_performance = grouped.agg(**per_user_spec).reset_index()

    # Calculate overall performance as plain column means.
    overall_performance = {
        f'average_{metric}': data[metric].mean()
        for metric in ('accuracy', 'score', 'speed')
    }
    return user_performance, overall_performance


def generate_insights(user_performance, weak_threshold=0.6, strong_threshold=0.7):
    """Split users into weak and strong performers by average accuracy.

    Args:
        user_performance: DataFrame with an ``avg_accuracy`` column holding
            fractions (the defaults assume 0.6 means 60%).
        weak_threshold: Users whose ``avg_accuracy`` is strictly below this
            value are reported as weak. Defaults to 0.6.
        strong_threshold: Users whose ``avg_accuracy`` is strictly above this
            value are reported as strong. Defaults to 0.7.

    Returns:
        A ``(weak_users, strong_users)`` tuple of DataFrames filtered from
        ``user_performance``. Users between the two thresholds (inclusive)
        appear in neither.

    Raises:
        ValueError: If ``weak_threshold`` is greater than
            ``strong_threshold``, which would let a user be both weak and
            strong.
    """
    if weak_threshold > strong_threshold:
        raise ValueError("weak_threshold must not exceed strong_threshold")

    avg_accuracy = user_performance['avg_accuracy']
    weak_users = user_performance[avg_accuracy < weak_threshold]
    strong_users = user_performance[avg_accuracy > strong_threshold]

    return weak_users, strong_users


def create_recommendations(data, weak_users):
    """Build one recommendation message per weak performer.

    Args:
        data: Per-quiz DataFrame with ``user_id`` and ``accuracy`` columns.
            When it also has ``topic`` and ``difficulty_level`` columns the
            messages name the user's weakest topics and difficulty level.
        weak_users: DataFrame of weak performers with a ``user_id`` column.

    Returns:
        A list of message strings. If ``weak_users`` is empty the list holds
        a single "all users are performing well" message.
    """
    if weak_users.empty:
        return ["All users are performing well! Keep up the good work."]

    # Read the ids straight from the column. iterrows() upcasts an
    # all-numeric row to float, which turned integer ids into "1.0" in the
    # generated messages.
    user_ids = weak_users['user_id'].tolist()

    has_breakdown = 'topic' in data.columns and 'difficulty_level' in data.columns
    if not has_breakdown:
        # Without topic/difficulty information only generic feedback is possible.
        return [
            f"User {user_id} should focus on improving overall accuracy and speed by reviewing past quizzes."
            for user_id in user_ids
        ]

    recommendations = []
    for user_id in user_ids:
        user_data = data[data['user_id'] == user_id]

        # The three topics with the lowest mean accuracy for this user.
        weak_topics = (
            user_data.groupby('topic')['accuracy']
            .mean()
            .sort_values()
            .head(3)
            .index.tolist()
        )

        # The single difficulty level with the lowest mean accuracy.
        weak_difficulties = (
            user_data.groupby('difficulty_level')['accuracy']
            .mean()
            .sort_values()
            .head(1)
            .index.tolist()
        )

        recommendations.append(
            f"User {user_id} should focus on topics {weak_topics} at difficulty levels {weak_difficulties} to improve accuracy."
        )
    return recommendations


def display_results(user_performance, overall_performance, weak_users, strong_users, recommendations):
    """Print every analysis result to standard output.

    Each section is written as a header line preceded by a blank line,
    followed by the printed object. Recommendations are printed one per line.

    Args:
        user_performance: Per-user summary to print.
        overall_performance: Dataset-wide summary to print.
        weak_users: Weak performers to print.
        strong_users: Strong performers to print.
        recommendations: Iterable of message strings.
    """
    # Header/payload pairs, listed in the order they are printed.
    sections = (
        ("\n=== Overall Performance ===", overall_performance),
        ("\n=== User Performance ===", user_performance),
        ("\n=== Weak Performers ===", weak_users),
        ("\n=== Strong Performers ===", strong_users),
    )
    for header, payload in sections:
        print(header)
        print(payload)

    print("\n=== Recommendations ===")
    for message in recommendations:
        print(message)


def main(file_path):
    """Run the whole pipeline for the CSV at ``file_path`` and print the results.

    Steps, in order: load, preprocess, analyse, classify users, build
    recommendations, display.

    Args:
        file_path: Location of the CSV file, passed on to ``load_data``.
    """
    cleaned = preprocess_data(load_data(file_path))

    per_user, overall = analyze_performance(cleaned)
    weak, strong = generate_insights(per_user)
    advice = create_recommendations(cleaned, weak)

    display_results(per_user, overall, weak, strong, advice)

# Hard-coded absolute location of the CSV file that main() loads.
file_path = "C://Users//haree//Downloads//historicaldataset.csv"

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main(file_path)


=== Overall Performance ===
{'average_accuracy': 0.7221428571428571, 'average_score': 60.285714285714285, 'average_speed': 95.07142857142857}

=== User Performance ===
                        user_id  total_quizzes  avg_accuracy  avg_score  \
0  YcDFSO4ZukTJnnFMgRNVwZTE4j42             14      0.722143  60.285714   

   avg_speed  
0  95.071429  

=== Weak Performers ===
Empty DataFrame
Columns: [user_id, total_quizzes, avg_accuracy, avg_score, avg_speed]
Index: []

=== Strong Performers ===
                        user_id  total_quizzes  avg_accuracy  avg_score  \
0  YcDFSO4ZukTJnnFMgRNVwZTE4j42             14      0.722143  60.285714   

   avg_speed  
0  95.071429  

=== Recommendations ===
All users are performing well! Keep up the good work.
