# Career Quiz Model Development

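This notebook builds a career quiz model from the question and answer-option data in `expanded_career_quiz_questions.csv`. Each answer option carries a score for every RIASEC type; the scores of a user's selected options are summed into a RIASEC profile, which is then mapped to career recommendations.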

## RIASEC Model Overview
- **R**ealistic: Practical, hands-on work
- **I**nvestigative: Research, analysis, problem-solving
- **A**rtistic: Creative, expressive work
- **S**ocial: Helping, teaching, serving others
- **E**nterprising: Leading, persuading, managing
- **C**onventional: Organizing, data management, detail-oriented

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

# Set up plotting style: start from matplotlib's built-in 'default' style,
# then make seaborn's "husl" palette the default color cycle for later plots.
# NOTE(review): the order looks intentional (style first, palette second) —
# confirm before reordering.
plt.style.use('default')
sns.set_palette("husl")

# Simple confirmation that this setup cell ran to completion.
print("Libraries imported successfully!")

## 1. Load and Explore the Data

In [None]:
# Load the CSV data
DATA_PATH = 'expanded_career_quiz_questions.csv'

# Columns the rest of the notebook reads. Checking them here makes a schema
# mismatch fail immediately with a clear message instead of surfacing later
# as a KeyError inside the model class.
REQUIRED_COLUMNS = [
    'questionId', 'questionText', 'category', 'optionId', 'optionText',
    'R', 'I', 'A', 'S', 'E', 'C',
]

df = pd.read_csv(DATA_PATH)

missing_columns = [name for name in REQUIRED_COLUMNS if name not in df.columns]
if missing_columns:
    raise ValueError(f"{DATA_PATH} is missing required columns: {missing_columns}")

print("Dataset shape:", df.shape)
print("\nColumn names:")
print(df.columns.tolist())
print("\nFirst few rows:")
df.head()

In [None]:
# Explore the data structure
# One score column per RIASEC dimension; reused by the cells that follow.
riasec_cols = list("RIASEC")

print("Unique question categories:", df['category'].unique(), sep="\n")

print("\nNumber of unique questions:", df['questionId'].nunique(), sep="\n")

questions_per_category = df.groupby('category')['questionId'].nunique()
print("\nQuestions per category:", questions_per_category, sep="\n")

print("\nRIASEC score ranges:")
for col in riasec_cols:
    lowest, highest = df[col].min(), df[col].max()
    print(f"{col}: {lowest} to {highest}")

## 2. Data Analysis and Visualization

In [None]:
# Visualize RIASEC score distributions: one histogram per dimension on a 2x3 grid
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
axes = axes.flatten()

for ax, col in zip(axes, riasec_cols):
    ax.hist(df[col], bins=20, alpha=0.7, edgecolor='black')
    ax.set(
        title=f'{col} Score Distribution',
        xlabel='Score',
        ylabel='Frequency',
    )

plt.tight_layout()
plt.show()

# Summary statistics for RIASEC scores
print("\nRIASEC Score Statistics:")
df[riasec_cols].describe()

## 3. Build the Quiz Model

In [None]:
class CareerQuizModel:
    """RIASEC-based career quiz built from a table of answer options.

    Each row of the input frame describes one answer option of one question
    and carries an integer score for every RIASEC type. The model groups the
    rows into questions, sums the scores of the options a user selected, and
    maps the resulting profile to a list of suggested careers.

    Attributes:
        df: The frame passed to the constructor (kept by reference).
        questions: List of question dicts, each with ``id``, ``text``,
            ``category`` and ``options`` (a list of ``id``/``text``/``scores``
            dicts), in order of first appearance in ``df``.
        riasec_careers: Mapping of RIASEC letter to ``name``, ``description``
            and ``careers``.
    """

    # Single source of truth for the score keys; the order here is the key
    # order of every score dict this class produces.
    RIASEC_TYPES = ('R', 'I', 'A', 'S', 'E', 'C')

    def __init__(self, csv_data):
        """Build the model from a DataFrame of question/option rows.

        Args:
            csv_data: DataFrame with the columns ``questionId``,
                ``questionText``, ``category``, ``optionId``, ``optionText``
                and one column per entry of ``RIASEC_TYPES``.
        """
        self.df = csv_data
        self.questions = self._parse_questions()
        self.riasec_careers = self._define_career_mappings()

    def _parse_questions(self):
        """Parse CSV data into structured question format.

        Rows sharing a ``questionId`` are merged into one question; the
        question text and category are taken from the first such row.

        Returns:
            List of question dicts in order of first appearance.

        Raises:
            KeyError: If a required column is missing (and the frame has rows).
            ValueError: If a score cell cannot be converted to ``int``
                (for example, a missing value).
        """
        questions = {}

        # One pass over plain dict records instead of per-row Series objects.
        for row in self.df.to_dict('records'):
            q_id = row['questionId']

            if q_id not in questions:
                questions[q_id] = {
                    'id': q_id,
                    'text': row['questionText'],
                    'category': row['category'],
                    'options': []
                }

            questions[q_id]['options'].append({
                'id': row['optionId'],
                'text': row['optionText'],
                'scores': {
                    riasec_type: int(row[riasec_type])
                    for riasec_type in self.RIASEC_TYPES
                }
            })

        return list(questions.values())

    def _define_career_mappings(self):
        """Define career mappings for each RIASEC type.

        Returns:
            Dict keyed by RIASEC letter; each value holds the type's ``name``,
            a short ``description`` and an ordered list of ``careers``.
        """
        return {
            'R': {
                'name': 'Realistic',
                'description': 'Practical, hands-on, mechanical',
                'careers': [
                    'Mechanical Engineer', 'Electrician', 'Carpenter', 'Pilot',
                    'Automotive Technician', 'Civil Engineer', 'Architect',
                    'Agricultural Engineer', 'Construction Manager', 'Surveyor'
                ]
            },
            'I': {
                'name': 'Investigative',
                'description': 'Analytical, scientific, research-oriented',
                'careers': [
                    'Data Scientist', 'Research Scientist', 'Software Engineer',
                    'Doctor', 'Pharmacist', 'Laboratory Technician',
                    'Statistician', 'Biologist', 'Chemist', 'Physicist'
                ]
            },
            'A': {
                'name': 'Artistic',
                'description': 'Creative, expressive, innovative',
                'careers': [
                    'Graphic Designer', 'Writer', 'Musician', 'Artist',
                    'Interior Designer', 'Fashion Designer', 'Photographer',
                    'Film Director', 'Art Therapist', 'Creative Director'
                ]
            },
            'S': {
                'name': 'Social',
                'description': 'Helping, teaching, caring for others',
                'careers': [
                    'Teacher', 'Counselor', 'Social Worker', 'Nurse',
                    'Therapist', 'Human Resources Manager', 'Community Worker',
                    'Psychologist', 'Rehabilitation Counselor', 'School Principal'
                ]
            },
            'E': {
                'name': 'Enterprising',
                'description': 'Leading, persuading, managing',
                'careers': [
                    'Business Manager', 'Sales Representative', 'Entrepreneur',
                    'Marketing Manager', 'Lawyer', 'Real Estate Agent',
                    'Financial Advisor', 'Project Manager', 'CEO', 'Politician'
                ]
            },
            'C': {
                'name': 'Conventional',
                'description': 'Organizing, detail-oriented, systematic',
                'careers': [
                    'Accountant', 'Bookkeeper', 'Administrative Assistant',
                    'Bank Teller', 'Data Entry Clerk', 'Office Manager',
                    'Auditor', 'Tax Preparer', 'Secretary', 'Librarian'
                ]
            }
        }

    def calculate_riasec_scores(self, selected_options):
        """Calculate RIASEC scores based on selected options.

        Args:
            selected_options: Iterable of option ids. An id listed more than
                once is counted once per listing; ids that match no option
                are ignored.

        Returns:
            Dict mapping each RIASEC letter to the summed score.
        """
        # Index the options once so each selected id is an O(1) lookup.
        # setdefault keeps the first occurrence, so an option id that is
        # (incorrectly) reused by a later question is not counted again.
        scores_by_option = {}
        for question in self.questions:
            for option in question['options']:
                scores_by_option.setdefault(option['id'], option['scores'])

        totals = dict.fromkeys(self.RIASEC_TYPES, 0)

        for option_id in selected_options:
            option_scores = scores_by_option.get(option_id)
            if option_scores is None:
                continue
            for riasec_type in totals:
                totals[riasec_type] += option_scores[riasec_type]

        return totals

    def get_career_recommendations(self, riasec_scores, top_n=5, careers_per_type=3):
        """Get career recommendations based on RIASEC scores.

        Types are visited from highest to lowest score (ties keep the order
        of ``riasec_scores``); types with a non-positive score are skipped.

        Args:
            riasec_scores: Dict mapping RIASEC letter to numeric score.
            top_n: Maximum number of recommendations to return.
            careers_per_type: How many careers to take from the head of each
                type's career list.

        Returns:
            List of recommendation dicts with ``career``, ``riasec_type``,
            ``type_name``, ``description``, ``score`` and
            ``match_percentage`` (score relative to the highest score).
        """
        # Sort RIASEC scores in descending order
        ranked_types = sorted(riasec_scores.items(), key=lambda item: item[1], reverse=True)

        # Loop-invariant; only dereferenced for positive scores, in which
        # case it is itself positive, so the division below is safe.
        best_score = max(riasec_scores.values(), default=0)

        recommendations = []
        seen_careers = set()

        for riasec_type, score in ranked_types:
            if score <= 0:  # Only consider types with positive scores
                continue

            career_info = self.riasec_careers[riasec_type]
            for career in career_info['careers'][:careers_per_type]:
                if career in seen_careers:
                    continue
                seen_careers.add(career)
                recommendations.append({
                    'career': career,
                    'riasec_type': riasec_type,
                    'type_name': career_info['name'],
                    'description': career_info['description'],
                    'score': score,
                    'match_percentage': min(100, (score / best_score) * 100)
                })

        return recommendations[:top_n]

    def export_model_data(self):
        """Export model data for frontend/backend use.

        Returns:
            Dict with ``questions``, ``riasec_careers`` (both returned by
            reference, not copied) and ``model_info`` metadata. Categories
            are listed in order of first appearance so repeated exports of
            the same data are identical apart from ``created_date``.
        """
        # dict.fromkeys de-duplicates while preserving order; a set would make
        # the exported order vary from run to run.
        categories = list(dict.fromkeys(q['category'] for q in self.questions))

        return {
            'questions': self.questions,
            'riasec_careers': self.riasec_careers,
            'model_info': {
                'version': '1.0',
                'total_questions': len(self.questions),
                'categories': categories,
                'created_date': pd.Timestamp.now().isoformat()
            }
        }

# Initialize the model from the loaded question/option frame
quiz_model = CareerQuizModel(df)
question_count = len(quiz_model.questions)
print(f"Model initialized with {question_count} questions")

## 4. Export Model for Integration

In [None]:
# Export the complete model data
model_data = quiz_model.export_model_data()
model_info = model_data['model_info']

# Save to JSON file for backend integration (UTF-8, non-ASCII kept readable)
output_path = 'career_quiz_model.json'
with open(output_path, mode='w', encoding='utf-8') as f:
    json.dump(model_data, f, indent=2, ensure_ascii=False)

print(f"Model exported to '{output_path}'")
print(f"Total questions: {model_info['total_questions']}")
print(f"Categories: {model_info['categories']}")