In [21]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

def generate_crew_data(num_crew_members=50, seed=None):
    """Generate synthetic crew, training-history and performance-feedback tables.

    Parameters
    ----------
    num_crew_members : int, default 50
        Number of crew members to create. ``0`` yields three empty frames
        that still carry their column headers.
    seed : int or None, default None
        When given, all random draws come from a private ``random.Random(seed)``
        so repeated calls with the same seed return identical frames. When
        ``None``, the module-level ``random`` generator is used, so any global
        ``random.seed(...)`` set by the caller still applies.

    Returns
    -------
    tuple of (crew_df, training_df, feedback_df)
        crew_df     : one row per crew member with ``crew_id``, ``join_date``,
                      ``experience_years`` and one ``<skill>_score`` column per skill.
        training_df : 2-8 rows per crew member with ``crew_id``,
                      ``training_module``, ``completion_date``, ``score``, ``feedback``.
        feedback_df : 3-10 rows per crew member with ``crew_id``, ``date``,
                      ``task_completion_rate``, ``quality_score``, ``feedback_text``.

    Raises
    ------
    ValueError
        If ``num_crew_members`` is negative.
    """
    if num_crew_members < 0:
        raise ValueError("num_crew_members must be non-negative")

    # One generator for every draw: mixing `random` and `np.random` made it
    # impossible to reproduce a run from a single seed.
    rng = random.Random(seed) if seed is not None else random

    # Define possible skills and training modules
    skills = ['Technical Knowledge', 'Safety Procedures', 'Equipment Operation',
              'Communication', 'Leadership', 'Problem Solving', 'Emergency Response',
              'Quality Control', 'Team Collaboration']

    training_modules = [
        'Basic Safety Training',
        'Advanced Technical Operations',
        'Leadership Development',
        'Emergency Response Protocol',
        'Quality Management Systems',
        'Communication Workshop',
        'Equipment Handling',
        'Risk Assessment',
        'Team Building'
    ]

    training_feedback_options = [
        'Excellent understanding of concepts',
        'Needs improvement in practical application',
        'Strong theoretical knowledge but requires practice',
        'Outstanding performance and engagement',
        'Good progress but needs more confidence',
        'Demonstrates great potential for advancement'
    ]

    performance_feedback_options = [
        'Shows strong initiative in problem-solving',
        'Could improve communication with team members',
        'Consistently follows safety procedures',
        'Needs additional support with technical tasks',
        'Demonstrates excellent leadership qualities',
        'Required more training on equipment operation'
    ]

    start_date = datetime(2023, 1, 1)
    max_join_offset = 365   # latest join date, in days after start_date
    max_event_offset = 450  # latest training/feedback date, in days after start_date

    def format_date(offset_days):
        """Return start_date + offset_days as a 'YYYY-MM-DD' string."""
        return (start_date + timedelta(days=offset_days)).strftime('%Y-%m-%d')

    skill_columns = [f'{skill.lower().replace(" ", "_")}_score' for skill in skills]

    # Explicit column lists keep the headers present even when there are no
    # rows; without them, a zero-member call failed on the 'crew_id' lookup.
    crew_columns = ['crew_id', 'join_date', 'experience_years', *skill_columns]
    training_columns = ['crew_id', 'training_module', 'completion_date', 'score', 'feedback']
    feedback_columns = ['crew_id', 'date', 'task_completion_rate', 'quality_score', 'feedback_text']

    # Generate crew member base data
    crew_data = []
    join_offsets = {}  # crew_id -> join day offset, reused to bound event dates
    for i in range(num_crew_members):
        crew_id = f'C-{i+1:03d}'
        join_offset = rng.randint(0, max_join_offset)
        join_offsets[crew_id] = join_offset

        crew_member = {
            'crew_id': crew_id,
            'join_date': format_date(join_offset),
            'experience_years': rng.uniform(0.5, 15)
        }

        # Generate skill scores (60-100)
        for column in skill_columns:
            crew_member[column] = rng.randint(60, 100)

        crew_data.append(crew_member)

    crew_df = pd.DataFrame(crew_data, columns=crew_columns)

    # Generate training history
    training_history = []
    for crew_id, join_offset in join_offsets.items():
        num_trainings = rng.randint(2, 8)
        for _ in range(num_trainings):
            training_history.append({
                'crew_id': crew_id,
                'training_module': rng.choice(training_modules),
                # Never earlier than the member's own join date.
                'completion_date': format_date(rng.randint(join_offset, max_event_offset)),
                'score': rng.randint(70, 100),
                'feedback': rng.choice(training_feedback_options)
            })

    training_df = pd.DataFrame(training_history, columns=training_columns)

    # Generate performance feedback data
    feedback_data = []
    for crew_id, join_offset in join_offsets.items():
        num_feedbacks = rng.randint(3, 10)
        for _ in range(num_feedbacks):
            feedback_data.append({
                'crew_id': crew_id,
                # Never earlier than the member's own join date.
                'date': format_date(rng.randint(join_offset, max_event_offset)),
                'task_completion_rate': rng.uniform(75, 100),
                'quality_score': rng.randint(70, 100),
                'feedback_text': rng.choice(performance_feedback_options)
            })

    feedback_df = pd.DataFrame(feedback_data, columns=feedback_columns)

    return crew_df, training_df, feedback_df

# Build the three synthetic tables in one call
crew_df, training_df, feedback_df = generate_crew_data()

# (heading printed above the preview, frame, CSV file name) for each table
_datasets = (
    ("\nCrew Data Sample:", crew_df, 'crew_data.csv'),
    ("\nTraining History Sample:", training_df, 'training_history.csv'),
    ("\nPerformance Feedback Sample:", feedback_df, 'performance_feedback.csv'),
)

# Write every table to disk first, without the index column
for _heading, _frame, _csv_name in _datasets:
    _frame.to_csv(_csv_name, index=False)

# Then print the first rows of each table under its heading
for _heading, _frame, _csv_name in _datasets:
    print(_heading)
    print(_frame.head())


Crew Data Sample:
  crew_id   join_date       department  experience_years  \
0   C-001  2023-03-13  Quality Control          1.912828   
1   C-002  2023-02-26           Safety         12.031863   
2   C-003  2023-01-09      Maintenance          7.235138   
3   C-004  2023-12-25           Safety         13.123613   
4   C-005  2023-01-03       Operations          7.740939   

   technical_knowledge_score  safety_procedures_score  \
0                         98                       61   
1                         99                       93   
2                         63                       75   
3                         72                       82   
4                         83                       89   

   equipment_operation_score  communication_score  leadership_score  \
0                         98                   89                87   
1                         79                   64                92   
2                        100                   95               