In [4]:
import pandas as pd

def check_csv_format(file_path):
    """Validate a volunteer roster CSV and print a report of the findings.

    The file must contain the columns ``id``, ``name`` and ``team``. The
    checks run in this order, stopping at the first failing one:

    1. the file can be read as CSV,
    2. all required columns are present,
    3. no ``id``, ``name`` or ``team`` cell is empty,
    4. every team is one of the known teams,
    5. no ``id`` value occurs more than once.

    An ID whose prefix does not match its team only produces a warning; it
    does not make the file invalid.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            file to validate.

    Returns:
        bool: ``True`` if all checks pass (a summary is printed), ``False``
        otherwise (an error message describing the first failure is printed).
    """
    # Single source of truth: team name -> prefix its IDs are expected to
    # carry. The list of valid teams is derived from it so the two can never
    # drift apart.
    id_prefixes = {
        'Core Team': 'CT',
        'Event Facilitator': 'EF',
        'Operation': 'OP',
        'Procurement': 'PO',
        'Sales': 'SL',
        'Public Relations': 'PR'
    }
    valid_teams = list(id_prefixes)
    required_columns = ['id', 'name', 'team']

    # Only the read itself is guarded, so that a problem in the validation
    # logic is never misreported as a file-reading error.
    try:
        # dtype=str keeps IDs as text even when they look numeric, so the
        # prefix check below can always call str.startswith on them.
        df = pd.read_csv(file_path, dtype=str)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' EmptyDataError and
        # ParserError, as well as UnicodeDecodeError, derive from ValueError.
        print(f"Error reading CSV file: {str(e)}")
        return False

    # Check if all required columns exist
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Error: Missing columns: {missing_columns}")
        return False

    # Check for empty values first: a blank cell is read as NaN, which would
    # otherwise be reported as an "invalid team" or break the ID prefix check.
    for field, column in (('ID', 'id'), ('Name', 'name'), ('Team', 'team')):
        is_empty = df[column].isna() | (df[column] == '')
        if is_empty.any():
            # Row numbers are 1-based positions among the data rows.
            empty_rows = (df[is_empty].index + 1).tolist()
            print(f"Error: Empty {field} found in rows: {empty_rows}")
            return False

    # Check for valid teams (reported as a plain list, like valid_teams)
    invalid_teams = df.loc[~df['team'].isin(valid_teams), 'team'].unique().tolist()
    if invalid_teams:
        print(f"Error: Invalid teams found: {invalid_teams}")
        print(f"Valid teams are: {valid_teams}")
        return False

    # Check each row for correct ID format; a mismatch is only a warning.
    for index, row_id, team in zip(df.index, df['id'], df['team']):
        expected_prefix = id_prefixes[team]
        if not row_id.startswith(expected_prefix):
            print(f"Warning: Row {index + 1}: ID {row_id} should start with {expected_prefix} for team {team}")

    # Check for duplicate IDs
    duplicate_ids = df.loc[df['id'].duplicated(), 'id'].unique().tolist()
    if duplicate_ids:
        print(f"Error: Duplicate IDs found: {duplicate_ids}")
        return False

    # All checks passed: print the summary.
    ids_by_team = {team: df.loc[df['team'] == team, 'id'] for team in valid_teams}

    print("CSV format is valid!")
    print(f"\nFound {len(df)} volunteers")
    print("\nTeam distribution:")
    for team, team_ids in ids_by_team.items():
        print(f"{team}: {len(team_ids)} volunteers")

    print("\nID number ranges by team:")
    for team, team_ids in ids_by_team.items():
        # Teams without members have no range to show.
        if not team_ids.empty:
            # IDs are strings, so min/max are lexicographic.
            print(f"{team}: {team_ids.min()} to {team_ids.max()}")

    return True

# Example usage: print a two-line banner, then validate the roster file.
file_path = 'CBF11th_data.csv'  # Replace with your CSV file name
print("CSV Format Checker", "=================", sep="\n")
check_csv_format(file_path)

CSV Format Checker
CSV format is valid!

Found 88 volunteers

Team distribution:
Core Team: 13 volunteers
Event Facilitator: 40 volunteers
Operation: 13 volunteers
Procurement: 9 volunteers
Sales: 9 volunteers
Public Relations: 4 volunteers

ID number ranges by team:
Core Team: CT01 to CT13
Event Facilitator: EF01 to EF40
Operation: OP01 to OP13
Procurement: PO01 to PO09
Sales: SL01 to SL09
Public Relations: PR01 to PR04


True