In [None]:
import pandas as pd

# -----------------------------------------------------------------------------
# Configuration: edit these two file names before running the cell.
# -----------------------------------------------------------------------------
# CSV to read (raw string so a pasted path is taken literally).
INPUT_FILE = r"Brigade_ Workshop Registration (Responses) - Form Responses 1.csv"
# CSV to write with the cleaned result.
OUTPUT_FILE = "Workshop_Registration_Cleaned.csv"
# -----------------------------------------------------------------------------

def process_workshop_registration_csv(input_file, output_file):
    """
    Clean a workshop registration CSV file and save the result.

    Processing steps:
    1. Read ``input_file`` with pandas.
    2. Check that the columns needed later ('Name', 'Registration Number',
       'Section') are present; stop with a readable message otherwise.
    3. Remove the 'Timestamp' column if it exists.
    4. Remove duplicates based on Name and Registration Number, keeping the
       first occurrence.
    5. Sort by Section and then Name, renumbering the index from 0.
    6. Print a per-section row count and write the result to ``output_file``
       without the index column.

    Progress and error messages are printed to stdout.

    Args:
        input_file (str): Path to input CSV file
        output_file (str): Path to output CSV file

    Returns:
        pandas.DataFrame or None: The cleaned, sorted data on success.
        None if the input file is missing, a required column is absent, or
        any other error occurs (the error is printed, not raised).
    """
    dedupe_columns = ['Name', 'Registration Number']
    sort_columns = ['Section', 'Name']

    try:
        # Read the CSV file
        print(f"Reading CSV file: {input_file}")
        df = pd.read_csv(input_file)

        # Display original data info
        print(f"Original data shape: {df.shape}")
        print(f"Original columns: {list(df.columns)}")

        # Validate up front: without this, a missing column only surfaces as
        # a bare KeyError from pandas, which tells the user very little.
        # dict.fromkeys de-duplicates while keeping a stable order.
        required_columns = list(dict.fromkeys(dedupe_columns + sort_columns))
        missing_columns = [
            column for column in required_columns if column not in df.columns
        ]
        if missing_columns:
            print(f"Error: Missing required column(s): {missing_columns}")
            return None

        # Remove timestamp column if it exists
        if 'Timestamp' in df.columns:
            df = df.drop('Timestamp', axis=1)
            print("✓ Removed Timestamp column")

        # Remove duplicates considering both Name and Registration Number,
        # keeping the first occurrence of each duplicate
        original_count = len(df)
        df_cleaned = df.drop_duplicates(subset=dedupe_columns, keep='first')

        duplicate_count = original_count - len(df_cleaned)
        print(f"✓ Removed {duplicate_count} duplicate entries")
        print(f"Final data shape: {df_cleaned.shape}")

        # Display final columns
        print(f"Final columns: {list(df_cleaned.columns)}")

        # Sort by Section and then by Name for better organization
        df_cleaned = df_cleaned.sort_values(sort_columns).reset_index(drop=True)

        # Display section-wise count
        print("\nSection-wise distribution:")
        section_counts = df_cleaned['Section'].value_counts().sort_index()
        for section, count in section_counts.items():
            print(f"Section {section}: {count} students")

        # Save to output file
        df_cleaned.to_csv(output_file, index=False)
        print(f"\n✓ Cleaned data saved to: {output_file}")

        return df_cleaned

    except FileNotFoundError:
        print(f"Error: File '{input_file}' not found.")
        return None
    except Exception as e:
        # Top-level boundary for an interactive script: report and signal
        # failure through the None return instead of raising.
        print(f"Error processing file: {e}")
        return None

# RUN THE PROCESSOR
# Print the banner in one call; sep="\n" puts each entry on its own line.
print(
    "=== Workshop Registration CSV Processor ===",
    "This script will:",
    "1. Remove the Timestamp column",
    "2. Remove duplicate entries based on Name and Registration Number",
    "3. Sort data by Section and Name",
    "=" * 50,
    sep="\n",
)

# Run the cleaning step; the processor returns None when it fails.
cleaned_data = process_workshop_registration_csv(INPUT_FILE, OUTPUT_FILE)

if cleaned_data is None:
    print("\n✗ Processing failed!")
else:
    print("\n✓ Processing completed successfully!")
    print(f"Total unique registrations: {len(cleaned_data)}")

# Optional: to view the full cleaned data in Jupyter, uncomment the line below
# so the dataframe is the last expression of the cell.
# cleaned_data

=== Workshop Registration CSV Processor ===
This script will:
1. Remove the Timestamp column
2. Remove duplicate entries based on Name and Registration Number
3. Sort data by Section and Name
Reading CSV file: Brigade_ Workshop Registration (Responses) - Form Responses 1.csv
Original data shape: (48, 7)
Original columns: ['Timestamp', 'Name', 'Phone Number', 'Email', 'Registration Number', 'Section', 'Email Address']
✓ Removed Timestamp column
✓ Removed 8 duplicate entries
Final data shape: (40, 6)
Final columns: ['Name', 'Phone Number', 'Email', 'Registration Number', 'Section', 'Email Address']

Section-wise distribution:
Section A: 11 students
Section B: 10 students
Section C: 8 students
Section D: 11 students

✓ Cleaned data saved to: Workshop_Registration_Cleaned.csv

✓ Processing completed successfully!
Total unique registrations: 40
