<a href="https://colab.research.google.com/github/Rahafsjob/Capstone_Project_WeCloudData/blob/main/Stage1/AttendanceRegister.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd
from datetime import datetime

# Directory containing your files
directory = r"/Attendance"

# Initialize an empty list to store DataFrames
df_list = []

# Function to standardize date format
def standardize_date(date_str):
    # Convert the input to a string (in case it's a float or other type)
    date_str = str(date_str)

    try:
        # Try parsing the date in "DD/MM/YY" format (e.g., 19/12/23)
        date_obj = datetime.strptime(date_str, "%d/%m/%y")
    except ValueError:
        try:
            # Try parsing the date in "D/M/YYYY" format (e.g., 1/10/2024)
            date_obj = datetime.strptime(date_str, "%d/%m/%Y")
        except ValueError:
            try:
                # Try parsing the date in "DD-B-YYYY" format (e.g., 26-August-2024)
                date_obj = datetime.strptime(date_str, "%d-%B-%Y")
            except ValueError:
                # If the date is already in the correct format or cannot be parsed, return as-is
                return date_str
    # Format the datetime object into the desired format (DD-MMM-YYYY)
    return date_obj.strftime("%d-%b-%Y")

# Loop through all files in the directory
for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)

    # Process only CSV files
    if filename.endswith(".csv"):
        print(f"Processing CSV file: {filename}")

        # Read the CSV file
        df = pd.read_csv(file_path)

        # Standardize column names (strip whitespace and ensure consistent case)
        df.columns = df.columns.str.strip().str.replace(" ", "_")

        # Rename columns as per requirements
        df = df.rename(columns={
            "Bootcamp_name": "name",
            "CohortNo": "cohort",
            "Date": "date",
            "Student_ID": "studentid",
            "Attendance": "attendance_flag"
        })

        # Check if 'studentid' column exists
        if "studentid" not in df.columns:
            print(f"⚠️ 'studentid' column not found in file: {filename}. Skipping this file.")
            continue

        # Check if 'attendance_flag' column already exists
        if "attendance_flag" in df.columns:
            # Rename the existing 'attendance_flag' column to avoid conflicts
            df = df.rename(columns={"attendance_flag": "Attendance_Original"})

        # Standardize date columns
        date_columns = [col for col in df.columns if col not in ["studentid", "name", "cohort"]]
        for col in date_columns:
            df[col] = df[col].astype(str).apply(standardize_date)  # Convert to string before processing

        # Convert the DataFrame to long format
        df_long = df.melt(id_vars=["studentid", "name", "cohort"], var_name="date", value_name="attendance_flag")

        # Append the long format DataFrame to the list
        df_list.append(df_long)

# Combine all DataFrames
if df_list:
    final_df = pd.concat(df_list, ignore_index=True)

    # Save merged data to a new CSV file
    output_path = os.path.join(directory, "merged_attendance_csv_only.csv")
    final_df.to_csv(output_path, index=False)

    print(f"✅ Merged CSV files saved at: {output_path}")
else:
    print("⚠️ No valid CSV files found in the directory.")

Processing CSV file: Attendance Register - Introduction to AI - Cohort 6.csv
Processing CSV file: Attendance Register - Generative AI Introduction&Large Language Model Introduction - Cohort 5.csv
Processing CSV file: Attendance Register - Python Fundamentals - Cohort 1.csv
Processing CSV file: Attendance Register - Generative AI Introduction&Large Language Model Introduction - Cohort 3.csv
Processing CSV file: Attendance Register - Computer Vision - Cohort 2.csv
Processing CSV file: Attendance Register - Introduction to AI - Cohort 8.csv
Processing CSV file: Attendance Register - ML Fundamentals - Cohort 5.csv
Processing CSV file: Attendance Register - ML Fundamentals - Cohort 1.csv
Processing CSV file: Attendance Register - NLP and LLM Finetuning - Cohort 1.csv
Processing CSV file: Attendance Register - Introduction to AI - Cohort 5.csv
Processing CSV file: Attendance Register - Data Preparation for GenAI - Cohort 2.csv
Processing CSV file: Attendance Register - Introduction to AI - C

In [2]:
final_df['cohort'].unique()

array(['C6', 'C5', 'C1', 'C3', 'C2', 'C8', 'C4', 'C7', 'C10', 'C9'],
      dtype=object)

In [3]:
final_df['name'].unique()

array(['Introduction to AI',
       'Generative AI Introduction & Large Language Model Introduction',
       'Python Fundamentals', 'Computer Vision', 'ML Fundamentals',
       'NLP and LLM Finetuning', 'Data Preparation for GenAI',
       'Computation Distribution'], dtype=object)

In [4]:
final_df[final_df['name'] == 'Python Fundamentals']

Unnamed: 0,studentid,name,cohort,date,attendance_flag
422,Stud0010,Python Fundamentals,C1,15-January-2024,P
423,Stud0013,Python Fundamentals,C1,15-January-2024,P
424,Stud0025,Python Fundamentals,C1,15-January-2024,P
425,Stud0046,Python Fundamentals,C1,15-January-2024,P
426,Stud0059,Python Fundamentals,C1,15-January-2024,P
...,...,...,...,...,...
6197,Stud0311,Python Fundamentals,C2,18-January-2024,P\r
6198,Stud0319,Python Fundamentals,C2,18-January-2024,P\r
6199,Stud0352,Python Fundamentals,C2,18-January-2024,P\r
6200,Stud0366,Python Fundamentals,C2,18-January-2024,P\r


In [5]:
from google.colab import files
files.download('/Attendance/merged_attendance_csv_only.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>