In [1]:
import pandas as pd
from datetime import datetime
from google.colab import files
import io
import openpyxl  # Required for pandas to read .xlsx files

# Prompt for a file upload in Colab and load the first uploaded file as a DataFrame.
def upload_file_and_read():
    """Upload an Excel file through the Colab widget and read it with pandas.

    Returns:
        A ``(df, filename)`` tuple: the DataFrame produced by
        ``pd.read_excel`` and the name of the uploaded file. If nothing was
        uploaded, or the upload/read raises, a message is printed and
        ``(None, None)`` is returned instead, so callers only need to check
        ``df is not None``.
    """
    try:
        print("Please upload your 'Book2.xlsx' file.")
        uploaded = files.upload()

        # The upload result maps each filename to that file's raw bytes.
        # An empty mapping (presumably a cancelled dialog) would otherwise
        # surface below as a StopIteration with an empty error message.
        if not uploaded:
            print("No file was uploaded; nothing to process.")
            return None, None

        # Only the first uploaded file is used; any others are ignored.
        filename = next(iter(uploaded))
        print(f"File '{filename}' uploaded successfully!")

        # Wrap the raw bytes in a file-like object so pandas can parse the
        # workbook without it being written to disk first.
        df = pd.read_excel(io.BytesIO(uploaded[filename]))
        return df, filename
    except Exception as e:
        # Notebook boundary: report the problem and signal failure via the
        # (None, None) return value rather than propagating the exception.
        print(f"An error occurred during file upload or reading: {e}")
        return None, None

# Define the output filename.
output_file = 'cleaned_workfolio_summary_v2.csv'

# Upload and read the input file. `df` is None when the upload or read failed.
df, input_filename = upload_file_and_read()

if df is not None:
    try:
        print(f"\nProcessing data from '{input_filename}'...")

        # Fail early with a readable message if the sheet does not have the
        # expected columns; a raw KeyError would only print the column name.
        required_columns = [
            'Employee', 'Date', 'Start Time', 'End Time', 'Activity Duration',
        ]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            raise ValueError(
                f"Input file is missing required column(s): {', '.join(missing_columns)}"
            )

        # 1. Add a space to the end of the 'Employee' values to match the pattern.
        df['Employee'] = df['Employee'] + ' '

        # 2. Reformat the 'Date' column from YYYY-MM-DD to DD Month YYYY.
        df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%d %B %Y')

        # 3. Reformat the 'Start Time' and 'End Time' columns to HH:MM AM/PM.
        # `errors='coerce'` turns entries that do not match HH:MM:SS (such as
        # "Working On") into NaT instead of raising; after strftime those
        # rows hold missing values rather than a formatted time.
        for time_column in ('Start Time', 'End Time'):
            df[time_column] = pd.to_datetime(
                df[time_column],
                format='%H:%M:%S',
                errors='coerce',
            ).dt.strftime('%I:%M %p')

        # 4. Make sure 'Worked for' is followed by a space. The negative
        # lookahead skips values that already have one, so re-running the
        # cleanup (or feeding already-clean data) does not produce a double
        # space. `regex=True` is explicit because the default differs
        # between pandas versions.
        df['Activity Duration'] = df['Activity Duration'].str.replace(
            r'Worked for(?! )', 'Worked for ', regex=True
        )

        # 5. Save the cleaned DataFrame to a new CSV file in the Colab session.
        df.to_csv(output_file, index=False)

        print(f"\nSuccess! The cleaned data has been saved to '{output_file}'.")
        print("You can download this file from the left sidebar.")
        print("\nHere is a preview of the newly formatted data:")
        print(df.head())

        # 6. Automatically download the new CSV file to your computer.
        files.download(output_file)

    except Exception as e:
        # Notebook boundary: report the failure instead of showing a traceback.
        print(f"An unexpected error occurred during data processing: {e}")


Please upload your 'Book2.xlsx' file.


Saving Book2.xlsx to Book2.xlsx
File 'Book2.xlsx' uploaded successfully!

Processing data from 'Book2.xlsx'...

Success! The cleaned data has been saved to 'cleaned_workfolio_summary_v2.csv'.
You can download this file from the left sidebar.

Here is a preview of the newly formatted data:
         Employee            Date Start Time  End Time  \
0  Iqbal Hossain   09 August 2025   08:28 AM  10:22 AM   
1  Iqbal Hossain   09 August 2025   10:22 AM  10:30 AM   
2  Iqbal Hossain   09 August 2025   10:30 AM  10:49 AM   
3  Iqbal Hossain   09 August 2025   10:49 AM  10:55 AM   
4  Iqbal Hossain   09 August 2025   10:55 AM  11:21 AM   

                         Activity Duration  
0                   Worked for 01h 54m 02s  
1  System locked and clocked out from work  
2                   Worked for 00h 19m 04s  
3  System locked and clocked out from work  
4                   Worked for 00h 25m 23s  


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>