# Excel upload and sheet inspection

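The cell below first removes any existing copy of `AF.xlsx` from the working directory, then uploads files from your local computer to your Colab environment. After running the cell, you will see a button to select and upload files. Once the upload finishes, the cell lists the sheets of the first uploaded workbook and asks you to pick one by number or by name.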

In [47]:
import os
import pandas as pd
from google.colab import files

# Snapshot the working directory once; the listing and the membership check
# further down both work from this same snapshot.
files_in_directory = os.listdir()

print("Files in the current directory before potential deletion:")
for filename in files_in_directory:
    print(filename)

# Optional clean-up step: remove only the explicitly named files, and only if
# they were present in the snapshot above. Edit `files_to_delete` (or leave it
# empty) to control what is removed.
files_to_delete = ['AF.xlsx']
files_deleted_count = 0
for filename in files_to_delete:
    if filename not in files_in_directory:
        continue
    try:
        os.remove(filename)
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining names.
        print(f"Error removing file '{filename}': {e}")
    else:
        print(f"Successfully removed '{filename}'.")
        files_deleted_count += 1

# Re-read the directory so the listing reflects whatever was just removed.
files_in_directory_after = os.listdir()
print("\nFiles in the current directory after potential deletion:")
if not files_in_directory_after:
    print("Directory is empty.")
else:
    for filename in files_in_directory_after:
        print(filename)


# Ask Colab for an upload; `uploaded` maps each uploaded file name to its
# content and is read by the sheet-selection code that follows.
print("\nUploading files...")
uploaded = files.upload()

# Echo every received file with its size so the user can confirm the upload.
for filename, content in uploaded.items():
    print(f'User uploaded file "{filename}" with length {len(content)} bytes')


def _resolve_sheet_choice(raw_choice, sheet_names):
    """Translate the user's answer into a sheet name.

    Args:
        raw_choice: The text exactly as returned by ``input()``.
        sheet_names: The workbook's sheet names, in workbook order.

    Returns:
        The matching sheet name, or ``None`` when the answer matches nothing.

    Resolution order:
        1. A whole number within ``1..len(sheet_names)`` is a 1-based position.
        2. Otherwise the answer is tried as a sheet name, first exactly as
           typed and then with surrounding whitespace removed. This also
           covers sheets whose name is all digits (e.g. "2024") when that
           number is not a valid position.
    """
    trimmed = raw_choice.strip()
    if trimmed.isdecimal():
        position = int(trimmed) - 1
        if 0 <= position < len(sheet_names):
            return sheet_names[position]
    for candidate in (raw_choice, trimmed):
        if candidate in sheet_names:
            return candidate
    return None


# Only the first uploaded file is processed; it is assumed to be an Excel
# workbook. If several files were uploaded, adjust `excel_file_path` manually.
if uploaded:
    excel_file_path = next(iter(uploaded))  # Name of the first uploaded file
    print(f"\nProcessing uploaded file: {excel_file_path}")

    try:
        # Open the workbook to discover its sheet names.
        # NOTE(review): the ExcelFile handle is left open on purpose because a
        # later cell may parse from `excel_file`; close it explicitly if not.
        excel_file = pd.ExcelFile(excel_file_path)
        excel_sheets = excel_file.sheet_names

        print(f"\nSheet names in '{excel_file_path}':")
        for i, sheet_name in enumerate(excel_sheets):
            print(f"{i+1}. {sheet_name}")

        # Keep asking until the answer resolves to an existing sheet. On
        # success `selected_sheet_name` holds the chosen sheet's name for use
        # in later steps (e.g. loading the sheet into a DataFrame).
        while True:
            choice = input("\nEnter the number or name of the sheet you want to process: ")
            resolved_name = _resolve_sheet_choice(choice, excel_sheets)
            if resolved_name is not None:
                selected_sheet_name = resolved_name
                print(f"You selected sheet: {selected_sheet_name}")
                break

            # Tailor the hint: a number was typed but it is out of range (and
            # is not a sheet name either), versus any other unmatched text.
            if choice.strip().isdecimal():
                print("Invalid number. Please enter a number corresponding to a sheet.")
            else:
                print("Invalid input. Please enter a valid sheet number or name.")

    except FileNotFoundError:
        print(f"Error: The file '{excel_file_path}' was not found. Please make sure the file is uploaded and the name is correct.")
    except Exception as e:
        # Covers non-Excel or corrupt uploads; report instead of crashing the cell.
        print(f"An error occurred: {e}")
else:
    print("\nNo file was uploaded. Please upload a file to proceed.")

Files in the current directory before potential deletion:
AF.xlsx
Successfully removed 'AF.xlsx'.

Files in the current directory after potential deletion:
Directory is empty.

Uploading files...


Saving AF.xlsx to AF.xlsx
User uploaded file "AF.xlsx" with length 28676 bytes

Processing uploaded file: AF.xlsx

Sheet names in 'AF.xlsx':
1. Sheet 1
2. Sheet 2

Enter the number or name of the sheet you want to process: 2
You selected sheet: Sheet 2


In [54]:
import pandas as pd
import re

# NOTE(review): `selected_df` is not created by any cell visible here; it is
# presumably loaded from the chosen Excel sheet in another cell - confirm that
# cell runs before this one.

# Report, column by column, every distinct character that is neither a word
# character nor whitespace. Two things to keep in mind when reading the output:
#   * `\w` also matches the underscore, so "_" is never reported.
#   * Non-string values are converted with str(), so characters introduced by
#     that conversion (decimal points, date/time separators) are reported too.
for column in selected_df.columns:
    print(f"\nChecking column: '{column}'")

    # Missing values are skipped; everything else is scanned as text. A set
    # keeps each character once no matter how often it occurs.
    special_chars_found = {
        char
        for value in selected_df[column]
        if pd.notna(value)
        for char in re.findall(r'[^\w\s]', str(value))
    }

    if not special_chars_found:
        print("  No special characters found.")
    else:
        print(f"  Special characters found: {', '.join(sorted(special_chars_found))}")


Checking column: 'Geography'
  No special characters found.

Checking column: 'Period'
  Special characters found: -, :

Checking column: 'Total_Sell_Out_Units_G_Unit'
  Special characters found: .

Checking column: 'test'
  No special characters found.


In [55]:
# Render `selected_df` through the notebook's rich display. In the captured
# output the frame is truncated to its first and last rows around an ellipsis row.
# NOTE(review): `selected_df` must already exist from an earlier cell; it is
# not defined in the cells visible here - confirm.
display(selected_df)

Unnamed: 0,Geography,Period,Total_Sell_Out_Units_G_Unit,test
0,Brazil,2021-01-04,89750.630168,0
1,Brazil,2021-01-11,82282.157953,0
2,Brazil,2021-01-18,80732.996677,0
3,Brazil,2021-01-25,89686.288068,0
4,Brazil,2021-02-01,84366.418743,0
...,...,...,...,...
169,Brazil,2024-04-01,89588.400000,27086955
170,Brazil,2024-04-08,96290.400000,20610666
171,Brazil,2024-04-15,87927.400000,15827706
172,Brazil,2024-04-22,86316.400000,13266383
