In [None]:
# 📌 STEP 1: Install required package
# openpyxl is imported further down to build the styled output workbook.
# NOTE: the leading "!" is notebook shell-escape syntax, so this line only
# runs inside IPython/Jupyter/Colab, not under a plain Python interpreter.
!pip install openpyxl

# 📌 STEP 2: Upload your file
from google.colab import files
# `uploaded` is indexed by file name below; each value is wrapped in
# io.BytesIO so pandas can read it like a file.
uploaded = files.upload()

import pandas as pd
import io

# 📌 STEP 3: Read the uploaded file (supports CSV or Excel)
# Fail with a clear message when the upload was cancelled / nothing arrived,
# instead of an IndexError from indexing an empty key list.
if not uploaded:
    raise ValueError("No file was uploaded. Please upload a CSV or Excel file.")

# Only the first uploaded file is processed; any others are ignored.
file_name = next(iter(uploaded))
file_bytes = io.BytesIO(uploaded[file_name])

# Compare the extension case-insensitively so "DATA.CSV" or "Report.XLSX"
# are accepted too.
lowered_name = file_name.lower()

# header=None: keep every row as data for now; the header row is picked
# explicitly in the next step.
if lowered_name.endswith('.csv'):
    df_raw = pd.read_csv(file_bytes, header=None)
elif lowered_name.endswith(('.xls', '.xlsx')):
    # NOTE(review): legacy .xls files are read through a different engine
    # than openpyxl (xlrd) — confirm it is available in this runtime.
    df_raw = pd.read_excel(file_bytes, header=None)
else:
    raise ValueError("Please upload a CSV or Excel file.")

# 📌 STEP 4: Use second row as header
# Row position 1 supplies the column labels, so the data starts at position 2.
# Slicing from 2 (not 1) keeps the header row from also showing up as the
# first data row.
if len(df_raw) < 2:
    raise ValueError(
        "The file needs at least two rows: the second row is used as the header."
    )
df = df_raw.iloc[2:].copy()
df.columns = df_raw.iloc[1]

# 📌 STEP 5: Drop rows where any column contains forbidden labels
# Labels are compared against the whole cell text (after trimming
# surrounding whitespace), not as substrings.
remove_keywords = [
    "Country", "Created", "Rank",
    "Estimated Page Views (monthly)",
    "Estimated Sales (monthly)", "Domain"
]


def _has_forbidden_label(row):
    """Return True if any cell of *row*, as stripped text, is a forbidden label."""
    return any(str(cell).strip() in remove_keywords for cell in row)


# Remove rows where any cell has a keyword to be deleted.
# Skipped for an empty frame: with nothing to evaluate, a row-wise apply may
# not yield a boolean mask, and there is nothing to remove anyway.
if not df.empty:
    df = df[~df.apply(_has_forbidden_label, axis=1)]

# 📌 STEP 6: Remove duplicates
# Keeps the first occurrence of each fully identical row.
df = df.drop_duplicates()

# 📌 STEP 7: Write to Excel with bold headers
from openpyxl import Workbook
from openpyxl.styles import Font
from openpyxl.utils.dataframe import dataframe_to_rows

wb = Workbook()
ws = wb.active
ws.title = "Cleaned Data"

# Header row: one bold cell per column label, written into worksheet row 1.
bold_font = Font(bold=True)
for col_idx, label in enumerate(df.columns, start=1):
    ws.cell(row=1, column=col_idx, value=label).font = bold_font

# Body rows: the frame's values only (no index column, no second header),
# appended one worksheet row per frame row.
for record in dataframe_to_rows(df, index=False, header=False):
    ws.append(record)

# 📌 STEP 8: Save and download
# Write the workbook to the runtime's disk, then hand that file to the
# browser as a download.
output_file = "/content/Cleaned_Data.xlsx"
wb.save(output_file)
files.download(output_file)