# 1/ Import Packages

In [13]:
#!pip install pandas

In [76]:
import os
import re
from datetime import datetime

import pandas as pd

# 2/ Import Data

In [6]:
# Load input/CEO.csv (path is relative to the working directory) into a DataFrame.
df = pd.read_csv('input/CEO.csv')

In [11]:
# Inspect the raw column names, non-null counts and dtypes before cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Quarter                      10 non-null     object 
 1   Overall Revenue Growth       10 non-null     float64
 2   Market Share                 10 non-null     float64
 3   Employee Engagement Score    10 non-null     float64
 4   Customer Satisfaction Score  10 non-null     float64
 5   Revenue                      10 non-null     int64  
 6   Cost                         10 non-null     int64  
 7   Profit                       10 non-null     int64  
dtypes: float64(4), int64(3), object(1)
memory usage: 768.0+ bytes


# 3/ Clean Column Names

In [31]:
def sanitize_column_names(df):
    """
    Sanitize the column names of a DataFrame in place.

    Each column label is converted to a string, spaces are replaced with
    underscores, every character other than ASCII letters, digits and
    underscores is removed, and the result is lowercased.

    The input DataFrame is modified directly (its ``columns`` attribute is
    reassigned) and the same object is returned for convenience. Distinct
    labels can collapse to the same sanitized name; no de-duplication is
    performed.

    Parameters:
        df (pd.DataFrame): Input DataFrame. Column labels may be of any type;
            non-string labels (e.g. integers) are converted with ``str()``
            before sanitizing.

    Returns:
        pd.DataFrame: The same DataFrame object, with sanitized column names.
    """
    def sanitize(name):
        # Non-string labels (e.g. integer column labels) would raise on
        # .replace(); coerce to text first so every label can be cleaned.
        name = str(name)
        # Replace spaces with underscores
        name = name.replace(" ", "_")
        # Remove special characters except underscores
        name = re.sub(r'[^a-zA-Z0-9_]', '', name)
        # Convert to lowercase
        return name.lower()

    df.columns = [sanitize(col) for col in df.columns]
    return df

In [37]:
# Renames df's columns in place; the returned frame is shown as the cell output.
sanitize_column_names(df)

Unnamed: 0,quarter,overall_revenue_growth,market_share,employee_engagement_score,customer_satisfaction_score,revenue,cost,profit
0,2022-Q1,0.05,0.15,0.7,0.85,105000,70000,35000
1,2022-Q2,0.07,0.16,0.72,0.87,112350,73500,38850
2,2022-Q3,0.06,0.17,0.74,0.89,119091,77000,42091
3,2022-Q4,0.08,0.18,0.76,0.9,128618,80500,48118
4,2023-Q1,0.1,0.19,0.78,0.91,141480,84000,57480
5,2023-Q2,0.09,0.2,0.8,0.92,154213,87500,66713
6,2023-Q3,0.11,0.21,0.82,0.93,171177,91000,80177
7,2023-Q4,0.12,0.22,0.84,0.94,191718,94500,97218
8,2024-Q1,0.13,0.23,0.86,0.95,216641,98000,118641
9,2024-Q2,0.14,0.24,0.88,0.96,246971,101500,145471


In [39]:
# Re-check the frame to confirm the column names were rewritten.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   quarter                      10 non-null     object 
 1   overall_revenue_growth       10 non-null     float64
 2   market_share                 10 non-null     float64
 3   employee_engagement_score    10 non-null     float64
 4   customer_satisfaction_score  10 non-null     float64
 5   revenue                      10 non-null     int64  
 6   cost                         10 non-null     int64  
 7   profit                       10 non-null     int64  
dtypes: float64(4), int64(3), object(1)
memory usage: 768.0+ bytes


# 4/ Create Summary

In [66]:
# Sum every metric per quarter, keep the last five quarter groups, and flip
# the table so quarters become columns and metrics become rows.
df_report = (
    df.groupby('quarter')
    .sum()
    .tail(5)
    .T
)
df_report

quarter,2023-Q2,2023-Q3,2023-Q4,2024-Q1,2024-Q2
overall_revenue_growth,0.09,0.11,0.12,0.13,0.14
market_share,0.2,0.21,0.22,0.23,0.24
employee_engagement_score,0.8,0.82,0.84,0.86,0.88
customer_satisfaction_score,0.92,0.93,0.94,0.95,0.96
revenue,154213.0,171177.0,191718.0,216641.0,246971.0
cost,87500.0,91000.0,94500.0,98000.0,101500.0
profit,66713.0,80177.0,97218.0,118641.0,145471.0


# 5/ Export Report

In [93]:
def export_dataframe_with_timestamp(df, filename_prefix="ceo_report"):
    """
    Exports the given DataFrame to a CSV file in the output folder with a timestamped filename.

    The file is written to ``output/<YYYYMMDD_HHMMSS>_<filename_prefix>.csv``,
    relative to the current working directory. The ``output`` folder is
    created if it does not exist yet. The timestamp has one-second
    resolution, so two exports with the same prefix within the same second
    write to the same path.

    Parameters:
        df (pd.DataFrame): The DataFrame to export (its index is written too).
        filename_prefix (str): The prefix for the output file name.

    Returns:
        None
    """
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the destination exists before writing (no-op when it already does).
    os.makedirs("output", exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = f'output/{timestamp}_{filename_prefix}.csv'
    df.to_csv(output_path)
    print(f"File exported to {output_path}")

In [95]:
# Write the transposed quarterly summary to a timestamped CSV under output/.
export_dataframe_with_timestamp(df_report, "ceo_report")

File exported to output/20250105_162622_ceo_report.csv
