In [65]:
import pandas as pd

def extract_first_week_data(file_path, output_file='first_week_data.csv'):
    """
    Extract the rows dated in the first week of October 2024 and save them.

    The CSV is loaded, the timestamp column is renamed from 'Unnamed: 0' to
    'Date-Time' when it still carries that name, the timestamps are parsed,
    and the rows with 2024-10-01 <= Date-Time < 2024-10-08 are written to
    ``output_file``. Nothing is written when no row falls in that range.

    Timestamps may be written either as '%d/%m/%Y %H:%M' or as
    '%Y-%m-%d %H:%M:%S'; both layouts are accepted, even mixed within one
    file. Entries matching neither layout become NaT and are excluded.

    Parameters:
        file_path (str): Path to the input CSV file.
        output_file (str): Path to save the filtered data.

    Returns:
        None

    Raises:
        KeyError: If the file has neither an 'Unnamed: 0' nor a 'Date-Time'
            column.
    """
    known_formats = ('%d/%m/%Y %H:%M', '%Y-%m-%d %H:%M:%S')
    week_start = pd.Timestamp('2024-10-01')
    week_end = pd.Timestamp('2024-10-08')  # exclusive upper bound

    # Read the timestamp column as text under either of its possible names so
    # the string handling below never meets an already-converted value.
    df = pd.read_csv(file_path, dtype={'Unnamed: 0': str, 'Date-Time': str})

    # Rename 'Unnamed: 0' to 'Date-Time' if not done yet
    if 'Unnamed: 0' in df.columns:
        df = df.rename(columns={'Unnamed: 0': 'Date-Time'})

    if 'Date-Time' not in df.columns:
        raise KeyError(
            f"'{file_path}' has no 'Date-Time' (or 'Unnamed: 0') column"
        )

    raw = df['Date-Time']
    if raw.notna().any():
        text = raw.str.strip()
        # Try every known layout instead of guessing from a single sample
        # entry: a row only stays NaT if it matches none of them.
        parsed = pd.to_datetime(text, format=known_formats[0], errors='coerce')
        for fmt in known_formats[1:]:
            parsed = parsed.fillna(
                pd.to_datetime(text, format=fmt, errors='coerce')
            )
    else:
        # No timestamps at all (empty file or empty column): nothing to parse.
        parsed = pd.Series(pd.NaT, index=raw.index, dtype='datetime64[ns]')
    df['Date-Time'] = parsed

    # Report how many rows could not be converted; they are dropped by the
    # range filter below because comparisons against NaT are False.
    unparsed_count = int(df['Date-Time'].isna().sum())
    if unparsed_count:
        print(
            "Conversion issues with 'Date-Time' detected. "
            f"Rows with NaT: {unparsed_count}"
        )

    # Filter for the first week of October 2024 (October 1 to October 7)
    in_first_week = (df['Date-Time'] >= week_start) & (df['Date-Time'] < week_end)
    first_week_data = df[in_first_week]

    # Check if any data was found
    if not first_week_data.empty:
        # Save to output file
        first_week_data.to_csv(output_file, index=False)
        print(f"Data for the first week of October 2024 saved to '{output_file}'")
    else:
        print("No data found for the first week of October 2024.")

In [66]:
# Run the first-week extraction on data/channel_A_schedule.csv.
extract_first_week_data(
    file_path='data/channel_A_schedule.csv',
    output_file='data/FIRST_WEEK_channel_A_schedule.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_A_schedule.csv'


In [67]:
# Run the first-week extraction on data/channel_0_schedule.csv.
extract_first_week_data(
    file_path='data/channel_0_schedule.csv',
    output_file='data/FIRST_WEEK_channel_0_schedule.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_0_schedule.csv'


In [68]:
# Run the first-week extraction on data/channel_1_schedule.csv.
extract_first_week_data(
    file_path='data/channel_1_schedule.csv',
    output_file='data/FIRST_WEEK_channel_1_schedule.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_1_schedule.csv'


In [69]:
# Run the first-week extraction on data/channel_2_schedule.csv.
extract_first_week_data(
    file_path='data/channel_2_schedule.csv',
    output_file='data/FIRST_WEEK_channel_2_schedule.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_2_schedule.csv'


In [70]:
# Run the first-week extraction on data/channel_0_conversion_rates.csv.
extract_first_week_data(
    file_path='data/channel_0_conversion_rates.csv',
    output_file='data/FIRST_WEEK_channel_0_conversion_rates.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_0_conversion_rates.csv'


In [71]:
# Run the first-week extraction on data/channel_1_conversion_rates.csv.
extract_first_week_data(
    file_path='data/channel_1_conversion_rates.csv',
    output_file='data/FIRST_WEEK_channel_1_conversion_rates.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_1_conversion_rates.csv'


In [72]:
# Run the first-week extraction on data/channel_2_conversion_rates.csv.
extract_first_week_data(
    file_path='data/channel_2_conversion_rates.csv',
    output_file='data/FIRST_WEEK_channel_2_conversion_rates.csv',
)

Data for the first week of October 2024 saved to 'data/FIRST_WEEK_channel_2_conversion_rates.csv'
