## USER PARAMETERS

In [25]:
############################
# SCENARIO
############################

selected_scenario = "Rare-class Customer Identification" # Options: "Custom", "Age-based Spending Patterns", "Rare-class Customer Identification", "Seasonal Product Preferences"

############################
# CUSTOM Parameters Section
### Note: only used when selected_scenario = "Custom" (above)
### Note: Run EITHER Forecasting or Classification (a section is active when its *_Columns setting is not ["None"])
############################

# Common Parameters
noise_level = "No Noise"  # Options: No Noise, Low, Medium, High

# Forecasting Parameters
Forecast_Columns = ["None"] # Options: None, Random, Quantity, Net Price, Unit Cost
                              # Note: (1) None = No Forecasting (2) multiple values with ["Col1","Col2"], (3) Brackets are a requirement
trend = "None"  # Options: None, Linear, Exponential
seasonality = "None"  # Frequency: None, Weekly, Monthly, Quarterly, Yearly
missing_data = "None"  # Percentage: None, Low, Medium, High
cyclicity = "None"  # Options: None, Short Cycles, Long Cycles

# Classification & Pattern Parameters (Note: uses Forecasting results above)
Classifier_Columns = ["None"] # Options: None, Random, Gender, City, State, Country, Continent, Age
                            # Note: (1) None = No Classification (2) multiple values with ["Col1","Col2"], (3) Brackets are a requirement
class_imbalance = "No Imbalance"  # Options: High Imbalance, Medium Imbalance, Low Imbalance, No Imbalance
num_classes = 2  # Number of Classes: 2 (Binary), 3, 4, ...
class_distribution = "Even"  # Distribution: Even, Skewed
feature_relations = "Linearly Separable"  # Options: Linearly Separable, Non-linearly Separable
pattern_type = "None"
    # None: No specific pattern applied.
    # Trend: Introduce a trending pattern.
    # Seasonal: Introduce a seasonal pattern.
    # Cyclic: Introduce a cyclic pattern.
    # Customer Behavior: Simulate specific customer behavior patterns.
    # Promotional Impact: Simulate sales spikes due to promotions.
    # Inventory Impact: Simulate sales variations due to stock levels.
    # Anomalies: Introduce outliers or anomalies.
pattern_intensity = "Mild" # (if applicable):
    # Mild: The pattern is subtle and not very pronounced.
    # Moderate: The pattern is clearly visible but not too strong.
    # Strong: The pattern is very pronounced.
pattern_direction = "Increasing" # (for Trend only):
    # Increasing: Sales or chosen metric increases over time.
    # Decreasing: Sales or chosen metric decreases over time.
pattern_frequency = "Monthly" # (for Seasonal and Cyclic):
    # Weekly: The pattern repeats every week.
    # Monthly: The pattern repeats every month.
    # Quarterly: The pattern repeats every quarter.
    # Yearly: The pattern repeats every year.

############################
# Other
############################
output_format = "pipe" # "csv" or "pipe"

"Parameters Loaded!"


'Parameters Loaded!'

## Load Data

In [26]:
import pandas as pd
import numpy as np
import os

np.random.seed(12345)

# Folder that holds the pipe-delimited Contoso extracts.  Paths are assembled
# with os.path.join so that no backslash ends up inside a string literal
# (sequences such as "\O" or "\G" are invalid escapes) and the notebook also
# runs on systems whose path separator is not "\".
contoso_dir = 'Contoso Files'


def _read_contoso_file(file_name):
    """Read one pipe-delimited, ISO-8859-1 encoded file from the Contoso folder."""
    return pd.read_csv(os.path.join(contoso_dir, file_name), sep='|', encoding='ISO-8859-1')


# Load each flat file into a DataFrame
#FACTS
orders = _read_contoso_file('Orders.txt')
order_rows = _read_contoso_file('OrderRows.txt')
#DIMS
geo_locations = _read_contoso_file('GeoLocations.txt')
product = _read_contoso_file('Product.txt')
store = _read_contoso_file('Store.txt')
currency_exchange = _read_contoso_file('CurrencyExchange.txt')
customer = _read_contoso_file('Customer.txt')
date_data = _read_contoso_file('Date.txt')

# Name -> DataFrame lookup, handy for inspecting every table at once
dataframes = {
    "GeoLocations": geo_locations,
    "OrderRows": order_rows,
    "Orders": orders,
    "Product": product,
    "Store": store,
    "CurrencyExchange": currency_exchange,
    "Customer": customer, 
    "Date": date_data
}

# # Fetching the first 10 rows of each dataframe
# first_ten_rows = {name: df.head(10) for name, df in dataframes.items()}
# first_ten_rows

# UPDATE A SALES SCHEMA
# One row per order line: order header columns joined onto the order rows.
sales_df = pd.merge(orders, order_rows, on='OrderKey')
sales_df['Order Date'] = pd.to_datetime(sales_df['Order Date'])

"Data Loaded"

'Data Loaded'

## SCENARIO FUNCTIONS

In [27]:

## SCENARIO 1 - ENRICHMENT 1
def S1E1_adjust_spending_by_age(row, adjustments):
    """Scale a row's 'Net Price' by the multiplier configured for its age bracket.

    ``row`` must expose 'Age' and 'Net Price'.  ``adjustments`` maps the bracket
    labels '18-30', '31-45', '46-60' and '60+' to multipliers.  Any age that does
    not fall inside one of the three bounded brackets uses the '60+' multiplier.
    """
    customer_age = row['Age']
    net_price = row['Net Price']

    bounded_brackets = (
        (18, 30, '18-30'),
        (31, 45, '31-45'),
        (46, 60, '46-60'),
    )

    bracket = '60+'  # fallback for every age outside the bounded brackets
    for lower, upper, label in bounded_brackets:
        if lower <= customer_age <= upper:
            bracket = label
            break

    return net_price * adjustments[bracket]

## SCENARIO 1 - ENRICHMENT 2: Redefining the adjust_order_time function
def S1E2_adjust_order_time(row):
    """Return the row's order date moved to an age-typical hour and weekday.

    ``row`` must expose 'Age' and 'Order Date'.  First the hour is replaced by
    one drawn at random from the age bracket's shopping hours; then the date is
    shifted to a randomly drawn weekday (Friday-Sunday for ages 18-30, any day
    otherwise).  Both draws use NumPy's global random state, hour first.
    """
    customer_age = row['Age']
    timestamp = pd.to_datetime(row['Order Date'])
    is_young_adult = 18 <= customer_age <= 30

    # Time-of-day pattern per age bracket
    if is_young_adult:
        candidate_hours = [20, 21, 22, 23]  # late night
    elif 31 <= customer_age <= 45:
        candidate_hours = [12, 13, 19, 20]  # mid-day or evening
    elif 46 <= customer_age <= 60:
        candidate_hours = [10, 11, 12, 14]  # morning or early afternoon
    else:
        candidate_hours = [9, 10, 11, 15]   # morning or mid-afternoon
    timestamp = timestamp.replace(hour=np.random.choice(candidate_hours))

    # Day-of-week pattern: weekend-heavy for young adults, uniform for the rest
    candidate_weekdays = [4, 5, 6] if is_young_adult else [0, 1, 2, 3, 4, 5, 6]
    target_weekday = np.random.choice(candidate_weekdays)

    # Shift the date by the distance between its weekday and the target one
    return timestamp + pd.Timedelta(days=target_weekday - timestamp.weekday())

# SCENARIO 1 - ENRICHMENT 3: Enriching the Customer dimension with Preferred Communication Channels based on age
def S1E3_assign_communication_channel(age):
    """Return the preferred communication channel for a customer of the given age.

    Ages 18-30, 31-45 and 46-60 (bounds inclusive) each have their own channel;
    every other age gets "Direct Mail".
    """
    channel_by_bracket = (
        (18, 30, "Email/Mobile Notification"),
        (31, 45, "Email"),
        (46, 60, "Phone Call"),
    )
    for lower, upper, channel in channel_by_bracket:
        if lower <= age <= upper:
            return channel
    return "Direct Mail"


#Scenario 2 - Enrichment 1: Valuable Customers purchase more and more 'premium' products
premium_categories = ['Electronics', 'Luxury']

def S2E1_adjust_spending_and_preference(row):
    """Return the row's 'Quantity', boosted when the customer is flagged valuable.

    Non-valuable customers keep their quantity.  Valuable customers get x1.2,
    and a further x1.5 when the row's product (looked up in the module-level
    ``product`` table by 'ProductKey') belongs to a premium category.
    """
    quantity = row['Quantity']

    if not row['ValuableCustomer']:
        return quantity

    # 20% uplift for every valuable customer
    quantity *= 1.2

    # Extra 50% when the purchased product sits in a premium category
    category_of_product = product.loc[product['ProductKey'] == row['ProductKey'], 'Category']
    if category_of_product.values[0] in premium_categories:
        quantity *= 1.5

    return quantity

#Scenario 2 - Enrichment 2: Add sales (frequency) for valuable customers
def S2E2_generate_additional_orders_for_valuable(valuable_customers, adjusted_sales, num_additional_orders=2):
    """Create extra order rows for valuable customers by re-sampling their own orders.

    Parameters:
    - valuable_customers: iterable of CustomerKey values to generate orders for.
    - adjusted_sales: DataFrame with at least 'CustomerKey' and 'OrderKey' columns.
    - num_additional_orders: number of synthetic rows to create per customer.

    Returns:
    - DataFrame with the same columns as ``adjusted_sales`` holding only the new
      rows.  Each new row receives its own OrderKey, counting up from one past
      the largest existing key.  Customers without any order are skipped, and
      an empty frame is returned when nothing could be generated.
    """
    # Running counter so that every synthetic order gets a distinct key.
    next_order_key = adjusted_sales['OrderKey'].max() + 1

    additional_orders = []
    for customer_key in valuable_customers:
        customer_orders = adjusted_sales[adjusted_sales['CustomerKey'] == customer_key]
        if customer_orders.empty:
            # Nothing to sample from; sample(1) would raise on an empty frame.
            continue
        for _ in range(num_additional_orders):
            new_order = customer_orders.sample(1).copy()  # Sample an adjusted order
            new_order['OrderKey'] = next_order_key
            next_order_key += 1
            additional_orders.append(new_order)

    if not additional_orders:
        # pd.concat rejects an empty list; hand back an empty frame with the same columns.
        return adjusted_sales.iloc[0:0].copy()

    return pd.concat(additional_orders)

# SCENARIO 3
def S3E1_adjust_sales_by_season(row):
    """Return the row's 'Net Price', uplifted when the sale falls in the product's peak season.

    ``row`` must expose 'Category', 'Month' (full English month name),
    'PeakSeason' and 'Net Price'.  The uplift depends on the category and
    defaults to 1.1 for categories without a specific entry.
    """
    months_by_season = {
        'Winter': ('December', 'January', 'February'),
        'Spring': ('March', 'April', 'May'),
        'Summer': ('June', 'July', 'August'),
        'Fall': ('September', 'October', 'November'),
    }
    season_of_month = {
        month: season
        for season, months in months_by_season.items()
        for month in months
    }

    # Category-specific uplift applied during the peak season
    category_uplift = {
        'Swimwear': 1.4,
        'Winter Gear': 1.3,
        'Gardening Tools': 1.25,
        'School Supplies': 1.2,
    }
    uplift = category_uplift.get(row['Category'], 1.1)

    in_peak_season = season_of_month[row['Month']] == row['PeakSeason']
    if not in_peak_season:
        return row['Net Price']
    return row['Net Price'] * uplift


"Senario Enrichments Loaded"

'Senario Enrichments Loaded'

## CUSTOM FUNCTIONS

In [28]:
def add_forecasting_features(df, Forecast_Columns, trend, seasonality, missing_data, 
                                cyclicity, noise_level):
    """Return a copy of ``df`` with a synthetic "<column>_Adjusted" series per forecast column.

    Parameters:
    - df: source DataFrame; it is never modified.
    - Forecast_Columns: column name or list of column names to adjust.
      ["None"] (or "None") returns a plain copy; ["Random"] (or "Random") picks
      one of 'Quantity', 'Net Price', 'Unit Cost' at random.
    - trend: "Linear" adds a ramp from 0 to 20% of the column maximum,
      "Exponential" multiplies by a ramp from 1 to 1.2; anything else is ignored.
    - seasonality: "Weekly", "Monthly", "Quarterly" or "Yearly" adds a repeating
      sine wave (period 7/30/90/365 rows) scaled to 10% of the column maximum;
      "None" disables it.
    - missing_data: "Low", "Medium" or "High" blanks 5%, 10% or 20% of the rows.
    - cyclicity: "Short Cycles" (four sine cycles over the frame) or
      "Long Cycles" (one cycle), scaled to 5% of the column maximum; any other
      value leaves the data untouched.
    - noise_level: "Low", "Medium" or "High" adds Gaussian noise whose standard
      deviation is 5%, 10% or 20% of the column's standard deviation.

    Returns:
    - A new DataFrame containing the original columns plus the adjusted ones
      (always float, so missing values can be represented).
    """
    # A bare string is shorthand for a one-element list; without this the
    # string would be iterated character by character below.
    if isinstance(Forecast_Columns, str):
        Forecast_Columns = [Forecast_Columns]
    else:
        Forecast_Columns = list(Forecast_Columns)

    if Forecast_Columns == ["None"]:
        return df.copy()

    updated_df = df.copy()
    n_rows = len(updated_df)

    if Forecast_Columns == ["Random"]:
        Forecast_Columns = [str(np.random.choice(['Quantity', 'Net Price', 'Unit Cost']))]

    # Working copies, cast to float up front so NaN and fractional values fit.
    adjusted_data = {col + "_Adjusted": updated_df[col].astype(float) for col in Forecast_Columns}

    # Introduce trend
    if trend == "Linear":
        for col in Forecast_Columns:
            ramp = np.linspace(0, updated_df[col].max() * 0.2, n_rows)
            adjusted_data[col + "_Adjusted"] = adjusted_data[col + "_Adjusted"] + ramp
    elif trend == "Exponential":
        for col in Forecast_Columns:
            adjusted_data[col + "_Adjusted"] = adjusted_data[col + "_Adjusted"] * np.linspace(1, 1.2, n_rows)

    # Introduce seasonality
    if seasonality != "None":
        freq_map = {
            "Weekly": 7,
            "Monthly": 30,
            "Quarterly": 90,
            "Yearly": 365
        }
        period = freq_map[seasonality]
        # One sine period, repeated often enough to cover every row.
        seasonal_pattern = np.tile(np.sin(np.linspace(0, 2 * np.pi, period)),
                                   n_rows // period + 1)[:n_rows]
        for col in Forecast_Columns:
            adjusted_data[col + "_Adjusted"] = (
                adjusted_data[col + "_Adjusted"] + updated_df[col].max() * 0.1 * seasonal_pattern
            )

    # Introduce missing data
    missing_map = {
        "Low": 0.05,
        "Medium": 0.1,
        "High": 0.2
    }
    if missing_data in missing_map:
        n_missing = int(missing_map[missing_data] * n_rows)
        for col in Forecast_Columns:
            # Sample row *positions* so this works for any index, not only 0..n-1.
            missing_positions = np.random.choice(n_rows, size=n_missing, replace=False)
            series = adjusted_data[col + "_Adjusted"]
            series.iloc[missing_positions] = np.nan

    # Introduce cyclicity (skipped entirely when no cycle is requested)
    cycles_over_frame = {"Short Cycles": 4, "Long Cycles": 1}.get(cyclicity)
    if cycles_over_frame is not None:
        cyclical_pattern = np.sin(np.linspace(0, 2 * np.pi * cycles_over_frame, n_rows))
        for col in Forecast_Columns:
            adjusted_data[col + "_Adjusted"] = (
                adjusted_data[col + "_Adjusted"] + updated_df[col].max() * 0.05 * cyclical_pattern
            )

    # Introduce noise
    noise_multiplier = {"Low": 0.05, "Medium": 0.1, "High": 0.2}
    if noise_level in noise_multiplier:
        for col in Forecast_Columns:
            noise = np.random.normal(0, updated_df[col].std() * noise_multiplier[noise_level], n_rows)
            adjusted_data[col + "_Adjusted"] = adjusted_data[col + "_Adjusted"] + noise

    # Add adjusted columns to the copied dataframe
    for col, data in adjusted_data.items():
        updated_df[col] = data

    return updated_df

def classify_customer_attributes(df, classifier_columns=None, class_imbalance="No Imbalance", 
                                            num_classes=2, class_distribution="Even", 
                                            feature_relations="Linearly Separable"):
    """
    Add a 'Class_Label' column to ``df`` derived from one or more customer attributes.

    Parameters:
    - df: DataFrame to label.  It is modified in place: 'Class_Label' is added,
      'Combined_Metric' is added when several columns are given, and a text
      classifier column is replaced by its integer category codes.
    - classifier_columns: column name or list of names.  None or ["Random"]
      picks one of Gender, City, State, Country, Continent, Age at random.
      Several columns are summed row-wise into 'Combined_Metric'.
    - class_imbalance: "High Imbalance", "Medium Imbalance" or any other value
      besides "No Imbalance" (treated as low).  Only used for binary labels,
      where class 1 is everything above the 0.9 / 0.75 / 0.6 quantile.
    - num_classes: number of quantile-based classes.
    - class_distribution: accepted for interface compatibility; not used.
    - feature_relations: "Non-linearly Separable" relabels a random 10% of rows.

    Returns:
    - The same ``df`` object, with labels in ``range(num_classes)``.
    """
    # Normalise the column selection to a list (None keeps the historical default).
    if classifier_columns is None:
        classifier_columns = ["Random"]
    elif isinstance(classifier_columns, str):
        classifier_columns = [classifier_columns]
    else:
        classifier_columns = list(classifier_columns)

    # If classifier columns are set to ["Random"], select a random column for classification
    if classifier_columns == ["Random"]:
        classifier_columns = [str(np.random.choice(['Gender', 'City', 'State', 'Country', 'Continent', 'Age'], 1)[0])]

    # Create a combined metric for classification if multiple columns are provided
    if len(classifier_columns) > 1:
        df['Combined_Metric'] = df[classifier_columns].sum(axis=1)
        classifier_column_to_use = 'Combined_Metric'
    else:
        classifier_column_to_use = classifier_columns[0]

    # Convert categorical columns to integers for classification
    if df[classifier_column_to_use].dtype == 'object':
        df[classifier_column_to_use] = df[classifier_column_to_use].astype('category').cat.codes

    metric = df[classifier_column_to_use]

    if class_imbalance != "No Imbalance" and num_classes == 2:
        # Binary labels with a skewed split: class 1 is the upper tail only.
        if class_imbalance == "High Imbalance":
            imbalance_ratio = 0.9
        elif class_imbalance == "Medium Imbalance":
            imbalance_ratio = 0.75
        else:  # Low Imbalance
            imbalance_ratio = 0.6
        df['Class_Label'] = (metric > metric.quantile(imbalance_ratio)).astype(int)
    else:
        # Balanced quantile bins (imbalance is only implemented for the binary case).
        df['Class_Label'] = pd.qcut(metric, q=num_classes, labels=False, duplicates='drop')

    # If feature relations are non-linearly separable, add some noise to the class labels
    if feature_relations == "Non-linearly Separable":
        noise_level = 0.1  # Adding 10% noise for simplicity
        num_noisy_samples = int(df.shape[0] * noise_level)
        noisy_samples = np.random.choice(df.index, num_noisy_samples, replace=False)
        # Move each noisy row to the next class, wrapping around.  For two
        # classes this equals the 0 <-> 1 flip; for more classes it keeps the
        # labels inside range(num_classes) instead of producing negatives.
        df.loc[noisy_samples, 'Class_Label'] = (df.loc[noisy_samples, 'Class_Label'] + 1) % num_classes

    return df

#####################
## PATTERNS
#####################

def apply_trend_pattern(df, pattern_direction, pattern_intensity):
    """
    Scale 'Quantity' by a class-dependent factor of 1 + coeff * (Class_Label + 1).

    Parameters:
    - df: DataFrame with 'Quantity' and 'Class_Label'; modified in place.
    - pattern_direction: "Decreasing" negates the coefficient; any other value
      keeps it positive.
    - pattern_intensity: "Mild" (0.05), "Moderate" (0.1) or "Strong" (0.2).

    Returns:
    - The same ``df``.  'Quantity' is replaced by a float column; rows without
      a class label keep their quantity.
    """
    trend_coefficients = {
        "Mild": 0.05,
        "Moderate": 0.1,
        "Strong": 0.2
    }
    
    coeff = trend_coefficients[pattern_intensity]
    
    # Adjusting the trend coefficient based on the direction
    if pattern_direction == "Decreasing":
        coeff = -coeff

    labels = df['Class_Label'].astype(float).to_numpy()
    # Unlabelled rows (NaN) get a neutral factor of 1.
    factor = np.where(np.isnan(labels), 1.0, 1 + coeff * (labels + 1))

    # Replace the whole column in one step: writing floats into an integer
    # column slice by slice relies on implicit upcasting that pandas deprecates.
    df['Quantity'] = df['Quantity'] * factor
    
    return df

def apply_seasonal_pattern(df, pattern_frequency, pattern_intensity):
    """
    Scale 'Quantity' by a sine wave driven by the day of the year.

    Parameters:
    - df: DataFrame with datetime 'Order Date', 'Quantity' and 'Class_Label';
      modified in place.
    - pattern_frequency: "Weekly", "Monthly", "Quarterly" or "Yearly"
      (period of 7, 30, 90 or 365 days).
    - pattern_intensity: "Mild" (0.05), "Moderate" (0.1) or "Strong" (0.2).

    Returns:
    - The same ``df``.  'Quantity' is replaced by a float column; rows without
      a class label keep their quantity.
    """
    frequency_map = {
        "Weekly": 7,
        "Monthly": 30,
        "Quarterly": 90,
        "Yearly": 365
    }
    
    intensity_coefficients = {
        "Mild": 0.05,
        "Moderate": 0.1,
        "Strong": 0.2
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    freq = frequency_map[pattern_frequency]
    
    # Extracting the day of the year to apply the seasonal pattern
    day_of_year = df['Order Date'].dt.dayofyear.to_numpy(dtype=float)
    seasonal_factor = 1 + coeff * np.sin(2 * np.pi * day_of_year / freq)

    # Every labelled row gets the same date-driven factor; unlabelled rows are left alone.
    has_label = df['Class_Label'].notna().to_numpy()

    # Whole-column replacement avoids writing floats into an integer column
    # through .loc, which pandas deprecates.
    df['Quantity'] = df['Quantity'] * np.where(has_label, seasonal_factor, 1.0)
    
    return df

def apply_cyclic_pattern(df, pattern_frequency, pattern_intensity):
    """
    Scale 'Quantity' by a sine wave driven by the ISO week of the year.

    Parameters:
    - df: DataFrame with datetime 'Order Date', 'Quantity' and 'Class_Label';
      modified in place.
    - pattern_frequency: "Weekly", "Monthly", "Quarterly" or "Yearly"; selects
      the divisor (52, 18, 8 or 4) applied to the week number.
    - pattern_intensity: "Mild" (0.05), "Moderate" (0.1) or "Strong" (0.2).

    Returns:
    - The same ``df``.  'Quantity' is replaced by a plain float64 column; rows
      without a class label keep their quantity.
    """
    frequency_map = {
        "Weekly": 52,  # Simulating a cycle that repeats roughly every year
        "Monthly": 18,  # Simulating a cycle that repeats roughly every 1.5 years
        "Quarterly": 8,  # Simulating a cycle that repeats roughly every 2 years
        "Yearly": 4  # Simulating a cycle that repeats roughly every 4 years
    }
    
    intensity_coefficients = {
        "Mild": 0.05,
        "Moderate": 0.1,
        "Strong": 0.2
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    freq = frequency_map[pattern_frequency]
    
    # isocalendar() yields a nullable UInt32 week; convert to plain floats so the
    # result stays an ordinary float64 column instead of a masked one.
    week_of_year = df['Order Date'].dt.isocalendar().week.astype(float).to_numpy()
    cyclic_factor = 1 + coeff * np.sin(2 * np.pi * week_of_year / freq)

    # Every labelled row gets the same date-driven factor; unlabelled rows are left alone.
    has_label = df['Class_Label'].notna().to_numpy()

    # Whole-column replacement avoids writing floats into an integer column
    # through .loc, which pandas deprecates.
    df['Quantity'] = df['Quantity'] * np.where(has_label, cyclic_factor, 1.0)
    
    return df

def apply_customer_behavior_pattern(df, pattern_intensity):
    """
    Boost 'Quantity' for weekend orders and for even-numbered products.

    Parameters:
    - df: DataFrame with datetime 'Order Date', 'ProductKey', 'Quantity' and
      'Class_Label'; modified in place.
    - pattern_intensity: "Mild" (0.05), "Moderate" (0.1) or "Strong" (0.2).

    Returns:
    - The same ``df``.  Labelled rows ordered on Saturday or Sunday are
      multiplied by (1 + coeff); labelled rows with an even ProductKey are
      multiplied by (1 + coeff) again.  Rows without a class label are not
      changed.  'Quantity' is replaced by a float column.
    """
    intensity_coefficients = {
        "Mild": 0.05,
        "Moderate": 0.1,
        "Strong": 0.2
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    boost = 1 + coeff

    has_label = df['Class_Label'].notna().to_numpy()
    is_weekend = (df['Order Date'].dt.dayofweek >= 5).to_numpy()
    # Stand-in for "preferred products": those with an even key.
    is_preferred_product = (df['ProductKey'] % 2 == 0).to_numpy()

    # The two boosts are applied one after the other so rows matching both get both.
    quantity = df['Quantity'] * np.where(has_label & is_weekend, boost, 1.0)
    quantity = quantity * np.where(has_label & is_preferred_product, boost, 1.0)

    # Whole-column replacement avoids writing floats into an integer column
    # through .loc, which pandas deprecates.
    df['Quantity'] = quantity
    
    return df

def apply_promotional_impact_pattern(df, pattern_intensity):
    """
    Simulate a promotional sales spike by boosting 'Quantity' for December orders.

    Parameters:
    - df: DataFrame with datetime 'Order Date' and 'Quantity'; modified in place.
    - pattern_intensity: "Mild" (0.1), "Moderate" (0.2) or "Strong" (0.3).

    Returns:
    - The same ``df``.  December rows are multiplied by (1 + coeff), all other
      rows are unchanged.  'Quantity' is replaced by a float column.
    """
    intensity_coefficients = {
        "Mild": 0.1,
        "Moderate": 0.2,
        "Strong": 0.3
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    
    # Promotional period (e.g., holiday sales in December)
    is_promo_month = (df['Order Date'].dt.month == 12).to_numpy()

    # Whole-column replacement avoids writing floats into an integer column
    # through .loc, which pandas deprecates.
    df['Quantity'] = df['Quantity'] * np.where(is_promo_month, 1 + coeff, 1.0)
    
    return df

def apply_inventory_impact_pattern(df, pattern_intensity):
    """
    Simulate low-stock periods by reducing 'Quantity' in February, June and October.

    Parameters:
    - df: DataFrame with datetime 'Order Date' and 'Quantity'; modified in place.
    - pattern_intensity: "Mild" (0.1), "Moderate" (0.2) or "Strong" (0.3).

    Returns:
    - The same ``df``.  Rows in the low-stock months are multiplied by
      (1 - coeff), all other rows are unchanged.  'Quantity' is replaced by a
      float column.
    """
    intensity_coefficients = {
        "Mild": 0.1,
        "Moderate": 0.2,
        "Strong": 0.3
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    
    # Low inventory periods (e.g., low stock in February, June, and October)
    low_stock_months = [2, 6, 10]
    is_low_stock = df['Order Date'].dt.month.isin(low_stock_months).to_numpy()

    # Whole-column replacement avoids writing floats into an integer column
    # through .loc, which pandas deprecates.
    df['Quantity'] = df['Quantity'] * np.where(is_low_stock, 1 - coeff, 1.0)
    
    return df

def apply_anomalies_pattern(df, pattern_intensity):
    """
    Introduce random spikes or drops into 'Quantity' for 2% of the rows.

    Parameters:
    - df: DataFrame with 'Quantity'; modified in place.
    - pattern_intensity: "Mild" (0.1), "Moderate" (0.2) or "Strong" (0.3).

    Returns:
    - The same ``df``.  The affected rows are chosen with a fixed seed (42);
      whether each one is increased or decreased by ``coeff`` is drawn from
      NumPy's global random state.  When at least one row is selected,
      'Quantity' is replaced by a float column.
    """
    intensity_coefficients = {
        "Mild": 0.1,
        "Moderate": 0.2,
        "Strong": 0.3
    }
    
    coeff = intensity_coefficients[pattern_intensity]
    
    # Randomly select a small percentage of the data to introduce anomalies
    anomaly_indices = df.sample(frac=0.02, random_state=42).index
    direction = np.random.choice([-1, 1], size=len(anomaly_indices))  # Randomly choose increase or decrease

    if len(anomaly_indices) == 0:
        # Frame too small for any anomaly; nothing to change.
        return df

    # Neutral factor everywhere, spike/drop factor on the selected rows.
    anomaly_factors = pd.Series(1.0, index=df.index)
    anomaly_factors.loc[anomaly_indices] = 1 + direction * coeff

    # Whole-column replacement avoids multiplying an integer column slice by
    # floats in place, which relies on upcasting that pandas deprecates.
    df['Quantity'] = df['Quantity'] * anomaly_factors.to_numpy()
    
    return df

## FINAL EXECUTION

In [29]:
def generate_custom_or_scenario_data(selected_scenario, Forecast_Columns, Classifier_Columns, pattern_type, pattern_direction, pattern_intensity, pattern_frequency):
    """
    Generate data based on user's selection of Scenario or Custom parameters.

        Parameters:
        - selected_scenario: Name of the scenario chosen by the user, or "Custom".
        - Forecast_Columns: Columns to build forecasting features for (["None"] disables forecasting).
        - Classifier_Columns: Columns used to classify customers (["None"] disables classification).
        - pattern_type: Pattern applied to the classified sales ("None", "Trend", "Seasonal",
          "Cyclic", "Customer Behavior", "Promotional Impact", "Inventory Impact", "Anomalies").
        - pattern_direction, pattern_intensity, pattern_frequency: Settings forwarded to the
          selected pattern function.

        The remaining custom settings (trend, seasonality, missing_data, cyclicity, noise_level,
        class_imbalance, num_classes, class_distribution, feature_relations) and the loaded
        tables (orders, order_rows, customer, product, date_data, sales_df) are read from
        module-level variables.  The scenario branches add columns to the module-level
        customer / product tables.

        Returns:
        - Dictionary containing modified DataFrames based on user's selection ('Sales Enriched'
          and/or 'Customer Enriched'); empty when the scenario name is not recognized.
        - None when "Custom" is selected but neither forecasting nor classification is enabled.
    """
    
    dfs_to_export = {}  # Dictionary to store all dataframes to be exported
    
    # If the user has chosen a scenario
    if selected_scenario != "Custom":
        if selected_scenario == "Age-based Spending Patterns":
            # Re-defining the spending adjustments based on the scenario parameters
            spending_adjustments = {
                '18-30': 1.2,  # 20% increase for High spending
                '31-45': 1,   # No change for Medium spending
                '46-60': 0.8, # 20% decrease for Low spending
                '60+': 1      # No change for Medium spending
            }
            # Enrichment 1
            sales_with_order_details = pd.merge(order_rows, orders[['OrderKey', 'CustomerKey', 'Order Date']], on='OrderKey')
            sales_with_age = pd.merge(sales_with_order_details, customer[['CustomerKey', 'Age']], on='CustomerKey')
            sales_with_age['Adjusted Net Price'] = sales_with_age.apply(lambda row: S1E1_adjust_spending_by_age(row, spending_adjustments), axis=1)
            # Enrichment 2
            sales_with_age['Adjusted Order Date'] = sales_with_age.apply(S1E2_adjust_order_time, axis=1)
            # Enrichment 3
            customer['PreferredCommunication'] = customer['Age'].apply(S1E3_assign_communication_channel)
            dfs_to_export['Customer Enriched'] = customer
            dfs_to_export['Sales Enriched'] = sales_with_age
            print(f"Data has been modified based on the '{selected_scenario}' scenario.")
        elif selected_scenario == "Rare-class Customer Identification": 
            # Enrichment 1: Labeling Valuable & Unique Spending Patterns
            valuable_percentage = 0.05  # 5% of customers
            customer['ValuableCustomer'] = False
            valuable_customer_indices = np.random.choice(customer.index, int(valuable_percentage * len(customer)), replace=False)
            customer.loc[valuable_customer_indices, 'ValuableCustomer'] = True
            # Enrichment 2: Higher average order values, Preference for premium products
            sales_with_valuable_flag = pd.merge(order_rows, orders[['OrderKey', 'CustomerKey']], on='OrderKey')
            sales_with_valuable_flag = pd.merge(sales_with_valuable_flag, customer[['CustomerKey', 'ValuableCustomer']], on='CustomerKey')
            sales_with_valuable_flag['Adjusted Quantity'] = sales_with_valuable_flag.apply(S2E1_adjust_spending_and_preference, axis=1)
            ## Enrichment 3: frequent shopping
            valuable_customers_list = customer[customer['ValuableCustomer']]['CustomerKey'].tolist()
            additional_orders = S2E2_generate_additional_orders_for_valuable(valuable_customers_list, sales_with_valuable_flag)
            dfs_to_export['Customer Enriched'] = customer
            # Export the enriched sales together with the extra orders; exporting the
            # extra orders alone would drop every original sale and its adjusted quantity.
            dfs_to_export['Sales Enriched'] = pd.concat([sales_with_valuable_flag, additional_orders], ignore_index=True)
            print(f"Data has been modified based on the '{selected_scenario}' scenario.")
        elif selected_scenario == "Seasonal Product Preferences": 
            seasonal_categories = {
                'Swimwear': 'Summer',
                'Winter Gear': 'Winter',
                'Gardening Tools': 'Spring',
                'School Supplies': 'Fall'
            }
            product['PeakSeason'] = product['Category'].map(seasonal_categories).fillna('None')
            sales_with_date = pd.merge(order_rows, orders[['OrderKey', 'Order Date']], on='OrderKey')
            sales_with_month = pd.merge(sales_with_date, date_data[['Date', 'Month']], left_on='Order Date', right_on='Date')
            sales_with_season = pd.merge(sales_with_month, product[['ProductKey', 'Category', 'PeakSeason']], on='ProductKey')           
            sales_with_season['Adjusted Net Price for Season'] = sales_with_season.apply(S3E1_adjust_sales_by_season, axis=1)
            dfs_to_export['Sales Enriched'] = sales_with_season[['OrderKey', 'ProductKey', 'Category', 'Net Price', 'Adjusted Net Price for Season', 'Month', 'PeakSeason']]
            print(f"Data has been modified based on the '{selected_scenario}' scenario.")
        else:
            print(f"Scenario '{selected_scenario}' not recognized. No Changes were made")    
        return dfs_to_export

    # ----- Custom parameters -----
    run_forecasting = Forecast_Columns != ["None"]
    run_classification = Classifier_Columns != ["None"]

    if not (run_forecasting or run_classification):
        print("No operations selected. Exiting...")
        return None

    # Classification builds on the forecasting output when both are enabled.
    base_sales = sales_df
    if run_forecasting:
        base_sales = add_forecasting_features(sales_df, Forecast_Columns, trend, seasonality, missing_data, cyclicity, noise_level)
        dfs_to_export['Sales Enriched'] = base_sales
        print("Data has been modified based on forecasting parameters.")

    if not run_classification:
        return dfs_to_export

    # Forward the user's classification settings instead of relying on the defaults.
    classified_customer_df = classify_customer_attributes(
        customer.copy(),
        classifier_columns=Classifier_Columns,
        class_imbalance=class_imbalance,
        num_classes=num_classes,
        class_distribution=class_distribution,
        feature_relations=feature_relations,
    )
    dfs_to_export['Customer Enriched'] = classified_customer_df
    sales_df_classified = pd.merge(base_sales, classified_customer_df[['CustomerKey', 'Class_Label']], on='CustomerKey', how='left')

    if pattern_type == "None":
        dfs_to_export['Sales Enriched'] = sales_df_classified
        print("Data has been modified based on classification parameters.")
        return dfs_to_export

    # pattern_type -> (label used in the log message, function applying the pattern).
    # Only the selected pattern is applied.
    pattern_appliers = {
        "Trend": ("Trend", lambda frame: apply_trend_pattern(frame, pattern_direction, pattern_intensity)),
        "Seasonal": ("Seasonal", lambda frame: apply_seasonal_pattern(frame, pattern_frequency, pattern_intensity)),
        "Cyclic": ("Cyclic", lambda frame: apply_cyclic_pattern(frame, pattern_frequency, pattern_intensity)),
        "Customer Behavior": ("CB", lambda frame: apply_customer_behavior_pattern(frame, pattern_intensity)),
        "Promotional Impact": ("PI", lambda frame: apply_promotional_impact_pattern(frame, pattern_intensity)),
        "Inventory Impact": ("II", lambda frame: apply_inventory_impact_pattern(frame, pattern_intensity)),
        "Anomalies": ("Anomalies", lambda frame: apply_anomalies_pattern(frame, pattern_intensity)),
    }

    if pattern_type not in pattern_appliers:
        print(f"Pattern type '{pattern_type}' not recognized. No pattern was applied.")
        return dfs_to_export

    message_label, apply_pattern = pattern_appliers[pattern_type]
    dfs_to_export['Sales Enriched'] = apply_pattern(sales_df_classified.copy())
    print(f"Data has been modified based on classification parameters, {message_label} pattern.")
    return dfs_to_export


def export_dataframes(dfs, save_directory, format="csv"):
    """
    Write every dataframe in ``dfs`` to ``save_directory`` as a delimited text file.

    Parameters:
    - dfs: Dictionary mapping an export name to the dataframe to write.
    - save_directory: Existing directory that receives the files.
    - format: "csv" writes "<name>.csv" with commas, "pipe" writes "<name>.txt"
      with "|" as separator.  Any other value prints a message and stops.

    Returns:
    - None
    """
    # format -> (file extension, extra keyword arguments for DataFrame.to_csv)
    writers = {
        "csv": (".csv", {}),
        "pipe": (".txt", {"sep": "|"}),
    }

    for name, frame in dfs.items():
        writer = writers.get(format)
        if writer is None:
            print(f"Format '{format}' not recognized.")
            return
        extension, csv_options = writer
        filepath = os.path.join(save_directory, f"{name}{extension}")
        frame.to_csv(filepath, index=False, **csv_options)
        print(f"Dataframe '{name}' exported to {filepath}")

################ EXPORTING ########################
dfs_to_export = generate_custom_or_scenario_data(selected_scenario, Forecast_Columns, Classifier_Columns, pattern_type, pattern_direction, pattern_intensity, pattern_frequency)

if dfs_to_export:  # None or an empty dict means there is nothing to write
    save_directory = 'Modified Files'
    # exist_ok=True creates the folder when missing without a separate existence check
    os.makedirs(save_directory, exist_ok=True)
    export_dataframes(dfs_to_export, save_directory, format=output_format)  # Format can be "csv" or "pipe"

Data has been modified based on the 'Rare-class Customer Identification' scenario.
Dataframe 'Customer Enriched' exported to Modified Files\Customer Enriched.txt
Dataframe 'Sales Enriched' exported to Modified Files\Sales Enriched.txt
