<a href="https://colab.research.google.com/github/Ekeneobi/Customer-Insights/blob/main/Customer_Data_Scamble_Data_for_Test_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import re

def scramble_columns(df, columns, noise_range=(3, 5), *, random_state=None):
    """
    Scramble numeric columns of a DataFrame by adding uniform random noise.

    Each listed column gets an independent noise vector drawn uniformly
    between ``noise_range[0]`` and ``noise_range[1]``, added element-wise to
    the original values. With the default range both bounds are positive, so
    every value is shifted upward. The input DataFrame is not modified.

    Args:
      df: The pandas DataFrame.
      columns: An iterable of column names to scramble. The columns must
        support addition with a float array (i.e. be numeric).
      noise_range: A pair ``(low, high)`` giving the bounds of the noise.
      random_state: Optional seed (or ``numpy.random.Generator``) used to make
        the noise reproducible. When ``None`` (the default) the global
        ``np.random`` state is used, exactly as before, so callers relying on
        ``np.random.seed(...)`` keep working.

    Returns:
      A new DataFrame with the specified columns scrambled.

    Raises:
      KeyError: If a name in ``columns`` is not a column of ``df``.
    """
    low, high = noise_range[0], noise_range[1]

    # Both the legacy ``np.random`` module and a ``Generator`` expose
    # ``uniform(low, high, size)``, so either can serve as the noise source.
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.default_rng(random_state)

    scrambled_df = df.copy()
    n_rows = len(df)
    for col in columns:
        # A fresh draw per column keeps the noise independent across columns.
        scrambled_df[col] = df[col] + rng.uniform(low, high, size=n_rows)
    return scrambled_df

def replace_strings_with_regex(df, columns, replacements):
    """
    Apply per-column regex substitutions and return the result as a new DataFrame.

    For every column named in ``columns`` that also has an entry in
    ``replacements``, the column's patterns are applied one after another in
    dictionary order; each substitution operates on the output of the previous
    one. Columns listed without a ``replacements`` entry are left untouched,
    and ``replacements`` entries for columns not listed are ignored. The input
    DataFrame is not modified.

    Args:
      df: The pandas DataFrame.
      columns: A list of column names to replace values in.
      replacements: A dictionary mapping a column name to a dictionary of
        regex_pattern: replacement_value pairs.

    Returns:
      A new DataFrame with the replaced values.
    """
    result = df.copy()

    for name in columns:
        if name not in replacements:
            # No rules configured for this column.
            continue
        for regex, substitute in replacements[name].items():
            current = result[name]
            result[name] = current.str.replace(regex, substitute, regex=True)

    return result

# Example Usage:
# Load the CSV file into a pandas DataFrame
df = pd.read_csv('/content/scrambled_data.csv')

# Specify the (numeric) columns to scramble with additive noise
columns_to_scramble = [
    'Conversions_keywords',
    'Cost',
    '2024 CTR',
    'Sessions Per User',
    'Average Session Duration',
    'Bounce Rate',
    'Engagement Rate',
    'CPC',
    'metrics_interactionRate',
    'Cost_keywords',
    'Interation_rate_keywords',
    'Clicks',
    'Impressions',
    'New Users',
    'Sessions',
    'Total Users',
    'User Engagement',
    'Engaged Sessions',
    'Conversions',
    'Views',
    'Event Count',
    'FB Leads',
    'Clicks_keywords',
    'CostMicros_keywords',
    'Impressions_keywords',
]

# Scramble the specified columns
scrambled_df = scramble_columns(df, columns_to_scramble)

# Text columns whose values are replaced by generic sample labels
columns_to_replace = [
    'Keywords',
    '2024 Products',
    '2024 Audience',
    'Campaign Name',
    'Ad Name',
    'Page Title',
]

# Per-column regex -> label rules. Each pattern is wrapped in `.*` so a match
# replaces the whole cell. Rules run in the order written, so a more specific
# pattern must come before a broader one that would also match
# (e.g. 'Merck Animal Health USA' before 'Merck' under 'Page Title').
replacements = {
    'Keywords': {
        '.*Merck.*': 'Keywords Samples',
        '.*vaccin.*': 'Keywords Samples 1',
        '.*disease.*': 'Keywords Samples 2',
        '.*cattle.*': 'Keywords Samples 3',
    },
    '2024 Products': {
        '.*Vista.*': 'Product Samples',
        '.*Nasalgen.*': 'Product Samples 1',
        '.*Vision.*': 'Product Samples 2',
        '.*Safe.*': 'Product Samples 3',
        '.*Revalor.*': 'Product Samples 4',
        '.*Evolution.*': 'Product Samples 5',
        '.*Resflor.*': 'Product Samples 6',
        '.*Zuprevo.*': 'Product Samples 7',
        '.*Ralgro.*': 'Product Samples 8',
        '.*SenseHub.*': 'Product Samples 9',
    },
    '2024 Audience': {
        '.*Cow Calf.*': 'Audience Sample',
        '.*Feedlot.*': 'Audience Sample 1',
        '.*Vets.*': 'Audience Sample 2',
    },
    'Campaign Name': {
        '.*Implants.*': 'Campaign Samples',
        '.*Parastiticides.*': 'Campaign Samples 1',
        '.*Anti.*': 'Campaign Samples 2',
        '.*Vaccines.*': 'Campaign Samples 3',
    },
    'Ad Name': {
        '.*Vista.*': 'Ad Name Samples',
        '.*Nasalgen.*': 'Ad Name Samples 1',
        '.*Vision.*': 'Ad Name Samples 2',
        '.*Safe.*': 'Ad Name Samples 3',
        '.*Revalor.*': 'Ad Name Samples 4',
        '.*Evolution.*': 'Ad Name Samples 5',
        '.*Resflor.*': 'Ad Name Samples 6',
    },
    'Page Title': {
        '.*Vista.*': 'Page Title Samples',
        '.*Nasalgen.*': 'Page Title Samples 1',
        '.*Vision.*': 'Page Title Samples 2',
        '.*Safe.*': 'Page Title Samples 3',
        '.*Revalor.*': 'Page Title Samples 4',
        '.*Evolution.*': 'Page Title Samples 5',
        '.*Resflor.*': 'Page Title Samples 6',
        '.*Merck Animal Health USA.*': 'Page Title Samples 7',
        '.*Merck.*': 'Page Title Samples 8',
    },
}

scrambled_df = replace_strings_with_regex(scrambled_df, columns_to_replace, replacements)

# Save the scrambled data to a new CSV file
# NOTE(review): the output path is relative while the input path is absolute.
# If the working directory is /content (presumably the case on Colab), this
# overwrites the input file and re-running adds noise on top of noise — confirm
# this is intended.
scrambled_df.to_csv('scrambled_data.csv', index=False)

# Reload the file from disk (presumably to inspect what was written).
# This statement was indented at module level, which raises an
# IndentationError when the cell is executed as-is.
df = pd.read_csv('/content/scrambled_data.csv')
