
# As we donot have access to any company CRM lets Generate the data using faker in python

In [1]:
from faker import Faker
import pandas as pd
import random

# Initialize Faker object with Indian locale
fake = Faker('en_IN')

In [2]:
# Generate sample customer data
num_customers = 1000  # Number of customers to generate

customer_data = []
for customer_id in range(1, num_customers + 1):
    customer = {
        'CustomerID':customer_id,                        # Unique customer ID
        'FirstName': fake.first_name(),                   # First name
        'LastName': fake.last_name(),                     # Last name
        'Email': fake.email().replace('@example.', '@gmail.'),  # Email address with @gmail.com domain
        'PhoneNumber': fake.random_number(digits=10),     # 10-digit phone number
        'Address': fake.street_address(),                 # Indian address
        'City': fake.random_element(elements=('Pune', 'Mumbai', 'Aurangabad', 'Nashik', 'Nagpur')),  # Indian city
        'State': 'Maharashtra',                           # State set to Maharashtra
        'ZipCode': '',                                    # Postal code to be updated based on city
        'Country': 'India',                               # Country set to India
        'Age': fake.random_int(min=18, max=90),           # Age
        'Gender': fake.random_element(['Male', 'Female']),# Gender
    }
    customer_data.append(customer)


# In real-life scenario we have data frames like purchase history customer data marketing pulls demographics and website browsing behaviour

In [3]:
# Generate sample purchase history
products = ['Laptop', 'Smartphone', 'Headphones', 'Shoes', 'Jacket', 'Watch', 'Camera']
purchase_data = []
for customer_id in range(1, num_customers + 1):
    num_purchases = random.randint(1, 5)
    for _ in range(num_purchases):
        purchase = {
            'CustomerID': customer_id,                                      # Customer ID
            'Product': random.choice(products),                            # Product purchased
            'Price': round(random.uniform(50, 1000), 2),                   # Purchase price
            'Quantity': random.randint(1, 3),                              # Quantity purchased
            'PurchaseDate': fake.date_time_between(start_date='-1y', end_date='now', tzinfo=None),  # Purchase date
        }
        purchase_data.append(purchase)

In [4]:

# Generate sample website browsing behavior
pages_visited = ['Home', 'Products', 'About Us', 'Contact', 'Special Offers']
website_data = []
for customer_id in range(1, num_customers + 1):
    num_visits = random.randint(1, 10)
    for _ in range(num_visits):
        visit = {
            'CustomerID': customer_id,                                      # Customer ID
            'PageVisited': random.choice(pages_visited),                   # Page visited
            'VisitDate': fake.date_time_between(start_date='-1y', end_date='now', tzinfo=None),  # Visit date
        }
        website_data.append(visit)


# Previous results of marketing campaings of comapany we will try to improve this usig data Analysis 

In [5]:
# Generate sample interactions with marketing campaigns
campaigns = ['Email', 'SMS', 'Social Media', 'Direct Mail']
interaction_data = []
for customer_id in range(1, num_customers + 1):
    num_interactions = random.randint(0, 3)
    for _ in range(num_interactions):
        interaction = {
            'CustomerID': customer_id,                                      # Customer ID
            'Campaign': random.choice(campaigns),                           # Marketing campaign
            'InteractionDate': fake.date_time_between(start_date='-1y', end_date='now', tzinfo=None),  # Interaction date
            'Outcome': random.choice(['Clicked', 'Opened', 'Converted', 'Unsubscribed']),  # Interaction outcome
        }
        interaction_data.append(interaction)

In [6]:
# Update postal codes based on city
for customer in customer_data:
    if customer['City'] == 'Pune':
        customer['ZipCode'] = '411021'
    elif customer['City'] == 'Mumbai':
        customer['ZipCode'] = random.choice(['40215', '45589', '4065'])
    elif customer['City'] == 'Aurangabad':
        customer['ZipCode'] = '40080'
    elif customer['City'] == 'Nashik':
        customer['ZipCode'] = '45021'
    elif customer['City'] == 'Nagpur':
        customer['ZipCode'] = '40001'


In [7]:
# Create DataFrames from the generated data
customers_df = pd.DataFrame(customer_data).set_index('CustomerID')
purchases_df = pd.DataFrame(purchase_data)
website_df = pd.DataFrame(website_data)
interactions_df = pd.DataFrame(interaction_data)


In [8]:
# Reset index
customers_df.reset_index(inplace=True)

# Save each DataFrame to separate CSV files
customers_df.to_csv('customer_data.csv', index=False)
purchases_df.to_csv('purchase_history.csv', index=False)
website_df.to_csv('website_browsing_behavior.csv', index=False)
interactions_df.to_csv('marketing_campaigns.csv', index=False)

In [9]:
# Merge all DataFrames
merged_df = pd.concat([customers_df, purchases_df, website_df, interactions_df], axis=1)

# Save merged DataFrame to CSV file
merged_df.to_csv('retail_data.csv', index=False)

In [10]:

# Display sample data
print("Sample Merged Data:")
print(merged_df.head())


Sample Merged Data:
  CustomerID FirstName LastName                    Email   PhoneNumber  \
0        1.0       Eva    Varma    sahnivedika@gmail.com  3.884212e+09   
1        2.0     Zaina    Koshy  indrajitdalal@gmail.com  7.080247e+09   
2        3.0     Mehul     Gour    vritikakeer@gmail.net  5.619663e+09   
3        4.0      Rhea   Sachar     bvarughese@gmail.org  3.303367e+09   
4        5.0  Neelofar    Baral   maharajumang@gmail.org  3.761430e+09   

                 Address    City        State ZipCode Country  ...   Price  \
0      14, Gulati Circle  Nagpur  Maharashtra   40001   India  ...  212.16   
1  H.No. 50, Dora Circle  Nagpur  Maharashtra   40001   India  ...  672.93   
2   H.No. 773, Kar Chowk  Nagpur  Maharashtra   40001   India  ...  549.29   
3          220\nJha Path  Nagpur  Maharashtra   40001   India  ...  947.58   
4   62/57\nChauhan Chowk  Mumbai  Maharashtra    4065   India  ...  354.49   

  Quantity        PurchaseDate CustomerID  PageVisited           V