In [1]:
import pandas as pd
import random
import string
from faker import Faker
from datetime import datetime, timedelta, date


##### Generating fake customer data

In [2]:
fake = Faker()

def generate_customer_data(num_records, countries=None):
    if countries is None:
        countries = {'en_US': 'United States'}
    
    data = []
    for _ in range(num_records):
        customer_id= fake.unique.random_number(digits=6,fix_len=True)
        birth_date = fake.date_of_birth(minimum_age=18, maximum_age=90).strftime('%Y-%m-%d')
        cusip = fake.random_number(digits=9, fix_len=True)
        duns = fake.random_number(digits=9, fix_len=True)
        ftn = '-'.join([''.join(random.choices(string.digits, k=3)) for _ in range(3)])
        name_first = fake.first_name()
        name_last = fake.last_name()
        name_middle = fake.first_name()
        name_prefix = fake.prefix()
        name_suffix = fake.suffix()
        gender = random.choice(['male', 'female'])
        name_type = random.choice(['corporate', 'individual'])
        ticker = fake.random_element(['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'FB']) if random.choice([True, False]) else None
        email = fake.email()
        employer = fake.company()
        fax = fake.phone_number() if name_type == 'corporate' else None
        phone = fake.phone_number()
        role_id = random.randint(1, 10)  # Assuming role_id is an integer between 1 and 10
        flag_id = random.randint(1, 5)   # Assuming flag_id is an integer between 1 and 5

        locale = random.choice(list(countries.keys()))
        fake.locale = locale
        street_address = fake.street_address()
        city = fake.city()
        state = fake.state_abbr()
        country = countries[locale]
        zip_code = fake.zipcode()
        zip_ext = fake.zipcode_plus4()

        data.append([
           customer_id, birth_date, cusip, duns, ftn, name_first,
            f"{name_first} {name_middle} {name_last}",
            fake.random_element(elements=['Sr.', 'Jr.', 'III', 'IV', 'V']),
            name_last, name_middle, name_prefix, name_suffix, gender, name_type,
            ticker, email, employer, fax, phone,
            role_id, flag_id,
            street_address, city, state, country, zip_code, zip_ext
        ])

    columns = ['customer_id','birth_date', 'cusip', 'duns', 'ftn', 'name_first',
               'full_name', 'name_suffix', 'name_last', 'name_middle', 'name_prefix', 'name_suffix', 'gender',
               'name_type', 'ticker', 'email', 'employer', 'fax', 'phone', 'role_id', 'flag_id',
               'street_address', 'city', 'state', 'country', 'zip_code', 'zip_ext']

    return pd.DataFrame(data, columns=columns)



##### Genearting fake account data 

In [3]:
def generate_account_data(num_records):


    data = []
    for _ in range(num_records):
        account_number = fake.unique.random_number(digits=20, fix_len=True)
        bank_id = fake.unique.random_number(digits=6, fix_len=True)
        d_string = '01-01-2024'
        end_dt = pd.to_datetime(d_string)
        account_opened = fake.date_between(start_date='-25y', end_date= end_dt)
        account_status = random.choice(['Active', 'Inactive', 'Purge'])

  

        account_type = fake.random_element(elements=['Savings Account', 'Checking Account', 'Investment Account'])
        account_purpose = fake.sentence(nb_words=6, variable_nb_words=True)
        account_ownership = fake.random_element(elements=['Individual', 'Joint', 'Business Entity'])
        additional_designations = fake.sentence(nb_words=8, variable_nb_words=True)
        if account_ownership == 'Business Entity':
            account_holder_name = fake.company()
        else:
            account_holder_name = fake.name()

        if account_ownership == 'Business Entity':
            certification_status = fake.random_element(elements=['Certified', 'Not Certified', 'Pending'])
            regulatory_certification_status = fake.random_element(elements=['Certified', 'Not Certified', 'Pending'])
            package_id = fake.unique.random_number(digits=5, fix_len=True)
            package_description = fake.catch_phrase()
            business_classification_code = fake.unique.random_number(digits=5, fix_len=True)
            business_classification_description = fake.catch_phrase()
        else:
            certification_status = regulatory_certification_status = package_id = package_description = \
                business_classification_code = business_classification_description = None

        tax_number = fake.random_number(digits=9, fix_len=True)
        tax_type = random.choice(['SSN', 'EIN'])
        risk_id = random.randint(1, 10)  # Assuming risk_id is an integer between 1 and 10

        start_date = account_opened + timedelta(days=30)
        
        if random.choice([True, False]):
            account_close_date = fake.date_between(start_date=start_date, end_date=date.today())
        else:
            account_close_date = None

        data.append([
            account_number, bank_id, account_opened, account_status,
            account_holder_name, account_type, account_purpose,
            account_ownership, additional_designations,
            certification_status, regulatory_certification_status,
            package_id, package_description, tax_number, tax_type, risk_id,
            account_close_date, business_classification_code,
            business_classification_description
        ])

    columns = ['account_number', 'bank_id', 'account_opened', 'account_status',
               'account_holder_name', 'account_type', 'account_purpose',
               'account_ownership', 'additional_designations',
               'certification_status', 'regulatory_certification_status',
               'package_id', 'package_description', 'tax_number', 'tax_type', 'risk_id',
               'account_close_date', 'business_classification_code',
               'business_classification_description']

    return pd.DataFrame(data, columns=columns)

##### Generating fake addresses and branch code and assigning to random accounts

In [4]:
def generate_address_data(num_records, countries=None):
    fake = Faker()
    if countries is None:
        countries = {'en_US': 'United States'}

    street_address = []
    city = []
    state = []
    country = []
    zip_code = []
    zip_ext = []
    branch = []

    for _ in range(num_records):
        locale = random.choice(list(countries.keys()))
        fake.locale = locale

        branch.append(fake.random_number(digits=4, fix_len=True))
        street_address.append(fake.street_address())
        city.append(fake.city())
        state.append(fake.state_abbr())
        country.append(countries[locale])
        zip_code.append(fake.zipcode())
        zip_ext.append(fake.zipcode_plus4())

    address_data = {
        'branch_id': branch,
        'street_address': street_address,
        'city': city,
        'state': state,
        'country': country,
        'zip_code': zip_code,
        'zip_ext': zip_ext
    }

    return pd.DataFrame(address_data)

# If you want to generate addresses with different countries:
# address_df = generate_address_data(1000, countries={'en_US': 'United States', 'en_GB': 'United Kingdom'})


In [5]:
address_data=generate_address_data(10)
branch_address_data=pd.DataFrame(columns=address_data.columns)
for i in range(1000):
    branch_address_data=pd.concat([branch_address_data,address_data.sample(2)])

In [6]:
branch_address_data.reset_index(drop=True,inplace=True)

In [7]:
customer_data=generate_customer_data(1000)

In [8]:
account_data=generate_account_data(2000)
account_data=pd.concat([account_data,branch_address_data],axis=1)
account_data['customer_id'] = [random.choice(customer_data['customer_id']) for i in range(len(account_data))]

In [10]:
product_data=pd.read_csv('Data/product.csv')  # import  product.csv 

products=pd.DataFrame({'account_number':random.sample(list(account_data.account_number),k=2000),
                    'product_id':random.choices(product_data.product_id, k=2000),
                      })
print(products)


products=account_data.merge(products,on='account_number',how='inner')
products=products[['customer_id','product_id','account_number']]
products['customer_id']=products['customer_id']
products.drop_duplicates(inplace=True)

            account_number  product_id
0     58846836860287965957         562
1     93387993470494894639         398
2     13698803464290973746         305
3     15481214287709978357         196
4     56815153848089907912         281
...                    ...         ...
1995  29780028465301006079         339
1996  44977594541331075006         171
1997  22494283812647262284         839
1998  69334470575789567841         951
1999  45891376572578700868         996

[2000 rows x 2 columns]


In [11]:
product_data

Unnamed: 0.1,Unnamed: 0,product_id,name,category
0,0,867,Student Loans,Loan
1,1,971,"Virtual Wallet Checking Pro with Spend, Reserv...",Checking
2,2,535,Customer Service,Online & Mobile Banking
3,3,996,Investment Solutions,Investment and Wealth Planning
4,4,569,Tax-Efficient Investing,Investment and Wealth Planning
5,5,299,PNC Cash UnlimitedSM Visa Signature® Credit Card,Credit Card
6,6,767,Virtual Wallet® Checking Pro,Checking and Savings Account
7,7,36,TurboTax®,Online & Mobile Banking
8,8,15,Send Money to people you know and trust with Z...,Online & Mobile Banking
9,9,959,PNC High Yield Savings®,Savings Account


In [12]:
products

Unnamed: 0,customer_id,product_id,account_number
0,874852,520,85823615136333207510
1,571374,502,42097252895672518852
2,639532,281,82240279093306742028
3,739384,928,79281559701093455174
4,543594,861,39365005522298388958
...,...,...,...
1995,551796,636,13842690391550423401
1996,189905,774,37667561898367653093
1997,736986,305,80745703155202171593
1998,469501,298,38501132325571998048


In [14]:
#saving all files

products.to_csv('Data_new/product_customer.csv',index=False)

account_data.to_csv('Data_new/account.csv',index_label=False,index=False)

customer_data.to_csv('Data_new/customer.csv',index=False)

##### Generating data for all other nodes with there connecting links

In [15]:
# generate_engagement():
engagement=   pd.DataFrame ({
            "engagement_id": [fake.unique.random_number(digits=4,fix_len=True) for i in range(len(product_data))],
            "timestamp": [fake.date_time_this_decade() for i in range(len(product_data))],
            "type_of_engagement": [random.choice(["Phone Call", "Branch Visit", "Online Interaction"]) for i in range(len(product_data))],
            "interaction_details": [fake.sentence() for i in range(len(product_data))],
            "outcome": [random.choice(["Resolved", "Unresolved", "Transaction Completed"]) for i in range(len(product_data))],
            "associated_product": [product_data.iloc[i,1] for i in range(len(product_data))],
            "customer_id" : [product_data.iloc[i,0] for i in range(len(product_data))]
        })


engagement.to_csv("Data_new/engagement.csv", index=False)

In [16]:
df=engagement[engagement['outcome']=='Unresolved']
df.reset_index(drop=True,inplace=True)
next_engagement=   pd.DataFrame ({
            "engagement_id": [fake.unique.random_number(digits=4,fix_len=True) for i in range(len(df))],
            "timestamp": [fake.date_time_this_decade() for i in range(len(df))],
            "type_of_engagement": [random.choice(["Phone Call", "Branch Visit", "Online Interaction"]) for i in range(len(df))],
            "interaction_details": [fake.sentence() for i in range(len(df))],
            "outcome": [random.choice(["Resolved", "Unresolved", "Transaction Completed"]) for i in range(len(df))],
            "associated_product": [df.loc[i,'associated_product'] for i in range(len(df))],
            "previous_engagement":[df.loc[i,'engagement_id'] for i in range(len(df))]
        })  


next_engagement.to_csv("Data_new/next_engagement.csv", index=False)

In [17]:
all_engagement=pd.concat([engagement,next_engagement],join='inner',ignore_index=True)
all_engagement.to_csv('Data_new/all_engagement.csv',index=False)

In [18]:
all_engagement

Unnamed: 0,engagement_id,timestamp,type_of_engagement,interaction_details,outcome,associated_product
0,1529,2023-12-19 03:47:11,Online Interaction,Fight show purpose indicate begin shake act.,Unresolved,867
1,3316,2022-04-20 12:33:23,Online Interaction,Remember serious remember learn though.,Resolved,971
2,3902,2022-07-29 02:09:45,Branch Visit,Side method guy feeling safe.,Resolved,535
3,9300,2023-03-27 16:11:59,Online Interaction,Us mouth general this value national increase.,Transaction Completed,996
4,5174,2022-08-08 00:22:54,Phone Call,Training fact way different.,Resolved,569
...,...,...,...,...,...,...
64,5266,2020-03-01 23:06:22,Branch Visit,Win soldier first read bank.,Resolved,784
65,5003,2022-06-30 02:11:23,Phone Call,Chair cause across kind region might above.,Unresolved,928
66,5720,2021-11-17 23:43:41,Branch Visit,Professional move put instead affect end law.,Unresolved,398
67,5532,2023-03-08 17:45:01,Phone Call,Social future partner town report feeling myself.,Resolved,168


In [19]:
Channel={"Phone":1, "Email":2, "In-person":3,"Banner":4,"Social-Media":5}

#impression

df=pd.read_csv('Data/product.csv')
impression=pd.DataFrame({
        "impression_id": [fake.unique.random_number(digits=3,fix_len=True) for i in range(52)],
        "timestamp": [fake.date_time_this_decade() for i in range(52)],
        "channel": [random.choice(["Website", "Email", "Social Media"]) for i in range(52)],
        "content": [fake.sentence() for i in range(52)],
        "target_audience": [fake.word() for i in range(52)],
        "response": [random.choice(["Positive", "Negative", "Neutral"]) for i in range(52)],
        "product_id":df.product_id,
        "channel": [random.choice(list(Channel.keys())) for i in range(52)]
})  

impression['channel_id']=[Channel[key] for key in impression['channel']]
impression.to_csv('Data_new/impression.csv',index=False)

In [20]:
impression

Unnamed: 0,impression_id,timestamp,channel,content,target_audience,response,product_id,channel_id
0,786,2020-03-12 07:07:16,Phone,Available address activity evidence hotel.,leave,Positive,867,1
1,674,2022-06-26 05:33:06,In-person,No go range piece.,fill,Neutral,971,3
2,963,2020-07-02 02:21:56,Social-Media,Western affect help city be since catch.,interview,Positive,535,5
3,435,2020-03-05 19:54:19,Phone,Able positive could country piece.,quickly,Positive,996,1
4,350,2021-06-24 21:28:57,Email,Evening strong deal.,wall,Neutral,569,2
5,386,2023-08-14 05:45:38,Email,Help federal activity.,class,Negative,299,2
6,978,2021-07-31 12:40:49,Phone,Leader address character.,go,Positive,767,1
7,714,2023-08-16 01:40:00,Email,Building owner thank.,American,Positive,36,2
8,262,2020-02-20 02:56:42,Email,A rest forward program important do or age.,meet,Negative,15,2
9,519,2021-05-09 06:37:39,Banner,Skin spend minute gas.,contain,Negative,959,4


In [21]:
product_data=products.merge(impression,on='product_id',how='inner')
product_data=product_data[['customer_id','product_id','account_number','impression_id']]

In [22]:
product_data

Unnamed: 0,customer_id,product_id,account_number,impression_id
0,874852,520,85823615136333207510,973
1,940764,520,81530773566307359926,973
2,209443,520,97545544052308402351,973
3,885532,520,81739716818947702597,973
4,637752,520,81580561655080514114,973
...,...,...,...,...
1995,489580,162,84799734366765914183,815
1996,620876,162,44194163839245895298,815
1997,794248,162,47560543676669014920,815
1998,177390,162,99045454581305522136,815


In [23]:
#Responce
responce=pd.DataFrame({"response_id": [fake.unique.random_number(digits=4,fix_len=True) for i in range(500)] ,
        "timestamp": [fake.date_time_this_decade() for i in range(500)],
        "response_type": [random.choice(["Positive", "Negative", "Neutral"]) for i in range(500)],
        "content": [fake.sentence() for i in range(500) ]} )
responce['abandoned'] = ["Y" if response_type != 'Positive' else "N" for response_type in responce['response_type']]
responce['impression_id']=[random.choice(list(impression['impression_id'])) for I in range(500)]

responce.to_csv('Data_new/responce.csv',index=False)

In [24]:
#interaction
interaction = pd.DataFrame({
    "interaction_id": [fake.uuid4() for _ in range(2000)],
    "timestamp": [fake.date_time_this_decade() for _ in range(2000)],
    "type_of_interaction": [random.choice(["Phone Call", "Branch Visit", "Online Chat"]) for _ in range(2000)],
    "interaction_details": [fake.sentence() for _ in range(2000)],
    "outcome": [random.choice(["Inquiry", "Transaction", "Complaint"]) for _ in range(2000)],
    
})


interaction.to_csv('Data_new/interaction.csv',index=False)

In [25]:
responce

Unnamed: 0,response_id,timestamp,response_type,content,abandoned,impression_id
0,1725,2022-11-04 04:00:15,Positive,Where he before positive.,N,262
1,1892,2022-08-17 03:50:33,Neutral,Matter standard break resource least.,Y,861
2,4681,2022-03-14 11:06:44,Positive,Down way economic amount hard.,N,111
3,7109,2021-01-24 03:48:04,Neutral,Nice hot line stand save.,Y,775
4,3774,2022-06-20 23:55:19,Positive,Certain glass discuss pressure.,N,477
...,...,...,...,...,...,...
495,2650,2022-12-31 00:52:31,Positive,Scientist as support spend song someone science.,N,818
496,9263,2022-12-04 10:37:56,Negative,Owner professional free none theory company.,Y,973
497,3549,2022-09-21 09:48:14,Neutral,Decade base give most.,Y,707
498,8921,2021-10-10 15:49:44,Positive,Game art recent claim deep recognize sometimes.,N,983


In [26]:
interaction

Unnamed: 0,interaction_id,timestamp,type_of_interaction,interaction_details,outcome
0,ef0a95e6-55b6-4363-887e-3f8fe17af252,2024-02-04 04:27:51,Phone Call,Fact boy behavior produce wind skill loss.,Complaint
1,29d6119d-e08b-4373-875e-2b02309d82bc,2020-08-23 04:20:11,Online Chat,Two size old hit mean scientist.,Transaction
2,918e01e7-a3b2-434f-939b-1a8661be860e,2022-09-26 02:03:25,Phone Call,Physical middle right.,Transaction
3,abb348f6-4c79-44ba-ba7a-e02f829297b8,2021-07-19 09:17:15,Branch Visit,Appear do goal.,Complaint
4,e124da1e-ed00-43d8-94c7-0c449e4f0760,2023-11-05 09:41:51,Branch Visit,Value those pay property avoid born.,Inquiry
...,...,...,...,...,...
1995,f6d3b525-1833-4005-9ecf-36cd4025dc17,2021-04-28 08:40:22,Phone Call,Town impact true main.,Complaint
1996,6b87f4ad-919e-4791-ba90-5bafa719407a,2021-03-31 18:47:26,Branch Visit,Ever chance establish central style.,Transaction
1997,eed56db6-4a98-4f8c-8942-3d76f087869a,2020-02-19 23:02:38,Branch Visit,Usually Mr page more opportunity plan.,Inquiry
1998,e74aec93-8620-4643-a392-c952b83aa0a8,2020-12-30 05:26:56,Phone Call,Audience direction I.,Transaction


In [27]:
product_data['interaction_id']=interaction['interaction_id']    #linking iteractions to product and customer data
product_data.to_csv('Data_new/product_customer_impression.csv',index=False)  

In [28]:
product_data

Unnamed: 0,customer_id,product_id,account_number,impression_id,interaction_id
0,874852,520,85823615136333207510,973,ef0a95e6-55b6-4363-887e-3f8fe17af252
1,940764,520,81530773566307359926,973,29d6119d-e08b-4373-875e-2b02309d82bc
2,209443,520,97545544052308402351,973,918e01e7-a3b2-434f-939b-1a8661be860e
3,885532,520,81739716818947702597,973,abb348f6-4c79-44ba-ba7a-e02f829297b8
4,637752,520,81580561655080514114,973,e124da1e-ed00-43d8-94c7-0c449e4f0760
...,...,...,...,...,...
1995,489580,162,84799734366765914183,815,f6d3b525-1833-4005-9ecf-36cd4025dc17
1996,620876,162,44194163839245895298,815,6b87f4ad-919e-4791-ba90-5bafa719407a
1997,794248,162,47560543676669014920,815,eed56db6-4a98-4f8c-8942-3d76f087869a
1998,177390,162,99045454581305522136,815,e74aec93-8620-4643-a392-c952b83aa0a8


In [29]:
df = interaction[interaction['outcome'].isin(['Inquiry', 'Complaint'])]
df.reset_index(drop=True,inplace=True)
merged_data = pd.merge(product_data, df, on='interaction_id', how='inner')
interaction_level2 = pd.DataFrame({
    "interaction_id": [fake.uuid4() for _ in range(len(df))],
    "timestamp": [fake.date_time_this_decade()  for _ in range(len(df))],
    "type_of_interaction": [random.choice(["Phone Call", "Branch Visit", "Online Chat"])  for _ in range(len(df))],
    "interaction_details": [fake.sentence()  for _ in range(len(df))],
    "outcome": [random.choice(["Inquiry", "Transaction", "Complaint"])  for _ in range(len(df))],
    "customer_id": merged_data['customer_id'],
    "previous_interaction": merged_data['interaction_id']
})


interaction_level2.to_csv('Data_new/interaction_level2.csv',index=False)

In [30]:
interaction_level2

Unnamed: 0,interaction_id,timestamp,type_of_interaction,interaction_details,outcome,customer_id,previous_interaction
0,c7c25ca1-3334-4980-9385-51b1761af7a0,2023-03-20 20:25:49,Phone Call,Day total minute industry role individual.,Inquiry,874852,ef0a95e6-55b6-4363-887e-3f8fe17af252
1,793e0053-4f4c-4a7f-b875-492856a08300,2020-01-26 01:49:59,Online Chat,Father increase term once recently lay spring.,Complaint,885532,abb348f6-4c79-44ba-ba7a-e02f829297b8
2,fdbbf69d-9f01-4285-b5f7-b031b4bbdcd0,2020-04-19 18:59:44,Phone Call,Commercial seat recognize picture ahead.,Transaction,637752,e124da1e-ed00-43d8-94c7-0c449e4f0760
3,0987d6b2-b32e-418e-b6d2-03785929d393,2020-10-23 12:20:42,Online Chat,Mean past join suggest nature focus.,Inquiry,542987,24e50ed1-2777-4444-95a1-6a92985ed445
4,d29fc9ed-6a33-44df-8d39-3c0345573dd0,2024-01-10 15:02:59,Online Chat,Long dream amount church social.,Inquiry,984980,d01e933e-3da2-4f5d-b362-4a24081031bf
...,...,...,...,...,...,...,...
1310,adc0fdfe-8b31-476c-bc41-ddc951a009a2,2022-03-15 17:09:02,Phone Call,Standard more film while question control wish.,Inquiry,574567,7861d44c-e09e-453d-8a98-600c462ec24f
1311,4f061966-ef23-46da-90d3-33e13bac3fb1,2021-07-31 12:38:52,Online Chat,Join until window gas itself true control.,Transaction,972751,7bf6f1bc-8a51-44b2-a998-34a12e9dd530
1312,a94c405b-34bc-447d-bec0-538380c28302,2023-12-25 17:56:33,Online Chat,Quality have nothing too argue public.,Inquiry,494743,3ad4416c-d1b2-4796-87c4-65913ad57c39
1313,57e9891c-5434-4d5a-afb1-474d3f527343,2020-02-06 04:22:14,Online Chat,Actually over would general add.,Complaint,489580,f6d3b525-1833-4005-9ecf-36cd4025dc17


In [31]:
df=interaction_level2[interaction_level2['outcome']=='Inquiry']
df.reset_index(drop=True,inplace=True)
#merged_data = pd.merge(product_data, df, on='interaction_id', how='inner')
interaction_level3=pd.DataFrame({
    "interaction_id": [fake.uuid4() for _ in range(len(df))],
    "timestamp": [fake.date_time_this_decade()  for _ in range(len(df))],
    "type_of_interaction": [random.choice(["Phone Call", "Branch Visit", "Online Chat"])  for _ in range(len(df))],
    "interaction_details": [fake.sentence()  for _ in range(len(df))],
    "outcome": [random.choice(["Inquiry", "Transaction" ])  for _ in range(len(df))],
    "customer_id":[df.loc[i,'customer_id'] for i in range(len(df))],
     "previous_interaction":[df.loc[i,'interaction_id'] for i in range(len(df))]
})



interaction_level3.to_csv('Data_new/interaction_level3.csv',index=False)

In [32]:
interaction_level3

Unnamed: 0,interaction_id,timestamp,type_of_interaction,interaction_details,outcome,customer_id,previous_interaction
0,aca9aad1-56d5-4bd9-9bc5-ab7a8933798f,2021-09-10 07:36:35,Online Chat,While heavy want grow.,Inquiry,874852,c7c25ca1-3334-4980-9385-51b1761af7a0
1,6f951c25-acfe-4e69-a61c-7c8f4b902a8e,2021-07-19 02:57:24,Online Chat,West economic statement response.,Transaction,542987,0987d6b2-b32e-418e-b6d2-03785929d393
2,68c5434d-ad7e-4150-9eb5-3abb26ada6ed,2021-12-27 15:01:22,Phone Call,Site dark rock sea child must.,Transaction,984980,d29fc9ed-6a33-44df-8d39-3c0345573dd0
3,5705064a-4d65-4383-a084-00054b5d1267,2024-01-19 18:37:16,Branch Visit,Tonight meet cut guess reach lose just.,Inquiry,286396,b502a8af-a44d-452c-a752-d0c0e78f3ea9
4,6f0074ae-609e-4795-8a29-70b03f56b0b8,2021-02-01 07:45:00,Phone Call,Good general management coach concern.,Transaction,278171,4313966f-0d94-4ffb-863f-7e5647c2793c
...,...,...,...,...,...,...,...
436,7a1ad1e5-9c72-47a4-b587-74b912af1b1a,2024-03-21 14:24:37,Phone Call,Yet machine growth community door always.,Transaction,983646,af5504a8-3f45-4313-922a-b6422109a44e
437,9c0c9698-7f49-442b-9465-99e83b21332c,2023-03-30 06:22:26,Phone Call,Stock affect experience structure arrive.,Transaction,645513,5a74c2b0-6f84-4eed-8f2c-f7450e33b685
438,22fcffda-f683-44d5-bd00-bac1f8a4f304,2021-11-28 18:00:28,Phone Call,Look trade wonder recent information stand.,Transaction,147342,88bf06fa-1d09-49e3-8418-1c10a1cab480
439,cf55999f-759f-4b42-b427-9e214c5637b7,2022-04-29 13:54:43,Phone Call,Ground side suggest morning first yard.,Transaction,574567,adc0fdfe-8b31-476c-bc41-ddc951a009a2


In [33]:
df=interaction_level3[interaction_level3['outcome']=='Inquiry']
df.reset_index(drop=True,inplace=True)
#merged_data = pd.merge(product_data, df, on='interaction_id', how='inner')
interaction_level4=pd.DataFrame({
    "interaction_id": [fake.uuid4() for _ in range(len(df))],
    "timestamp": [fake.date_time_this_decade()  for _ in range(len(df))],
    "type_of_interaction": [random.choice(["Phone Call", "Branch Visit", "Online Chat"])  for _ in range(len(df))],
    "interaction_details": [fake.sentence()  for _ in range(len(df))],
    "outcome": [random.choice(["Transaction" ])  for _ in range(len(df))],
    "customer_id":[df.loc[i,'customer_id'] for i in range(len(df))],
     "previous_interaction":[df.loc[i,'interaction_id'] for i in range(len(df))],
    
})

interaction_level4.to_csv('Data_new/interaction_level4.csv',index=False)

In [34]:
interaction_level4

Unnamed: 0,interaction_id,timestamp,type_of_interaction,interaction_details,outcome,customer_id,previous_interaction
0,2a99d792-318b-4fcb-a484-15479ff054a7,2023-11-03 00:20:17,Branch Visit,Probably exactly tend dream night.,Transaction,874852,aca9aad1-56d5-4bd9-9bc5-ab7a8933798f
1,900b527a-eefa-46f5-b4a8-6aa3f63b43c8,2023-10-27 01:17:11,Branch Visit,Focus back seem garden already.,Transaction,286396,5705064a-4d65-4383-a084-00054b5d1267
2,63c9f7fc-8ccc-46cb-991f-a821e29ae68e,2023-11-22 09:06:53,Phone Call,Congress could within degree later pull.,Transaction,432126,907e4dc9-7554-4b4c-ba89-8b183d5d8df5
3,f8072b22-8aa9-48ef-9968-7a28405d665e,2020-03-06 15:19:11,Branch Visit,Over white part prove.,Transaction,900555,8ca8a253-f264-4bf6-b35a-0ae785a23d8e
4,d061d624-ac14-4ef5-a875-c1aea9ccd159,2020-07-18 19:10:45,Phone Call,Huge box opportunity help able.,Transaction,238440,f28be5a1-fa2d-45ee-bc5f-a25a5614c93b
...,...,...,...,...,...,...,...
208,d256a232-40c0-4794-80ed-74df30c5a0fb,2023-12-04 23:58:53,Online Chat,Debate north drop movie late wrong popular.,Transaction,556350,fafd892b-6f24-4b53-ac66-8f99c9dc174c
209,e404245e-34f7-41a4-a878-34d652ff2ae0,2021-04-21 22:20:44,Online Chat,Song situation old score.,Transaction,455953,18b4139a-b05f-47f4-a5fa-bb82652fc766
210,f7ae2681-b5b0-4418-af82-b008190e893c,2022-07-24 21:57:24,Online Chat,Keep then could maybe stock street important man.,Transaction,407384,d1b8b89e-c433-4f27-93b8-7a4fcb80f4bb
211,89d93a5d-8fb4-4443-924e-4ebbd5d9f448,2022-03-10 10:45:09,Online Chat,Lead girl film from stand.,Transaction,221877,b4a0b4a8-1887-4f4b-9ac7-af188a04b91f


In [35]:
all_interaction=pd.concat([interaction,interaction_level2,interaction_level3,interaction_level4],join='inner',ignore_index=True)
all_interaction.to_csv('Data_new/all_interaction.csv',index=False)

In [36]:
branch_call = pd.DataFrame({
    "call_id": [fake.uuid4() for _ in range(interaction_level2.shape[0])],
    "timestamp": [fake.date_time_this_decade() for _ in range(interaction_level2.shape[0])],
    "caller": [random.choice(["Customer", "Bank Representative"]) for _ in range(interaction_level2.shape[0])],
    "purpose": [fake.sentence() for _ in range(interaction_level2.shape[0])],
    "product_discussed": [fake.word() for _ in range(interaction_level2.shape[0])],
    "outcome": [random.choice(["Inquiry Handled", "Follow-up Required"]) for _ in range(interaction_level2.shape[0])]
})
merge_data=pd.merge(product_data,interaction_level2,how='inner',on='customer_id')
branch_call['product_id']=merge_data.product_id
branch_call['interaction_id']=interaction_level2.interaction_id
branch_call.to_csv('Data_new/branch_call.csv',index=False)

In [37]:
branch_call

Unnamed: 0,call_id,timestamp,caller,purpose,product_discussed,outcome,product_id,interaction_id
0,3d30d887-2bf1-4dc5-9288-38412e8f0fa0,2021-09-03 23:23:48,Bank Representative,Field consumer side skill magazine finally fear.,ahead,Follow-up Required,520,c7c25ca1-3334-4980-9385-51b1761af7a0
1,c8713304-1c29-482b-986a-714ec1f6485a,2022-10-07 07:52:19,Bank Representative,In issue edge general rather candidate.,car,Follow-up Required,520,793e0053-4f4c-4a7f-b875-492856a08300
2,b0165a3b-c6eb-4ed4-bf99-a35acdab4c28,2022-05-18 09:55:33,Bank Representative,Until usually fact involve.,front,Follow-up Required,520,fdbbf69d-9f01-4285-b5f7-b031b4bbdcd0
3,5d5d2499-697f-401e-92ae-79623e438f9f,2024-03-19 23:25:08,Bank Representative,Music must effect professor find just.,authority,Follow-up Required,605,0987d6b2-b32e-418e-b6d2-03785929d393
4,7a6590de-a477-47c0-a937-cae863cefb39,2020-07-06 14:24:00,Customer,Could too various none street chair president.,vote,Inquiry Handled,605,d29fc9ed-6a33-44df-8d39-3c0345573dd0
...,...,...,...,...,...,...,...,...
1310,da793d81-e78b-4ca3-a727-46b7904253c1,2022-01-07 01:13:32,Customer,Trouble clearly who along organization positiv...,most,Inquiry Handled,340,adc0fdfe-8b31-476c-bc41-ddc951a009a2
1311,1810fab1-7001-48e1-a6ca-ac08aa8faa0e,2022-12-22 14:32:11,Bank Representative,Else seat sense blood soon color western stay.,such,Inquiry Handled,340,4f061966-ef23-46da-90d3-33e13bac3fb1
1312,9768ab16-c56c-48c3-91c6-f9fc7eb4f110,2024-04-18 08:56:27,Bank Representative,Truth test manager page ever.,term,Follow-up Required,340,a94c405b-34bc-447d-bec0-538380c28302
1313,74ea27a4-b02b-46a0-a188-aaef0756dfa8,2022-02-08 11:01:16,Customer,Huge term check.,not,Follow-up Required,728,57e9891c-5434-4d5a-afb1-474d3f527343


In [38]:
call_response=pd.DataFrame({"call_response_id": [fake.unique.random_number(digits=5,fix_len=True) for i in range(len(branch_call))] ,
        "timestamp": [fake.date_time_this_decade() for i in range(len(branch_call))],
        "response_type": [random.choice(["Positive", "Negative", "Neutral"]) for i in  range(len(branch_call))],
        } )

call_response['call_id']=branch_call.call_id
call_response.to_csv('Data_new/call_response.csv',index=False)

In [39]:
call_response

Unnamed: 0,call_response_id,timestamp,response_type,call_id
0,54438,2022-01-09 14:48:39,Negative,3d30d887-2bf1-4dc5-9288-38412e8f0fa0
1,16861,2023-02-02 09:42:06,Neutral,c8713304-1c29-482b-986a-714ec1f6485a
2,77602,2023-10-01 00:03:21,Positive,b0165a3b-c6eb-4ed4-bf99-a35acdab4c28
3,35784,2020-04-27 19:13:54,Positive,5d5d2499-697f-401e-92ae-79623e438f9f
4,72608,2020-03-24 23:20:51,Positive,7a6590de-a477-47c0-a937-cae863cefb39
...,...,...,...,...
1310,49895,2022-10-24 00:04:06,Positive,da793d81-e78b-4ca3-a727-46b7904253c1
1311,59683,2020-12-18 15:57:50,Negative,1810fab1-7001-48e1-a6ca-ac08aa8faa0e
1312,77099,2021-05-22 05:21:30,Positive,9768ab16-c56c-48c3-91c6-f9fc7eb4f110
1313,31813,2020-11-11 02:00:31,Negative,74ea27a4-b02b-46a0-a188-aaef0756dfa8


In [40]:
application = pd.DataFrame({
    "application_id": [fake.unique.random_number(digits=5,fix_len=True) if response_type == 'Positive' else None for response_type in call_response['response_type']],
    "timestamp": [fake.date_time_this_decade() for _ in range(len(call_response['response_type']))],
    "application_type": [random.choice(["Loan Application", "Credit Card Application"]) for _ in range(len(call_response['response_type']))],
    "approval_id": [fake.uuid4() for _ in range(len(call_response['response_type']))],
    "approval_status": [random.choice(["Approved", "Rejected", "Pending"]) for _ in range(len(call_response['response_type']))]
})

application.dropna(inplace=True)
application['call_respose_id']=call_response.call_response_id
application.to_csv('Data_new/application.csv',index=False)

In [41]:
application

Unnamed: 0,application_id,timestamp,application_type,approval_id,approval_status,call_respose_id
2,79240.0,2021-10-23 08:44:02,Loan Application,e070981a-35e7-44ad-a035-045d6c5e8832,Approved,77602
3,39367.0,2021-04-03 16:55:06,Loan Application,7b3c36aa-21e2-49f1-9a20-1fac5ddc4ba1,Pending,35784
4,48858.0,2021-02-25 03:50:44,Loan Application,3d998c1d-e17a-437f-9d5a-7e4a9ec3af9b,Pending,72608
7,89221.0,2024-03-16 04:18:37,Loan Application,3caf669a-b57c-4e64-8aee-60165f97f21c,Pending,38233
12,34283.0,2023-04-19 23:13:24,Credit Card Application,4a60dc28-3866-4970-ad9f-dbc70c305b23,Pending,55633
...,...,...,...,...,...,...
1302,66537.0,2021-07-01 10:31:57,Credit Card Application,7cc7753c-ea0c-44f8-9349-523ee6bfa7bc,Rejected,34581
1308,22184.0,2024-04-03 19:48:32,Loan Application,1abe0da2-6e12-44d4-95f3-b9ea2f2b04fe,Pending,44669
1310,58548.0,2021-10-30 20:26:57,Credit Card Application,2deb36bf-2494-459c-8039-4a3cf3b4007a,Pending,49895
1312,99823.0,2022-06-30 16:13:44,Credit Card Application,78138334-8e2b-4685-b685-6854952254ed,Pending,77099


In [42]:
approved_applications = application[application['approval_status'] == 'Approved']
l = len(approved_applications)

book = pd.DataFrame({
    "booking_id": [fake.unique.random_number(digits=6, fix_len=True) if status == 'Approved' else None for status in approved_applications['approval_status']],
    "booked_product": [fake.word() for _ in range(l)],
    "booking_details": [fake.sentence() for _ in range(l)],
    "approval_id": approved_applications['approval_id']
})

book.to_csv('Data_new/book.csv',index=False)

In [43]:
book

Unnamed: 0,booking_id,booked_product,booking_details,approval_id
2,158660,many,Good deal nor.,e070981a-35e7-44ad-a035-045d6c5e8832
14,610271,might,Vote relationship reach step control.,2b268dbd-aa3f-478e-9782-608e0596335c
15,539327,party,Again each speech black number.,d357879f-3aa0-4309-abbe-4fb234330025
25,553355,none,Memory view language their day.,1c443815-667e-4067-895f-0bf3b969ce1b
27,197532,movie,Yourself lose keep big well.,2bedca5f-08f8-4b6b-b860-daca4d172fc6
...,...,...,...,...
1286,269107,difficult,His employee north wall.,564c6009-5593-4271-946a-55caef529900
1293,749050,seek,Decade nor true.,ebce4179-b1a2-4f71-80c5-6eab791e472b
1294,537775,sell,Usually serve magazine already total interview.,5695e572-73b7-4285-b50a-65b0b12286ba
1297,569442,carry,Expert key low.,e65eeb0f-d937-411a-b830-60140f2db619
