In [34]:
import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta

# Shared Faker generator configured for the en_GB locale; the data-generation
# cell draws its street addresses, cities, postcodes and dates from it.
fake = Faker(locale="en_GB")

# County names that the data-generation cell samples from at random.
COUNTIES = [
    "Greater London",
    "West Midlands",
    "Greater Manchester",
    "Merseyside",
    "South Yorkshire",
    "West Yorkshire",
    "Tyne and Wear",
    "Hampshire",
    "Kent",
    "Essex",
    "Surrey",
    "Lancashire",
    "Devon",
    "Cornwall",
]

In [35]:
# Generate a small table of fake address records.
#
# Both random sources used in this cell (Faker and NumPy's global generator)
# are seeded first, so re-running the cell draws the same random values.
# The dates are still relative to the current time ("-5y" .. "now" and
# datetime.now()), so the timestamps themselves shift between runs.
SEED = 42        # arbitrary fixed seed shared by Faker and NumPy
N_ADDRESSES = 5  # number of fake address rows to generate

Faker.seed(SEED)
np.random.seed(SEED)

# NOTE(review): the validity window is drawn once, so every row shares the
# same start_date / end_date -- confirm this is intended rather than per-row.
start_date = fake.date_time_between(start_date="-5y", end_date="now")
end_date = start_date + timedelta(days=np.random.randint(30, 1000))


def _fake_address(address_id, window_start, window_end):
    """Build one fake address record as a dict keyed by column name.

    Parameters
    ----------
    address_id : int
        Value stored in the ``address_id`` column.
    window_start, window_end : datetime
        Stored as ``start_date`` / ``end_date``; ``created_at`` is drawn at
        random between them.

    Returns
    -------
    dict
        One row for the address DataFrame.
    """
    return {
        "address_id": address_id,
        "first_line": fake.street_address(),
        # Roughly half of the rows get no second line (stored as NaN).
        "second_line": fake.secondary_address() if np.random.rand() > 0.5 else np.nan,
        "city": fake.city(),
        "county": np.random.choice(COUNTIES),
        "post_code": fake.postcode(),
        # One of the two ids 1 / 2, picked at random.
        "address_type_id": np.random.choice([1, 2]),
        "start_date": window_start,
        "end_date": window_end,
        "created_at": fake.date_time_between(start_date=window_start, end_date=window_end),
        "last_updated": datetime.now(),
    }


# Build all records first and create the DataFrame once.
data = [_fake_address(i, start_date, end_date) for i in range(N_ADDRESSES)]

df = pd.DataFrame(data)
df

Unnamed: 0,address_id,first_line,second_line,city,county,post_code,address_type_id,start_date,end_date,created_at,last_updated
0,0,Flat 61\nHowarth loop,Flat 32R,New Catherinechester,South Yorkshire,TA2 9TF,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-05-10 11:28:09.855499,2025-08-25 19:17:03.488401
1,1,Flat 1\nDean locks,Flat 4,Lake Dean,West Midlands,S43 9NX,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-07-10 19:27:39.309584,2025-08-25 19:17:03.488821
2,2,Studio 3\nJoyce coves,Flat 23,Lake Guy,Greater London,ZE8R 6DS,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-04-10 22:59:58.786572,2025-08-25 19:17:03.489241
3,3,Studio 5\nLouise stream,Flat 85,Julianton,Surrey,PH4P 5LL,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-08-29 15:09:14.378683,2025-08-25 19:17:03.489566
4,4,Studio 37\nRaymond plain,Studio 5,Jayborough,West Yorkshire,OX3Y 5GG,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-06-07 10:04:20.435275,2025-08-25 19:17:03.489920


In [36]:
# Replace missing second-line values with an empty string so the column
# holds only strings.
df["second_line"] = df["second_line"].mask(df["second_line"].isna(), "")

df

Unnamed: 0,address_id,first_line,second_line,city,county,post_code,address_type_id,start_date,end_date,created_at,last_updated
0,0,Flat 61\nHowarth loop,Flat 32R,New Catherinechester,South Yorkshire,TA2 9TF,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-05-10 11:28:09.855499,2025-08-25 19:17:03.488401
1,1,Flat 1\nDean locks,Flat 4,Lake Dean,West Midlands,S43 9NX,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-07-10 19:27:39.309584,2025-08-25 19:17:03.488821
2,2,Studio 3\nJoyce coves,Flat 23,Lake Guy,Greater London,ZE8R 6DS,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-04-10 22:59:58.786572,2025-08-25 19:17:03.489241
3,3,Studio 5\nLouise stream,Flat 85,Julianton,Surrey,PH4P 5LL,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-08-29 15:09:14.378683,2025-08-25 19:17:03.489566
4,4,Studio 37\nRaymond plain,Studio 5,Jayborough,West Yorkshire,OX3Y 5GG,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-06-07 10:04:20.435275,2025-08-25 19:17:03.489920


In [37]:
# Lookup table pairing each address_type_id with its text label.
df_address_type = pd.DataFrame(
    [
        (1, "Home"),
        (2, "Billing"),
    ],
    columns=["address_type_id", "address_type"],
)

In [38]:
# Attach the address_type label to each address row via a left join on the
# shared address_type_id key.
df = pd.merge(
    left=df,
    right=df_address_type,
    how="left",
    on="address_type_id",
)

df

Unnamed: 0,address_id,first_line,second_line,city,county,post_code,address_type_id,start_date,end_date,created_at,last_updated,address_type
0,0,Flat 61\nHowarth loop,Flat 32R,New Catherinechester,South Yorkshire,TA2 9TF,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-05-10 11:28:09.855499,2025-08-25 19:17:03.488401,Home
1,1,Flat 1\nDean locks,Flat 4,Lake Dean,West Midlands,S43 9NX,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-07-10 19:27:39.309584,2025-08-25 19:17:03.488821,Home
2,2,Studio 3\nJoyce coves,Flat 23,Lake Guy,Greater London,ZE8R 6DS,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-04-10 22:59:58.786572,2025-08-25 19:17:03.489241,Billing
3,3,Studio 5\nLouise stream,Flat 85,Julianton,Surrey,PH4P 5LL,2,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-08-29 15:09:14.378683,2025-08-25 19:17:03.489566,Billing
4,4,Studio 37\nRaymond plain,Studio 5,Jayborough,West Yorkshire,OX3Y 5GG,1,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-06-07 10:04:20.435275,2025-08-25 19:17:03.489920,Home


In [39]:
# Remove the address_type_id column from the frame.
df = df.drop(columns=["address_type_id"])

df

Unnamed: 0,address_id,first_line,second_line,city,county,post_code,start_date,end_date,created_at,last_updated,address_type
0,0,Flat 61\nHowarth loop,Flat 32R,New Catherinechester,South Yorkshire,TA2 9TF,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-05-10 11:28:09.855499,2025-08-25 19:17:03.488401,Home
1,1,Flat 1\nDean locks,Flat 4,Lake Dean,West Midlands,S43 9NX,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-07-10 19:27:39.309584,2025-08-25 19:17:03.488821,Home
2,2,Studio 3\nJoyce coves,Flat 23,Lake Guy,Greater London,ZE8R 6DS,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-04-10 22:59:58.786572,2025-08-25 19:17:03.489241,Billing
3,3,Studio 5\nLouise stream,Flat 85,Julianton,Surrey,PH4P 5LL,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-08-29 15:09:14.378683,2025-08-25 19:17:03.489566,Billing
4,4,Studio 37\nRaymond plain,Studio 5,Jayborough,West Yorkshire,OX3Y 5GG,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-06-07 10:04:20.435275,2025-08-25 19:17:03.489920,Home


In [40]:
# Collapse the two address-line columns into a single "full_address" column.
#
# Vectorised replacement for a row-by-row iterrows() loop: the second line is
# appended (after ", ") only when it is non-empty. Missing second lines
# (NaN / None) are treated as empty here, so this cell does not depend on them
# having been blanked beforehand -- passing a float NaN to str.join would
# raise TypeError.
# NOTE(review): first_line values are used verbatim, including any embedded
# newline characters they already contain.
first_line = df["first_line"]
second_line = df["second_line"].fillna("")

# Keep first_line alone where there is no second line, otherwise join both.
address = first_line.where(
    second_line.eq(""),
    first_line + ", " + second_line,
).tolist()

df["full_address"] = address

# The component columns are redundant once full_address exists.
df = df.drop(columns=["first_line", "second_line"])

df

    


Unnamed: 0,address_id,city,county,post_code,start_date,end_date,created_at,last_updated,address_type,full_address
0,0,New Catherinechester,South Yorkshire,TA2 9TF,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-05-10 11:28:09.855499,2025-08-25 19:17:03.488401,Home,"Flat 61\nHowarth loop, Flat 32R"
1,1,Lake Dean,West Midlands,S43 9NX,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-07-10 19:27:39.309584,2025-08-25 19:17:03.488821,Home,"Flat 1\nDean locks, Flat 4"
2,2,Lake Guy,Greater London,ZE8R 6DS,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2022-04-10 22:59:58.786572,2025-08-25 19:17:03.489241,Billing,"Studio 3\nJoyce coves, Flat 23"
3,3,Julianton,Surrey,PH4P 5LL,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-08-29 15:09:14.378683,2025-08-25 19:17:03.489566,Billing,"Studio 5\nLouise stream, Flat 85"
4,4,Jayborough,West Yorkshire,OX3Y 5GG,2022-03-17 09:12:34.674486,2024-05-23 09:12:34.674486,2023-06-07 10:04:20.435275,2025-08-25 19:17:03.489920,Home,"Studio 37\nRaymond plain, Studio 5"
