**Import libraries**


In [None]:
import pandas as pd
import re
import numpy as np


**Import dataset**

In [None]:
db=pd.read_csv('data.csv')

**Data Transformation**

In [None]:
db.head()

Unnamed: 0,\tEmail,Address,Avatar,Time on App,Time on Website,Length of Membership,Yearly Amount Spent
0,mstephenson@fernandez.com,"835 Frank TunnelWrightmouth, MI 82180-9605",Violet,12.66,39.58,4.08,587.95
1,hduke@hotmail.com,"4547 Archer CommonDiazchester, CA 06566-8576",DarkGreen,11.11,37.27,2.66,392.2
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582Cobbborough, DC ...",Bisque,11.33,37.11,4.1,487.55
3,riverarebecca@gmail.com,"1414 David ThroughwayPort Jason, OH 22070-1220",SaddleBrown,13.72,36.72,3.12,581.85
4,mstephens@davidson-herman.com,"14023 Rodriguez PassagePort Jacobville, PR 372...",MediumAquaMarine,12.8,37.54,4.45,599.41


In [None]:
db.isnull().sum()

Unnamed: 0,0
\tEmail,0
Address,0
Avatar,0
Time on App,0
Time on Website,0
Length of Membership,0
Yearly Amount Spent,0


In [None]:
db.dtypes

Unnamed: 0,0
\tEmail,object
Address,object
Avatar,object
Time on App,float64
Time on Website,float64
Length of Membership,float64
Yearly Amount Spent,float64


**Breaking address in 4 segments**

In [None]:
military_keywords = ['APO', 'FPO', 'DPO', 'PSC', 'Unit', 'USNS']
standard_pattern = re.compile(r",\s*([A-Z]{2})\s+(\d{5}(?:-\d{4})?)")

# Parse function
def parse_address(addr):
    is_military = any(keyword in addr for keyword in military_keywords)
    addr_type = "Military" if is_military else "Standard"

    # Extract state and zip
    match = standard_pattern.search(addr)
    state = match.group(1) if match else None
    zip_code = match.group(2) if match else None

    # Extract city if standard address
    city_match = re.search(r"([A-Za-z\s]+),\s*[A-Z]{2}\s+\d{5}", addr)
    city = city_match.group(1).strip() if city_match else None

    return pd.Series([city, state, zip_code, addr_type])

# Apply function
db[['City', 'State', 'ZIP Code', 'Address Type']] = db['Address'].apply(parse_address)

# Show output
print(db.head())

                         \tEmail  \
0      mstephenson@fernandez.com   
1              hduke@hotmail.com   
2               pallen@yahoo.com   
3        riverarebecca@gmail.com   
4  mstephens@davidson-herman.com   

                                             Address            Avatar  \
0         835 Frank TunnelWrightmouth, MI 82180-9605            Violet   
1       4547 Archer CommonDiazchester, CA 06566-8576         DarkGreen   
2  24645 Valerie Unions Suite 582Cobbborough, DC ...            Bisque   
3     1414 David ThroughwayPort Jason, OH 22070-1220       SaddleBrown   
4  14023 Rodriguez PassagePort Jacobville, PR 372...  MediumAquaMarine   

   Time on App  Time on Website  Length of Membership  Yearly Amount Spent  \
0        12.66            39.58                  4.08               587.95   
1        11.11            37.27                  2.66               392.20   
2        11.33            37.11                  4.10               487.55   
3        13.72            

In [None]:
db[['Street Address', 'City', 'State', 'ZIP Code']] = db['Address'].apply(split_address)

In [None]:
db.head(10)

Unnamed: 0,\tEmail,Address,Avatar,Time on App,Time on Website,Length of Membership,Yearly Amount Spent,City,State,ZIP Code,Address Type
0,mstephenson@fernandez.com,"835 Frank TunnelWrightmouth, MI 82180-9605",Violet,12.66,39.58,4.08,587.95,Frank TunnelWrightmouth,MI,82180-9605,Standard
1,hduke@hotmail.com,"4547 Archer CommonDiazchester, CA 06566-8576",DarkGreen,11.11,37.27,2.66,392.2,Archer CommonDiazchester,CA,06566-8576,Standard
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582Cobbborough, DC ...",Bisque,11.33,37.11,4.1,487.55,Cobbborough,DC,99414-7564,Standard
3,riverarebecca@gmail.com,"1414 David ThroughwayPort Jason, OH 22070-1220",SaddleBrown,13.72,36.72,3.12,581.85,David ThroughwayPort Jason,OH,22070-1220,Standard
4,mstephens@davidson-herman.com,"14023 Rodriguez PassagePort Jacobville, PR 372...",MediumAquaMarine,12.8,37.54,4.45,599.41,Rodriguez PassagePort Jacobville,PR,37242-1057,Standard
5,alvareznancy@lucas.biz,"645 Martha Park Apt. 611Jeffreychester, MN 672...",FloralWhite,12.03,34.48,5.49,637.1,Jeffreychester,MN,67218-7250,Standard
6,katherine20@yahoo.com,"68388 Reyes Lights Suite 692Josephbury, WV 922...",DarkSlateBlue,11.37,36.68,4.69,521.57,Josephbury,WV,92213-0247,Standard
7,awatkins@yahoo.com,Unit 6538 Box 8980DPO AP 09026-4941,Aqua,12.35,37.37,4.43,549.9,,,,Military
8,vchurch@walter-martinez.com,"860 Lee KeyWest Debra, SD 97450-0495",Salmon,13.39,37.53,3.27,570.2,Lee KeyWest Debra,SD,97450-0495,Standard
9,bonnie69@lin.biz,"PSC 2734, Box 5255APO AA 98456-7482",Brown,11.81,37.15,3.2,427.2,,,,Military


In [None]:
db.head(10)

Unnamed: 0,\tEmail,Address,Avatar,Time on App,Time on Website,Length of Membership,Yearly Amount Spent,Address Type,Street Address,City,State,ZIP Code
0,mstephenson@fernandez.com,"835 Frank TunnelWrightmouth, MI 82180-9605",Violet,12.66,39.58,4.08,587.95,Standard Address,835 Frank,TunnelWrightmouth,MI,82180-9605
1,hduke@hotmail.com,"4547 Archer CommonDiazchester, CA 06566-8576",DarkGreen,11.11,37.27,2.66,392.2,Standard Address,4547 Archer,CommonDiazchester,CA,06566-8576
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582Cobbborough, DC ...",Bisque,11.33,37.11,4.1,487.55,Standard Address,24645 Valerie Unions Suite,582Cobbborough,DC,99414-7564
3,riverarebecca@gmail.com,"1414 David ThroughwayPort Jason, OH 22070-1220",SaddleBrown,13.72,36.72,3.12,581.85,Standard Address,1414 David ThroughwayPort,Jason,OH,22070-1220
4,mstephens@davidson-herman.com,"14023 Rodriguez PassagePort Jacobville, PR 372...",MediumAquaMarine,12.8,37.54,4.45,599.41,Standard Address,14023 Rodriguez PassagePort,Jacobville,PR,37242-1057
5,alvareznancy@lucas.biz,"645 Martha Park Apt. 611Jeffreychester, MN 672...",FloralWhite,12.03,34.48,5.49,637.1,Unknown Format,,,,
6,katherine20@yahoo.com,"68388 Reyes Lights Suite 692Josephbury, WV 922...",DarkSlateBlue,11.37,36.68,4.69,521.57,Standard Address,68388 Reyes Lights Suite,692Josephbury,WV,92213-0247
7,awatkins@yahoo.com,Unit 6538 Box 8980DPO AP 09026-4941,Aqua,12.35,37.37,4.43,549.9,Military/Diplomatic,Unit 6538 Box 8980,,AP,09026-4941
8,vchurch@walter-martinez.com,"860 Lee KeyWest Debra, SD 97450-0495",Salmon,13.39,37.53,3.27,570.2,Standard Address,860 Lee KeyWest,Debra,SD,97450-0495
9,bonnie69@lin.biz,"PSC 2734, Box 5255APO AA 98456-7482",Brown,11.81,37.15,3.2,427.2,Military/Diplomatic,"PSC 2734, Box 5255",,AA,98456-7482


In [None]:
db['avgtime']= (db['Time on App']+db['Time on Website'])/2

In [None]:
db

Unnamed: 0,\tEmail,Address,Avatar,Time on App,Time on Website,Length of Membership,Yearly Amount Spent,Address Type,Street Address,City,State,ZIP Code,avgtime
0,mstephenson@fernandez.com,"835 Frank TunnelWrightmouth, MI 82180-9605",Violet,12.66,39.58,4.08,587.95,Standard Address,835 Frank,TunnelWrightmouth,MI,82180-9605,26.120
1,hduke@hotmail.com,"4547 Archer CommonDiazchester, CA 06566-8576",DarkGreen,11.11,37.27,2.66,392.20,Standard Address,4547 Archer,CommonDiazchester,CA,06566-8576,24.190
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582Cobbborough, DC ...",Bisque,11.33,37.11,4.10,487.55,Standard Address,24645 Valerie Unions Suite,582Cobbborough,DC,99414-7564,24.220
3,riverarebecca@gmail.com,"1414 David ThroughwayPort Jason, OH 22070-1220",SaddleBrown,13.72,36.72,3.12,581.85,Standard Address,1414 David ThroughwayPort,Jason,OH,22070-1220,25.220
4,mstephens@davidson-herman.com,"14023 Rodriguez PassagePort Jacobville, PR 372...",MediumAquaMarine,12.80,37.54,4.45,599.41,Standard Address,14023 Rodriguez PassagePort,Jacobville,PR,37242-1057,25.170
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,lewisjessica@craig-evans.com,"4483 Jones Motorway Suite 872Lake Jamiefurt, U...",Tan,13.57,36.42,3.75,573.85,Standard Address,4483 Jones Motorway Suite 872Lake,Jamiefurt,UT,75292,24.995
496,katrina56@gmail.com,"172 Owen Divide Suite 497West Richard, CA 19320",PaleVioletRed,11.70,37.19,3.58,529.05,Standard Address,172 Owen Divide Suite 497West,Richard,CA,19320,24.445
497,dale88@hotmail.com,"0787 Andrews Ranch Apt. 633South Chadburgh, TN...",Cornsilk,11.50,38.33,4.96,551.62,Unknown Format,,,,,24.915
498,cwilson@hotmail.com,"680 Jennifer Lodge Apt. 808Brendachester, TX 0...",Teal,12.39,36.84,2.34,456.47,Unknown Format,,,,,24.615


In [None]:
db['Estimated total spending']=db['Length of Membership']*db['Yearly Amount Spent']

In [None]:

db.head(10)

Unnamed: 0,\tEmail,Address,Avatar,Time on App,Time on Website,Length of Membership,Yearly Amount Spent,City,State,ZIP Code,Address Type,avgtime,Estimated total spending
0,mstephenson@fernandez.com,"835 Frank TunnelWrightmouth, MI 82180-9605",Violet,12.66,39.58,4.08,587.95,Frank TunnelWrightmouth,MI,82180-9605,Standard,26.12,2398.836
1,hduke@hotmail.com,"4547 Archer CommonDiazchester, CA 06566-8576",DarkGreen,11.11,37.27,2.66,392.2,Archer CommonDiazchester,CA,06566-8576,Standard,24.19,1043.252
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582Cobbborough, DC ...",Bisque,11.33,37.11,4.1,487.55,Cobbborough,DC,99414-7564,Standard,24.22,1998.955
3,riverarebecca@gmail.com,"1414 David ThroughwayPort Jason, OH 22070-1220",SaddleBrown,13.72,36.72,3.12,581.85,David ThroughwayPort Jason,OH,22070-1220,Standard,25.22,1815.372
4,mstephens@davidson-herman.com,"14023 Rodriguez PassagePort Jacobville, PR 372...",MediumAquaMarine,12.8,37.54,4.45,599.41,Rodriguez PassagePort Jacobville,PR,37242-1057,Standard,25.17,2667.3745
5,alvareznancy@lucas.biz,"645 Martha Park Apt. 611Jeffreychester, MN 672...",FloralWhite,12.03,34.48,5.49,637.1,Jeffreychester,MN,67218-7250,Standard,23.255,3497.679
6,katherine20@yahoo.com,"68388 Reyes Lights Suite 692Josephbury, WV 922...",DarkSlateBlue,11.37,36.68,4.69,521.57,Josephbury,WV,92213-0247,Standard,24.025,2446.1633
7,awatkins@yahoo.com,Unit 6538 Box 8980DPO AP 09026-4941,Aqua,12.35,37.37,4.43,549.9,,,,Military,24.86,2436.057
8,vchurch@walter-martinez.com,"860 Lee KeyWest Debra, SD 97450-0495",Salmon,13.39,37.53,3.27,570.2,Lee KeyWest Debra,SD,97450-0495,Standard,25.46,1864.554
9,bonnie69@lin.biz,"PSC 2734, Box 5255APO AA 98456-7482",Brown,11.81,37.15,3.2,427.2,,,,Military,24.48,1367.04


In [None]:
db.to_csv("Ecommerce_data_week2_final.csv", index= False)
from google.colab import files
files.download('Ecommerce_data_week2_final.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>