In [1]:
import csv
import json
from datetime import datetime 

In [2]:
filename = "acw_user_data.csv"

In [3]:
headers = []
rows = []
with open(filename,'r') as csvfile:
    reader = csv.reader(csvfile)
    
    headers = next(reader)
    
    for row in reader:
        rows.append(row)

In [4]:
headers

['Address Street',
 'Address City',
 'Address Postcode',
 'Age (Years)',
 'Distance Commuted to Work (Km)',
 'Employer Company',
 'Credit Card Start Date',
 'Credit Card Expiry Date',
 'Credit Card Number',
 'Credit Card CVV',
 'Dependants',
 'First Name',
 'Bank IBAN',
 'Last Name',
 'Marital Status',
 'Yearly Pension (Dollar)',
 'Retired',
 'Yearly Salary (Dollar)',
 'Sex',
 'Vehicle Make',
 'Vehicle Model',
 'Vehicle Year',
 'Vehicle Type']

In [5]:
data = []

for values in rows:
    row = dict(zip(headers, values))
    person = {
        "first_name": row["First Name"],
        "last_name": row["Last Name"],
        "age": int(row["Age (Years)"]),
        "sex": row["Sex"],
        "distance_commuted_to_work_km": float(row["Distance Commuted to Work (Km)"]),
        "employer_company": row["Employer Company"],
        "dependants": row["Dependants"],
        "marital_status": row["Marital Status"],
        "yearly_pension_dollar": int(row["Yearly Pension (Dollar)"]),
        "retired": row["Retired"].strip().lower() == "true",
        "yearly_salary_dollar": int(row["Yearly Salary (Dollar)"]),
        "vehicle": {
            "make": row["Vehicle Make"],
            "model": row["Vehicle Model"],
            "year": int(row["Vehicle Year"]),
            "type": row["Vehicle Type"]
        },
        "credit_card": {
            "start_date": row["Credit Card Start Date"],
            "expiry_date": row["Credit Card Expiry Date"],
            "number": row["Credit Card Number"],
            "cvv": int(row["Credit Card CVV"]),
            "iban": row["Bank IBAN"]
        },
        "address": {
            "street": row["Address Street"],
            "city": row["Address City"],
            "postcode": row["Address Postcode"]
        },
    }
    data.append(person)
# Print the first 5 entries to verify the data structure
for person in data[:5]:
    print(person)   

{'first_name': 'Kieran', 'last_name': 'Wilson', 'age': 89, 'sex': 'Male', 'distance_commuted_to_work_km': 0.0, 'employer_company': 'N/A', 'dependants': '3', 'marital_status': 'married or civil partner', 'yearly_pension_dollar': 7257, 'retired': True, 'yearly_salary_dollar': 72838, 'vehicle': {'make': 'Hyundai', 'model': 'Bonneville', 'year': 2009, 'type': 'Pickup'}, 'credit_card': {'start_date': '08/18', 'expiry_date': '11/27', 'number': '676373692463', 'cvv': 875, 'iban': 'GB62PQKB71416034141571'}, 'address': {'street': '70 Lydia isle', 'city': 'Lake Conor', 'postcode': 'S71 7XZ'}}
{'first_name': 'Jonathan', 'last_name': 'Thomas', 'age': 46, 'sex': 'Male', 'distance_commuted_to_work_km': 13.72, 'employer_company': 'Begum-Williams', 'dependants': '1', 'marital_status': 'married or civil partner', 'yearly_pension_dollar': 0, 'retired': False, 'yearly_salary_dollar': 54016, 'vehicle': {'make': 'Nissan', 'model': 'ATS', 'year': 1996, 'type': 'Coupe'}, 'credit_card': {'start_date': '08/12'

In [6]:
rows_for_dependants = []
for i,value in enumerate(data):
    if value["dependants"] == " " or value["dependants"] == "":
        rows_for_dependants.append(i)
        
print(f"Problematic rows for dependants:{rows_for_dependants}")    

Problematic rows for dependants:[21, 109, 179, 205, 270, 272, 274, 358, 460, 468, 579, 636, 679, 725, 822, 865, 917, 931, 983]


In [7]:

# Write the data to a JSON file
with open("processed.json", "w") as jsonfile:
    json.dump(data, jsonfile, indent=4)

In [8]:
retired = []
employed = []

for person in data:
    if person["retired"] == True:
        retired.append(person)
    else:
        employed.append(person)

In [9]:
with open("retired.json", "w") as jsonfile:
    json.dump(retired, jsonfile, indent=4)
    
with open("employed.json", "w") as jsonfile:
    json.dump(employed, jsonfile, indent=4)

In [10]:
start_date = data[1]["credit_card"]["start_date"]
expiry_date = data[1]["credit_card"]["expiry_date"]

start_date = datetime.strptime(start_date, "%m/%y")
expiry_date = datetime.strptime(expiry_date, "%m/%y")

diff_in_years = expiry_date.year - start_date.year 
print(diff_in_years)


14


In [11]:
def remove_expired_credit_cards(data, years_threshold=10):
   
    removed_ccards = []

    for index,person in enumerate(data):  
        
        start_date = datetime.strptime(person["credit_card"]["start_date"], "%m/%y")
        expiry_date = datetime.strptime(person["credit_card"]["expiry_date"], "%m/%y")
        diff_in_years = expiry_date.year - start_date.year

        if diff_in_years > years_threshold:
            removed_person = data.pop(index)
            removed_ccards.append(removed_person)

    return removed_ccards

In [12]:
removed_ccards = remove_expired_credit_cards(data, years_threshold=10)
print(f"Removed {len(removed_ccards)} expired credit cards.")

Removed 176 expired credit cards.


In [13]:
with open("removed_ccards.json", "w") as jsonfile:
    json.dump(removed_ccards, jsonfile, indent=4)

In [21]:
processed_new = []
with open("processed.json",'r') as jsonfile:
    processed_new = json.load(jsonfile)

In [24]:
for index,person in enumerate(processed_new):
    if(person["distance_commuted_to_work_km"] <= 1):
        person["Salary-Commute"] = person["yearly_salary_dollar"]
    else:
        person["Salary-Commute"] = person["yearly_salary_dollar"] / person["distance_commuted_to_work_km"]

In [30]:
processed_new = sorted(processed_new,key=lambda x:x["Salary-Commute"])

In [31]:
with open("commute.json", "w") as jsonfile:
    json.dump(processed_new, jsonfile, indent=4)