In [None]:
# Interview asks:

# Refactor into a class using Pydantic for validation (date parsing, email validation).
# Make it extensible and clean.

import json, datetime 
def parse(data):
    output=[]
    for d in data:
        print (d)
        dt=datetime.datetime.strptime(d['date'],'%Y-%m-%d')
        d['date']=dt
        if "email" in d:
            d['email'] = d['email'].lower()
        output.append(d)
    return output

data=json.loads('[{"date":"2023-01-02","email":"USER@TEST.COM"},{"date":"2023-05-07"}]')
res = parse(data)
print(res)



In [None]:

# My solution

import json,datetime

class DataParser:
    def __init__(self, str_data):
        self.data = json.loads(str_data) 
    
    def email_modifier(self):
        for d in self.data:
            if 'email' in d:
                d['email'] =  d['email'].lower()
        
    def data_modifier(self):
        for d in self.data:
            dt=datetime.datetime.strptime(d['date'],'%Y-%m-%d')
            d['date']=dt
            
    def print_data(self):
        for d in self.data:
            print(d)

data = '[{"date":"2023-01-02","email":"USER@TEST.COM"},{"date":"2023-05-07"}]'       
data_parser = DataParser(data)
data_parser.print_data()
data_parser.email_modifier()
data_parser.print_data()
data_parser.data_modifier()
data_parser.print_data()

Nice work 👏 — this is already a huge step forward compared to the messy original script! You’ve wrapped the logic in a class, used methods, and made the flow clearer. That’s exactly the type of improvement interviewers want to see.

That said, for a MLE interview cleanup task, you can push this further toward production-quality. Let me walk you through what’s great and where you can level up:

✅ What you did well

Wrapped messy script into a DataParser class ✅

Split transformations into separate methods (email_modifier, data_modifier) ✅

Works correctly on the given input ✅

🚀 Next-level improvements (interview polish)
1. Naming / responsibilities

data_modifier is vague → rename to parse_dates.

email_modifier → normalize_emails.

2. Avoid in-place mutations where possible

Right now, you’re mutating self.data. In production, we often prefer to return a new cleaned dataset so you can chain operations and avoid side effects.

3. Error handling

If date has a wrong format → strptime will crash.

Add try/except or a validator.

4. Use Pydantic for validation 

This lets you validate + transform in one place.

In [9]:
!pip install pydantic[email]

Collecting email-validator>=2.0.0 (from pydantic[email])
  Downloading email_validator-2.3.0-py3-none-any.whl.metadata (26 kB)
Collecting dnspython>=2.0.0 (from email-validator>=2.0.0->pydantic[email])
  Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Collecting idna>=2.0.0 (from email-validator>=2.0.0->pydantic[email])
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Downloading email_validator-2.3.0-py3-none-any.whl (35 kB)
Downloading dnspython-2.8.0-py3-none-any.whl (331 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Installing collected packages: idna, dnspython, email-validator

   ---------------------------------------- 0/3 [idna]
   ------------- -------------------------- 1/3 [dnspython]
   ------------- -------------------------- 1/3 [dnspython]
   ------------- -------------------------- 1/3 [dnspython]
   ------------- -------------------------- 1/3 [dnspython]
   ------------- -------------------------- 1/3 [dnspython]
   ------------- ------

In [None]:
from pydantic import BaseModel, EmailStr, field_validator
import json
from datetime import datetime
from typing import Optional 
from logging import Logger

class Record(BaseModel):
    """
    Record is a Pydantic model (inherits from BaseModel).
    Pydantic models are like data containers with validation.
    Each attribute has a type, and Pydantic will automatically validate and parse the input.
    """
    date: datetime 
    # email: EmailStr | None = None
    email: Optional[EmailStr] = None # same as above but more readable I belive 

    @field_validator('email', mode='before')
    def normalize_email(cls, v):
        """ 
        Runs before Pydantic validation (mode='before').
        Targets the email field specifically.
        Purpose: preprocess the input before Pydantic checks the type/format.
        """
        return v.lower() if v else v 


class DataParser:
    def __init__(self, str_data: str):
        self.raw_data = json.loads(str_data)
        self.records: list[Record] = [] 
        # self.records = [] 
        # : list[Record] → type annotation

        # You can write self.records = [] — your code will work perfectly.
        # Adding : list[Record] adds clarity:
            # Other developers know this list should only contain Record objects.
            # Type checkers can catch mistakes.
            # It’s best practice for production code and interviews.

    def parse(self):
        for entry in self.raw_data:
            try:
                self.records.append(Record(**entry))
                # Record(**entry):
                # This is Python’s “dictionary unpacking” syntax:
                # **entry takes the dictionary and converts it into keyword arguments.
                # entry = {"date": "2023-01-02", "email": "USER@TEST.COM"}
                # Record(**entry) Is equivalent to: Record(date="2023-01-02", email="USER@TEST.COM")

            except Exception as e:
                print(f"Skipping bad record {entry}: {e}") 
        
    def print_data(self):
        for r in self.records:
            print(r.dict())


In [57]:
data = '[{"date":"2023-01-02","email":"USER@TEST.COM"},{"date":"2023-05-07"}]'
parser = DataParser(data)
parser.parse()
parser.print_data()

{'date': datetime.datetime(2023, 1, 2, 0, 0), 'email': 'user@test.com'}
{'date': datetime.datetime(2023, 5, 7, 0, 0), 'email': None}


C:\Users\danial.arab\AppData\Local\Temp\ipykernel_32640\4185233278.py:41: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(r.dict())


5. Extensibility

If tomorrow you need to validate phone numbers, add another field in Record (Pydantic takes care of it).

If you need to export → parser.records is already clean structured data.

⚡ Why this matters in interviews
Interviewers want to see you:

Recognize messy code → refactor into clean abstractions.

Handle edge cases gracefully.

Use libraries like Pydantic for type safety + validation.

Write code that looks like it could go into production.

### print vs logging

print is just standard output, not suitable for production.

logging allows you to:

- Set levels (INFO, WARNING, ERROR)
- Redirect output to files, streams, or monitoring systems
- Control formatting

In [3]:
from typing import Optional 
from datetime import datetime
import json 
from pydantic import BaseModel, EmailStr, field_validator
import logging 

logging.basicConfig(
    level=logging.WARNING,
    format="%(levelname)s: %(message)s",
    filename="parser.log",   # file where logs are saved
    filemode="w"             # "w" = overwrite each run, "a" = append
)

class Record(BaseModel):
    date: datetime
    email: Optional[EmailStr] = None 

    @field_validator('email', mode = 'before')
    def normalize_email(cls, v):
        return v.lower() if v else v 
    

class DataParser:
    def __init__(self, str_data: str):
        self.raw_data = json.loads(str_data)
        self.records: list[Record] = [] 


    def parse_data(self):
        for entry in self.raw_data:
            try: 
                self.records.append(Record(**entry))
            except Exception as e:
                logging.warning(f"Skipping bad record {entry}: {e}")

    def print_data(self):
        for r in self.records:
            print(r.model_dump())


data = '[{"date":"2023-01-02","email":"USER@TEST.COM"},{"date":"2023-05-07"}]'
parser = DataParser(data)
parser.parse_data()
parser.print_data()

{'date': datetime.datetime(2023, 1, 2, 0, 0), 'email': 'user@test.com'}
{'date': datetime.datetime(2023, 5, 7, 0, 0), 'email': None}
