```json
{
    "required_ml_terms": [ "duplicates deletion"],
    "problems_to_detect": [
        "Using last duplicate rows can lead to losing information."
    ]
}
```

In [None]:
import ast
import json
from pathlib import Path
from typing import List, Tuple

import pandas as pd


def load_data(data_path: Path) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read the train, test and sample-submission CSV files under ``data_path``.

    Args:
        data_path: Directory containing ``train.csv``, ``test.csv`` and
            ``sample_submission.csv``.

    Returns:
        A ``(train, test, submission)`` tuple of DataFrames, in that order.
    """
    filenames = ("train.csv", "test.csv", "sample_submission.csv")
    # The generator is consumed by the unpacking, so files are read in the
    # order listed above.
    train_frame, test_frame, submission_frame = (
        pd.read_csv(data_path / filename) for filename in filenames
    )
    return train_frame, test_frame, submission_frame


def _parse_serialized_list(value):
    """Decode one serialized cell; anything that is not a string passes through."""
    if not isinstance(value, str):
        return value
    try:
        # JSON decoding maps the literal ``null`` to ``None`` without touching
        # the substring "null" inside quoted text, and never executes code.
        return json.loads(value)
    except ValueError:
        pass
    try:
        # Fall back to Python-literal syntax (e.g. single-quoted strings).
        # literal_eval only builds literals; unlike eval it cannot run code.
        return ast.literal_eval(value)
    except (ValueError, SyntaxError) as err:
        raise ValueError(
            f"Could not parse serialized value: {value[:80]!r}"
        ) from err


def clean_and_prepare_data(df: pd.DataFrame) -> pd.DataFrame:
    """Drop the ``id`` column and duplicate rows, then decode serialized list columns.

    Args:
        df: Input frame. It is not modified; a new frame is returned.

    Returns:
        A frame without the ``id`` column (if it was present), with exact
        duplicate rows removed and the index reset, in which the string cells
        of ``prompt``, ``response_a`` and ``response_b`` (where those columns
        exist) are decoded into Python objects. Non-string cells are kept
        as they are.

    Raises:
        ValueError: If a string cell in one of those columns is neither valid
            JSON nor a valid Python literal.
    """
    if "id" in df.columns:
        df = df.drop("id", axis=1)

    # Duplicates are compared on every remaining column, so the surviving row
    # has the same content whichever copy is kept; keep="last" only decides
    # which position survives before the index is reset.
    df = df.drop_duplicates(keep="last", ignore_index=True)

    for col in ("prompt", "response_a", "response_b"):
        # Decoding is per cell, so empty frames and columns that mix strings
        # with missing values need no special casing.
        if col in df.columns:
            df[col] = df[col].apply(_parse_serialized_list)
    return df


# Directory from which load_data reads train.csv, test.csv and sample_submission.csv.
DATA_PATH = Path("/kaggle/input/lmsys-chatbot-arena")
# Not referenced in this section; presumably the label column names — confirm against later cells.
TARGETS = ["winner_model_a", "winner_model_b", "winner_tie"]

# Load the three CSV files, then clean a copy of the training frame; train_df
# itself is reused below for the before/after shape comparison.
train_df, test_df, sub_df = load_data(DATA_PATH)
train_df_cleaned = clean_and_prepare_data(train_df.copy())

# Report the frame shape before and after cleaning, then preview two cleaned rows.
print(f"Original train shape: {train_df.shape}")
print(f"Cleaned train shape: {train_df_cleaned.shape}")
# NOTE(review): `display` is not imported in this section; presumably the IPython/Jupyter builtin — confirm.
display(train_df_cleaned.head(2))