In [1]:
import re
import pandas as pd

In [26]:
def ordinal(n: int) -> str:
    """Format an integer with its English ordinal suffix (1 -> 1st, 12 -> 12th, 23 -> 23rd)."""
    last_two = n % 100
    if last_two < 10 or last_two > 20:
        # Outside 10..20 the suffix depends only on the final digit.
        ending = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    else:
        # 10..20 always take "th"; this covers the irregular 11th, 12th, 13th.
        ending = "th"
    return f"{n}{ending}"

def get_cpu_generation(cpu_name: str) -> str:
    """
    Determine the CPU generation from a CPU name string.

    Supports:
      - Intel Core i3/i5/i7/i9 (desktop & mobile), including 4-digit
        10th Gen+ mobile models such as i7-1065G7 or i5-1235U
      - Intel Core Ultra (desktop & mobile, with mobile suffixes)
      - AMD Ryzen (desktop & mobile), with an optional "PRO" tag

    Args:
        cpu_name: CPU model string; matching is case-insensitive.

    Returns:
        A label such as "8th Gen", or "Unknown generation" when the input
        is not a string or matches none of the supported patterns.
    """
    if not isinstance(cpu_name, str):
        return "Unknown generation"

    cpu_lower = cpu_name.lower()

    # ---- Intel Core i3/i5/i7/i9 (desktop & mobile) ----
    # Only the numeric model part is captured; suffix letters (K, H, G7, ...)
    # do not influence the generation.
    intel_match = re.search(r'i[3579]-([0-9]{3,5})', cpu_lower)
    if intel_match:
        digits = intel_match.group(1)
        if len(digits) == 3:
            # 1st Gen models use three digits (e.g. i7-920).
            gen = 1
        elif len(digits) == 5 or digits[0] == "1":
            # 10th Gen+: five digits (i5-10600K) or four digits starting
            # with "1" (i7-1065G7, i5-1235U). Treating the latter as a
            # single leading digit would wrongly yield "1st Gen".
            gen = int(digits[:2])
        else:
            # 2nd-9th Gen: four digits, the first one is the generation.
            gen = int(digits[0])
        return f"{ordinal(gen)} Gen"

    # ---- Intel Core Ultra (desktop & mobile) ----
    # Handles "Core(TM) Ultra" or "Core Ultra", with a dash or a space
    # between the tier digit and the model number.
    # NOTE(review): every Core Ultra model is reported as "15th Gen",
    # whatever its model number - confirm this is intended for 1xx parts.
    ultra_match = re.search(r'core(?:\(tm\))?\s+ultra\s+\d[-\s]\d+', cpu_lower)
    if ultra_match:
        return "15th Gen"

    # ---- AMD Ryzen (desktop & mobile) ----
    # An optional "pro" token may sit between the tier and the model number
    # (e.g. "Ryzen 7 PRO 4750U"); the first model digit is used as generation.
    ryzen_match = re.search(r'ryzen\s+\d+\s+(?:pro\s+)?([0-9]{3,5})', cpu_lower)
    if ryzen_match:
        gen = int(ryzen_match.group(1)[0])
        return f"{ordinal(gen)} Gen"

    return "Unknown generation"


In [39]:
# Sample CPU name strings used to exercise get_cpu_generation.
# Order matters: it determines the row order of the resulting frame.
data = dict(
    CPU=[
        # Mixed Intel Core i-series, Core Ultra, AMD Ryzen and one unsupported name
        "Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz",
        "Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz",
        "Intel(R) Core(TM) i9-12900HK CPU @ 3.20GHz",
        "Intel Core Ultra 5 245KF",
        "Intel(R) Core(TM) Ultra 5-245KF CPU @ 4.20GHz",
        "AMD Ryzen 7 5800U 8-Core Processor",
        "AMD Ryzen 5 3600 6-Core Processor",
        "Apple M1",
        "Intel(R) Core(TM) Ultra 9 285K Processor",
        "Intel(R) Core(TM) Ultra 7 265K Processor",
        "Intel(R) Core(TM) Ultra 5 245K Processor",
        "AMD Ryzen 7 4800H with Radeon Graphics",
        "Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz",
        # Core Ultra variants: space vs. dash separator, with and without a trailing " H"
        "Intel Core Ultra 5 245K",
        "Intel Core Ultra 5-245KF",
        "Intel Core Ultra 7 265H",
        "Intel Core Ultra 7-265KF H",
        "Intel Core Ultra 9 285K",
        "Intel Core Ultra 9-285K H",
    ]
)

# Single-column frame; each row holds one CPU name string.
df = pd.DataFrame(data)

In [40]:
# Derive a generation label for every CPU name, element by element.
df["CPU_Generation"] = df["CPU"].map(get_cpu_generation)

In [41]:
# Print the frame as plain text so each CPU string can be checked against its CPU_Generation label.
print(df)

                                              CPU      CPU_Generation
0        Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz             8th Gen
1       Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz            10th Gen
2      Intel(R) Core(TM) i9-12900HK CPU @ 3.20GHz            12th Gen
3                        Intel Core Ultra 5 245KF            15th Gen
4   Intel(R) Core(TM) Ultra 5-245KF CPU @ 4.20GHz            15th Gen
5              AMD Ryzen 7 5800U 8-Core Processor             5th Gen
6               AMD Ryzen 5 3600 6-Core Processor             3rd Gen
7                                        Apple M1  Unknown generation
8        Intel(R) Core(TM) Ultra 9 285K Processor            15th Gen
9        Intel(R) Core(TM) Ultra 7 265K Processor            15th Gen
10       Intel(R) Core(TM) Ultra 5 245K Processor            15th Gen
11         AMD Ryzen 7 4800H with Radeon Graphics             4th Gen
12       Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz             9th Gen
13                  