In [None]:
import re
# Sample sentence searched by the `^The.*Spain$` anchor example further down.
txt = "The rain in Spain"

| Symbol  | Meaning                      | Example Match             |      |      |
| ------- | ---------------------------- | ------------------------- | ---- | ---- |
| `.`     | any character except newline | `a.c` → "abc", "axc"      |      |      |
| `^`     | start of string              | `^Hello`                  |      |      |
| `$`     | end of string                | `end$`                    |      |      |
| `*`     | 0 or more repetitions        | `ab*` → "a", "ab", "abbb" |      |      |
| `+`     | 1 or more repetitions        | `ab+` → "ab", "abb"       |      |      |
| `?`     | 0 or 1 repetition            | `colou?r`                 |      |      |
| `{n}`   | exactly n repetitions        | `\d{4}` → 4 digits        |      |      |
| `{n,}`  | at least n                   | `a{3,}`                   |      |      |
| `{n,m}` | n to m                       | `a{1,3}`                  |      |      |
| `[]`    | character set                | `[a-z]`, `[0-9]`          |      |      |
| `()`    | grouping                     | `(abc)+`                  |      |      |
| `\|`    | OR                           | `cat\|dog`                |      |      |
| `\`     | escape                       | `\.`                      |      |      |


| Pattern | Meaning                                      |
| ------- | -------------------------------------------- |
| `\d`    | Digit (0–9)                                  |
| `\D`    | Non-digit                                    |
| `\w`    | Word character (letters, digits, underscore) |
| `\W`    | Non-word                                     |
| `\s`    | Whitespace                                   |
| `\S`    | Non-whitespace                               |
| `[A-Z]` | Uppercase range                              |
| `[a-z]` | Lowercase range                              |
| `[0-9]` | Digits                                       |


| Pattern | Meaning   | Example                                   |
| ------- | --------- | ----------------------------------------- |
| `*`     | 0+ times  | `a*` matches "", "a", "aaaa"              |
| `+`     | 1+ times  | `a+` matches "a", "aaaa"                  |
| `?`     | 0 or 1    | `colou?r` matches both "color" & "colour" |
| `{n}`   | exactly n | `\d{10}` for Indian mobile                |
| `{n,}`  | n or more | `a{2,}`                                   |
| `{n,m}` | n to m    | `\d{2,4}`                                 |


| Anchor | Purpose       |
| ------ | ------------- |
| `^`    | Match start   |
| `$`    | Match end     |
| `\b`   | Word boundary |
| `\B`   | Non-boundary  |


In [None]:
# Anchored search: the text must begin with "The" and end with "Spain",
# with any run of non-newline characters in between.
x = re.compile("^The.*Spain$").search(txt)
x


In [None]:
# Common extraction patterns, bound to names so other cells can reuse them.
# They are raw strings on purpose: in an ordinary string literal, regex escapes
# such as \d, \s, \. and \+ are invalid string escape sequences that Python
# warns about.

# Extract Emails
EMAIL_PATTERN = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[A-Za-z]{2,}"

# Extract Phone Numbers (India): optional "+91" prefix (optionally followed by
# a space or hyphen), then ten digits whose first digit is 6-9.
PHONE_PATTERN_IN = r"(\+91[\s-]?)?[6-9]\d{9}"

# Extract URLs: http:// or https:// followed by everything up to whitespace.
URL_PATTERN = r"https?://[^\s]+"

# Extract Dates: three digit groups separated by "-" or "/".
DATE_PATTERN = r"\d{1,2}[-/]\d{1,2}[-/]\d{2,4}"

# Extract Numbers: integer part with an optional fractional part.
# NOTE: the fractional part is a capturing group, so re.findall returns only
# that group; use re.finditer(...).group() to get the whole number.
NUMBER_PATTERN = r"\d+(\.\d+)?"

In [None]:
# Every maximal run of digits; "_" and spaces both end a run, so "3_4" gives "3" and "4".
re.compile(r'\d+').findall('Price: 120, Tax: 18, 12 3_4 5 56 6 7 89 ')

In [None]:
# A literal pattern matches wherever it occurs, including inside a longer
# word: "apple" is found twice (once on its own, once inside "applejuice").
text = "I like apple applejuice"
re.compile(r"apple").findall(text)


In [None]:
# "." stands for any single character (letter, digit, space, underscore...),
# so everything of the form h?t matches; "h_h" does not end in "t".
[m.group() for m in re.finditer(r"h.t", "hat hit hot h9t h t h_t h_h")]


In [None]:
# "b*" allows zero or more b's: each "a" is returned together with whatever
# run of b's follows it (the "a" of "ac" comes back alone).
re.compile(r"ab*").findall("a ab abb abbb ac abbbbbbbbbb")


In [None]:
# Both parts are optional, so the pattern can match the empty string: besides
# the a/b runs, the result also contains '' entries at positions where
# neither an "a" nor a "b" starts.
re.compile(r"a*b*").findall("a ab abb abbb ac abbbbbbbbbb")


In [None]:
# "b+" needs at least one b, so the lone "a" and the "a" of "ac" are skipped.
[m.group() for m in re.finditer(r"ab+", "a ab abb abbb ac")]


In [None]:
# "u?" permits zero or one "u": "color" and "colour" match, "colouur" does not.
re.compile(r"colou?r").findall("color colour colouur")


In [None]:
# {n} demands exactly n consecutive digits.
# First call: ['2820', '2024'] - the leftover "01" of the pin is too short.
# Second call: ['2820003'] - the 4-digit year cannot supply 7 digits.
re.compile(r"\d{4}").findall("Pin: 282001 Year: 2024")
re.compile(r"\d{7}").findall("Pin: 2820003 Year: 2024")


In [None]:
# {2,} means two or more: the single "a" is rejected, longer runs are kept whole.
[m.group() for m in re.finditer(r"a{2,}", "a aa aaa aaaa")]


In [None]:
# {2,4} accepts runs of two to four a's; a longer run is consumed greedily in
# chunks of at most four, and a leftover shorter than two is dropped.
re.compile(r"a{2,4}").findall("a aa aaa aaaa aaaaaaaaaaaaa")


In [None]:
# [a-z] matches one lowercase ASCII letter at a time; uppercase letters,
# digits and spaces are ignored.
[m.group() for m in re.finditer(r"[a-z]", "A b C d 123")]


In [None]:
# Runs of ASCII letters only: the digits and the trailing "_" are left out.
re.compile(r"[A-Za-z]+").findall("Hello 123 World_")


In [None]:
# \d without a quantifier returns each digit as its own match.
[m.group() for m in re.finditer(r"\d", "A1B2C3")]


In [None]:
# \D is the complement of \d: every character that is not a digit.
re.compile(r"\D").findall("A1B2C3")


In [None]:
# \w covers letters, digits and "_", so "Hello_123" is one word; the space
# and the exclamation marks are not word characters.
[m.group() for m in re.finditer(r"\w+", "Hello_123 !!!")]


In [None]:
# \W is the complement of \w: the space and the "!!!" form one non-word run.
re.compile(r"\W+").findall("Hello_123 !!!")


In [None]:
# \s matches a single whitespace character, so each space is returned separately.
[m.group() for m in re.finditer(r"\s", "Hello  World")]


In [None]:
# \S is the complement of \s: every character except the spaces, one per match.
re.compile(r"\S").findall("Hello  World")


In [None]:
# "^" pins the match to the start of the string.
re.compile(r"^Hello").findall("Hello World")


In [None]:
# "$" pins the match to the end of the string.
re.compile(r"world$").findall("hello world")


In [None]:
# With both anchors the whole string must consist of digits; a successful
# match yields a Match object (None otherwise).
re.compile(r"^\d+$").match("123456")

In [None]:
# Turn the match result into a plain True/False validity flag: True here.
re.match(r"^\d+$", "123456") is not None


In [None]:
# Same digits-only check again: an all-digit string validates as True.
re.match(r"^\d+$", "123456") is not None

In [None]:
# A single non-digit ("a") anywhere in the string makes the check False.
re.match(r"^\d+$", "123a456") is not None


In [None]:
# ^\D+$ requires that the string contain no digits at all; this one does,
# so the result is False.
re.match(r"^\D+$", "123 a 456") is not None


In [None]:
# With several capture groups, findall returns one tuple per match holding
# the text of each group: [('12', '05', '2003')].
text = "DOB: 12-05-2003"
re.compile(r"(\d{2})-(\d{2})-(\d{4})").findall(text)


In [None]:
# (?:...) groups the title alternatives without capturing them, so each
# result is the whole "Title. Name" text rather than just the title.
[m.group() for m in re.finditer(r"(?:Mr|Mrs|Dr)\. \w+", "Dr. Smith met Mr. John")]


In [None]:
# Email extraction: local part, "@", domain, then a dot and a top-level
# domain of at least two letters. The trailing comma is not part of a match.
pattern = r"[a-zA-Z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
re.compile(pattern).findall("Contact: test123@gmail.com, info@yahoo.in")


In [None]:
# URL extraction: "http://" or "https://" followed by everything up to the
# next whitespace character.
[m.group() for m in re.finditer(r"https?://[^\s]+", "Visit https://youtube.com  and http://google.com")]


In [None]:
# Pitfall: the pattern contains a capture group, so findall returns only what
# the group matched - ['.50', ''] - not the full numbers.
re.compile(r"\d+(\.\d+)?").findall("Price: 120.50, Tax: 18")


In [None]:
# Making the fractional part non-capturing, (?:...), lets findall return the
# complete numbers: ['120.50', '18'].
re.compile(r"\d+(?:\.\d+)?").findall("Price: 120.50, Tax: 18")


In [None]:
# Date-like text: three digit groups joined by "-" or "/", with one or two
# digits in the first two groups and two to four in the last.
[m.group() for m in re.finditer(r"\d{1,2}[-/]\d{1,2}[-/]\d{2,4}", "Dates: 1/1/24, 12-05-2023")]


In [None]:
# search stops at the first match only: '24' is returned, '33' is never reached.
re.compile(r"\d+").search("Age is 24 33").group(0)


In [None]:
# findall, by contrast, collects every match in the string.
re.compile(r"\d+").findall("a1 b22 c333")


In [None]:
# sub replaces every match with the given text: the digit run becomes "X".
re.compile(r"\d+").sub("X", "Room 24")


In [None]:
# Splitting on \s+ treats any run of whitespace as one separator, so the
# repeated spaces do not produce empty strings.
re.compile(r"\s+").split("I   am   Shikher")


In [None]:
# A pattern can be compiled once and reused; the compiled object is accepted
# wherever a pattern string is. Here it picks out the ten-digit number.
pattern = re.compile(r"\d{10}")
re.findall(pattern, "My number is 9876543210")


| Type                | Syntax     | Meaning                 |
| ------------------- | ---------- | ----------------------- |
| Positive Lookahead  | `(?=...)`  | Must be followed by     |
| Negative Lookahead  | `(?!...)`  | Must NOT be followed by |
| Positive Lookbehind | `(?<=...)` | Must be preceded by     |
| Negative Lookbehind | `(?<!...)` | Must NOT be preceded by |

In [None]:
# Positive lookahead: keep a word only if "@gmail.com" comes right after it.
# The lookahead text itself is not part of the match, so only the usernames
# of the two Gmail addresses are returned.
re.compile(r"\w+(?=@gmail\.com)").findall("test@gmail.com user@yahoo.com user@outlook.com shikherjain786@gmail.com")


In [None]:
# Negative lookahead: usernames of the addresses that are NOT at gmail.com.
# A bare `\w+(?!@gmail\.com)` is not enough: `\w+` simply backtracks to a
# shorter word (giving "tes" for "test@gmail.com") and also matches domain
# fragments such as "gmail" or "com", because none of those positions is
# followed by "@gmail.com". Starting at a word boundary and requiring the "@"
# inside the lookahead restricts matches to whole usernames.
re.findall(r"\b\w+(?=@(?!gmail\.com))", "test@gmail.com user@yahoo.com user@outlook.com shikherjain@gmail.com")


In [None]:
# Positive lookbehind: match " <word>" only when "Mr." stands immediately
# before the space. The space is part of the pattern, so each result keeps
# its leading space; the name after "Ms." is skipped.
re.compile(r"(?<=Mr\.) \w+").findall("Mr. John and Mr. David Ms. Alice and Mr. Bob")

In [None]:
# Negative lookbehind: match " <word>" unless "Dr." stands immediately before
# the space. " Smith" is therefore skipped, while " Mr" and " John" qualify
# (each result keeps the leading space that is part of the pattern).
re.compile(r"(?<!Dr\.) \w+").findall("Dr. Smith Mr. John")
