In [15]:
import re
import pandas as pd

# Load dataset
s22 = pd.read_csv('Samsung_S22.csv')

# Step 1: Remove rows with '|' (multiple model options)
s22 = s22[~s22['Title'].str.contains(r'\|', regex=True, na=False)]

# Step 2: Remove titles that list multiple models like "S22 / S22+" or "S22 - S22 Ultra"
pattern_multi = r'\bS?22\b\s*[-/|]\s*\bS?22\s*(?:Ultra|\+|Plus)?\b'
s22 = s22[~s22['Title'].str.contains(pattern_multi, flags=re.IGNORECASE, na=False)]

# ✅ Step 3: Keep only base "Samsung Galaxy S22" — exclude S22+, Plus, Ultra, FE, etc.
s22 = s22[
    # Match "S22" but NOT followed by +, Plus, Ultra, etc.
    s22['Title'].str.contains(r'\b(Samsung\s*)?(Galaxy\s*)?S22(?!\s*(\+|Plus|Ultra|FE|Fan|Pro))\b',
                              flags=re.IGNORECASE, na=False)
]

# Optional: remove duplicates
s22 = s22.drop_duplicates(subset='Title', keep='first')

# Preview results
s22


  s22['Title'].str.contains(r'\b(Samsung\s*)?(Galaxy\s*)?S22(?!\s*(\+|Plus|Ultra|FE|Fan|Pro))\b',


Unnamed: 0,Page,Title,Price,Sold Date,Link,Image Link
4,1,Samsung Galaxy S22 S901 128GB Unlocked - Excel...,$168.95,"Sold Oct 29, 2025",https://www.ebay.com/itm/364498823740,https://i.ebayimg.com/images/g/IzAAAOSwNXFlFYb...
12,1,Samsung Galaxy S22 S901 128GB Unlocked - Very ...,$159.95,"Sold Oct 29, 2025",https://www.ebay.com/itm/404513869457,https://i.ebayimg.com/images/g/u-AAAOSw46tlFYb...
17,1,Samsung Galaxy S22 5G S901U Unlocked (Any Carr...,$158.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/225935482577,https://i.ebayimg.com/images/g/Xp4AAOSwJ4ZjBY2...
29,1,Samsung Galaxy S22 128GB 256GB S901U Unlocked ...,$144.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/157315789642,https://i.ebayimg.com/images/g/xiQAAeSwdaVo3UH...
32,1,Samsung Galaxy S22 SM-S901U Unlocked 128GB Pha...,$147.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/196518586989,https://i.ebayimg.com/images/g/urQAAOSw2QFmn95...
...,...,...,...,...,...,...
672,3,Samsung Galaxy S22 (5G) - 128GB Pink *UNLOCKED...,$170.99,"Sold Aug 17, 2025",https://www.ebay.com/itm/145612601351,https://i.ebayimg.com/images/g/gqIAAOSw6MVlvo-...
699,3,"Samsung Galaxy S22 5G 128GB 6.1"" Smartphone - ...",$228.00,"Sold Aug 8, 2025",https://www.ebay.com/itm/226501317571,https://i.ebayimg.com/images/g/0f8AAOSwHPtnYc2...
705,3,Samsung Galaxy S22 5G Phantom Black 128GB Unlo...,$149.99,"Sold Aug 7, 2025",https://www.ebay.com/itm/277258595879,https://i.ebayimg.com/images/g/TZsAAeSwfQdobRx...
711,3,Samsung Galaxy S22 5G SM-S901U - 128GB - Bora ...,$163.15,"Sold Aug 5, 2025",https://www.ebay.com/itm/306369728645,https://i.ebayimg.com/images/g/Ur0AAOSwLmFoVmf...


In [16]:
import re
import pandas as pd

# Load the dataset
s23 = pd.read_csv('Samsung_S23.csv')

# 1) Drop listings that advertise multiple models in one title (contain a pipe)
s23 = s23[~s23['Title'].str.contains(r'\|', regex=True, na=False)]

# 2) Drop titles that explicitly list multiple S23 variants like "S23 / S23 Ultra" or "S23 - S23+"
pattern_multi = r'\bS?23\b\s*[-/|]\s*\bS?23\s*(?:Ultra|\+|Plus|FE|Fan\s*Edition|Pro)?\b'
s23 = s23[~s23['Title'].str.contains(pattern_multi, flags=re.IGNORECASE, na=False)]

# 3) Keep ONLY the base "Samsung Galaxy S23" (allow "S23 5G", etc.)
#    Use negative lookahead so we don't match S23 when followed by +/Plus/Ultra/FE/Fan/Pro
base_s23_only = r'\b(Samsung\s*)?(Galaxy\s*)?S23(?!\s*(\+|Plus|Ultra|FE|Fan\s*Edition|Fan|Pro))\b'
s23 = s23[s23['Title'].str.contains(base_s23_only, flags=re.IGNORECASE, na=False)]

# 4) Exclude refurbished / renewed / used condition keywords
cond_exclude = r'\b(refurb|renewed?|re[-\s]?conditioned|certified\s*refurbished|used|pre[-\s]?owned|open\s*box)\b'
s23 = s23[~s23['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]

# 5) Optional: remove duplicate titles
s23 = s23.drop_duplicates(subset='Title', keep='first')

# Quick preview
s23


  s23 = s23[s23['Title'].str.contains(base_s23_only, flags=re.IGNORECASE, na=False)]
  s23 = s23[~s23['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]


Unnamed: 0,Page,Title,Price,Sold Date,Link,Image Link
0,1,Excellent Samsung Galaxy S23 128GB Cream Veriz...,$234.89,"Sold Oct 29, 2025",https://www.ebay.com/itm/187631441629,https://i.ebayimg.com/images/g/tKoAAeSwDdRoeQA...
2,1,Samsung Galaxy S23 128GB S911U Unlocked - Exce...,$232.99,"Sold Oct 29, 2025",https://www.ebay.com/itm/256372271129,https://i.ebayimg.com/images/g/AXAAAeSwl3loL2c...
5,1,Samsung Galaxy S23 SM-S911U1 5G 128GB Lavende...,$219.00,"Sold Oct 29, 2025",https://www.ebay.com/itm/127446681069,https://i.ebayimg.com/images/g/VrMAAeSwaLpo-P1...
9,1,Samsung Galaxy S23 5G 128/256GB Unlocked -Ref...,$259.99,"Sold Oct 29, 2025",https://www.ebay.com/itm/326722087811,https://i.ebayimg.com/images/g/0tsAAOSwAPhl81i...
20,1,Samsung Galaxy S23 128GB S911U Unlocked - Good,$205.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/256372465052,https://i.ebayimg.com/images/g/kvwAAeSwDeVoL2F...
...,...,...,...,...,...,...
671,3,Samsung Galaxy S23 SM-S911U1 - 128GB Green FA...,$254.00,"Sold Aug 22, 2025",https://www.ebay.com/itm/116740479169,https://i.ebayimg.com/images/g/Qv0AAeSwMZVoo1t...
672,3,Samsung Galaxy S23 SM-S911U1 128GB 5G Black F...,$234.00,"Sold Aug 22, 2025",https://www.ebay.com/itm/146780539176,https://i.ebayimg.com/images/g/A50AAeSwRAtoo9k...
683,3,Samsung Galaxy S23 - 128 GB - Lavender - Unloc...,$254.00,"Sold Aug 21, 2025",https://www.ebay.com/itm/127096215233,https://i.ebayimg.com/images/g/61cAAOSwNI9kR7a...
713,3,Samsung Galaxy S23 128GB Phantom Black Unlocke...,$259.99,"Sold Aug 17, 2025",https://www.ebay.com/itm/285200013270,https://i.ebayimg.com/images/g/BpYAAOSw1pxlAzw...


In [17]:
import re
import pandas as pd

# Load the dataset
s24 = pd.read_csv('Samsung_S24.csv')

# 1) Remove rows with '|' (multiple model options)
s24 = s24[~s24['Title'].str.contains(r'\|', regex=True, na=False)]

# 2) Remove titles that list multiple models like "S24 / S24 Ultra" or "S24 - S24+"
pattern_multi = r'\bS?24\b\s*[-/|]\s*\bS?24\s*(?:Ultra|\+|Plus|FE|Fan\s*Edition|Pro)?\b'
s24 = s24[~s24['Title'].str.contains(pattern_multi, flags=re.IGNORECASE, na=False)]

# 3) Keep only base "Samsung Galaxy S24" (allow variants like "S24 5G")
#    Negative lookahead ensures we don't match S24 when followed by +, Plus, Ultra, FE, Fan, or Pro
base_s24_only = r'\b(Samsung\s*)?(Galaxy\s*)?S24(?!\s*(\+|Plus|Ultra|FE|Fan\s*Edition|Fan|Pro))\b'
s24 = s24[s24['Title'].str.contains(base_s24_only, flags=re.IGNORECASE, na=False)]

# 4) Exclude refurbished / renewed / used condition keywords
cond_exclude = r'\b(refurb|renewed?|re[-\s]?conditioned|certified\s*refurbished|used|pre[-\s]?owned|open\s*box)\b'
s24 = s24[~s24['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]

# 5) Optional: remove duplicates
s24 = s24.drop_duplicates(subset='Title', keep='first')

# Preview the results
s24


  s24 = s24[s24['Title'].str.contains(base_s24_only, flags=re.IGNORECASE, na=False)]
  s24 = s24[~s24['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]


Unnamed: 0,Page,Title,Price,Sold Date,Link,Image Link
0,1,Samsung Galaxy S24 128GB S921U Unlocked - Very...,$317.99,"Sold Oct 29, 2025",https://www.ebay.com/itm/256611130330,https://i.ebayimg.com/images/g/6wYAAeSw6ENoL7q...
12,1,Samsung Galaxy S24 SM-S921U1 5G 256GB violet ...,$319.00,"Sold Oct 28, 2025",https://www.ebay.com/itm/146893972054,https://i.ebayimg.com/images/g/KtoAAeSwVlNo7XQ...
13,1,Samsung Galaxy S24 SM-S921U1 5G 128GB Violet P...,$304.00,"Sold Oct 28, 2025",https://www.ebay.com/itm/116849789783,https://i.ebayimg.com/images/g/WiIAAeSw79Jo2ZL...
16,1,Samsung Galaxy S24 128GB S921U Unlocked - Exce...,$335.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/256496745168,https://i.ebayimg.com/images/g/x8MAAeSwR8toMMG...
26,1,"Samsung Galaxy S24 5G 128GB 6.2"" Smartphone - ...",$382.00,"Sold Oct 28, 2025",https://www.ebay.com/itm/326459470881,https://i.ebayimg.com/images/g/GqIAAOSweypnfbK...
...,...,...,...,...,...,...
698,3,Samsung Galaxy S24 128GB SM-S921U1 Black 5G ...,$329.00,"Sold Aug 24, 2025",https://www.ebay.com/itm/146780020434,https://i.ebayimg.com/images/g/QP8AAeSw1opoo1t...
709,3,Samsung Galaxy S24 128GB - Onyx Black Unlocked...,$369.00,"Sold Aug 21, 2025",https://www.ebay.com/itm/226156376715,https://i.ebayimg.com/images/g/9hQAAeSwb6pob-h...
712,3,CERTIFIED Samsung Galaxy S24 128GB SM-S921U1 ...,$304.00,"Sold Aug 20, 2025",https://www.ebay.com/itm/127318692214,https://i.ebayimg.com/images/g/-Z8AAeSw1iZoc8o...
713,3,Samsung Galaxy S24 128GB SM-S921U1 YELLOW 5...,$319.00,"Sold Aug 21, 2025",https://www.ebay.com/itm/146756287119,https://i.ebayimg.com/images/g/X3EAAeSw5gxok41...


In [18]:
import re
import pandas as pd

# Load the dataset
s25 = pd.read_csv('Samsung_S25.csv')

# 1) Remove rows with '|' (multiple model options)
s25 = s25[~s25['Title'].str.contains(r'\|', regex=True, na=False)]

# 2) Remove titles that list multiple models like "S25 / S25+" or "S25 - S25 Ultra"
pattern_multi = r'\bS?25\b\s*[-/|]\s*\bS?25\s*(?:Ultra|\+|Plus|FE|Fan\s*Edition|Pro)?\b'
s25 = s25[~s25['Title'].str.contains(pattern_multi, flags=re.IGNORECASE, na=False)]

# 3) Keep ONLY the base "Samsung Galaxy S25"
#    Negative lookahead ensures it doesn't capture Plus, Ultra, etc.
base_s25_only = r'\b(Samsung\s*)?(Galaxy\s*)?S25(?!\s*(\+|Plus|Ultra|FE|Fan\s*Edition|Fan|Pro))\b'
s25 = s25[s25['Title'].str.contains(base_s25_only, flags=re.IGNORECASE, na=False)]

# 4) Exclude refurbished / renewed / used condition keywords
cond_exclude = r'\b(refurb|renewed?|re[-\s]?conditioned|certified\s*refurbished|used|pre[-\s]?owned|open\s*box)\b'
s25 = s25[~s25['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]

# 5) Optional: remove duplicates
s25 = s25.drop_duplicates(subset='Title', keep='first')

# Preview results
s25


  s25 = s25[s25['Title'].str.contains(base_s25_only, flags=re.IGNORECASE, na=False)]
  s25 = s25[~s25['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]


Unnamed: 0,Page,Title,Price,Sold Date,Link,Image Link
5,1,Samsung Galaxy S25 5G - SM-S931U - Unlocked - ...,$463.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/146862403064,https://i.ebayimg.com/images/g/lYUAAeSwi-do2sy...
7,1,Samsung Galaxy S25 Edge 256GB Titanium Silver ...,$599.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/277338151823,https://i.ebayimg.com/images/g/jVYAAeSwPJRo-nF...
12,1,Samsung Galaxy S25 SM-S931U1 128GB Unlocked Ex...,$499.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/376644158819,https://i.ebayimg.com/images/g/DzsAAeSwWyNo8Su...
15,1,Samsung Galaxy S25 Edge 512GB Titanium Silver ...,$629.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/277290482816,https://i.ebayimg.com/images/g/jZgAAeSw1kFo-nD...
18,1,Samsung Galaxy S25 - 128 GB - SM-S931U1 Navy (...,$434.95,"Sold Oct 27, 2025",https://www.ebay.com/itm/405977447569,https://i.ebayimg.com/images/g/wm4AAOSwiqBoUuj...
...,...,...,...,...,...,...
360,2,Samsung Galaxy S25 (5G) SM-S931U - Unlocked - ...,$509.00,"Sold Aug 8, 2025",https://www.ebay.com/itm/146756822773,https://i.ebayimg.com/images/g/oXMAAeSwP4tok5Z...
361,2,Samsung Galaxy S25 - Unlocked - Silver Shadow ...,$574.00,"Sold Aug 8, 2025",https://www.ebay.com/itm/277054239737,https://i.ebayimg.com/images/g/vJAAAeSwvZdo71h...
362,2,Samsung Galaxy S25 - 128 GB - SM-S931U1 Icyblu...,$484.99,"Sold Aug 8, 2025",https://www.ebay.com/itm/127261821539,https://i.ebayimg.com/images/g/~FwAAeSw8n1ogom...
365,2,Samsung Galaxy S25 Edge Silver 512GB Unlocked ...,$749.99,"Sold Aug 5, 2025",https://www.ebay.com/itm/297455815815,https://i.ebayimg.com/images/g/eJUAAeSw1AxobBY...


In [19]:
import re
import pandas as pd

# Load the dataset
s25 = pd.read_csv('Samsung_S25.csv')

# 1) Remove rows with '|' (multiple model options)
s25 = s25[~s25['Title'].str.contains(r'\|', regex=True, na=False)]

# 2) Remove titles that list multiple models like "S25 / S25+" or "S25 - S25 Ultra"
pattern_multi = r'\bS?25\b\s*[-/|]\s*\bS?25\s*(?:Ultra|\+|Plus|FE|Fan\s*Edition|Pro|Edge)?\b'
s25 = s25[~s25['Title'].str.contains(pattern_multi, flags=re.IGNORECASE, na=False)]

# 3) Keep ONLY the base "Samsung Galaxy S25"
base_s25_only = r'\b(Samsung\s*)?(Galaxy\s*)?S25(?!\s*(\+|Plus|Ultra|FE|Fan\s*Edition|Fan|Pro|Edge))\b'
s25 = s25[s25['Title'].str.contains(base_s25_only, flags=re.IGNORECASE, na=False)]

# 4) Exclude refurbished / renewed / used / open-box / edge listings
cond_exclude = r'\b(refurb|renewed?|re[-\s]?conditioned|certified\s*refurbished|used|pre[-\s]?owned|open\s*box|edge)\b'
s25 = s25[~s25['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]

# 5) Optional: remove duplicate titles
s25 = s25.drop_duplicates(subset='Title', keep='first')

# Preview results
s25.head()


  s25 = s25[s25['Title'].str.contains(base_s25_only, flags=re.IGNORECASE, na=False)]
  s25 = s25[~s25['Title'].str.contains(cond_exclude, flags=re.IGNORECASE, na=False)]


Unnamed: 0,Page,Title,Price,Sold Date,Link,Image Link
5,1,Samsung Galaxy S25 5G - SM-S931U - Unlocked - ...,$463.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/146862403064,https://i.ebayimg.com/images/g/lYUAAeSwi-do2sy...
12,1,Samsung Galaxy S25 SM-S931U1 128GB Unlocked Ex...,$499.99,"Sold Oct 28, 2025",https://www.ebay.com/itm/376644158819,https://i.ebayimg.com/images/g/DzsAAeSwWyNo8Su...
18,1,Samsung Galaxy S25 - 128 GB - SM-S931U1 Navy (...,$434.95,"Sold Oct 27, 2025",https://www.ebay.com/itm/405977447569,https://i.ebayimg.com/images/g/wm4AAOSwiqBoUuj...
20,1,Samsung Galaxy S25 128GB 256GB SM-S931U1 Mint...,$449.00,"Sold Oct 27, 2025",https://www.ebay.com/itm/146565510947,https://i.ebayimg.com/images/g/9uYAAOSwFd5oGkK...
21,1,Samsung Galaxy S25 256GB Unlocked Silver Shado...,$479.95,"Sold Oct 27, 2025",https://www.ebay.com/itm/127455651033,https://i.ebayimg.com/images/g/mRsAAeSw9fNo~30...


In [22]:
# Combine all cleaned DataFrames above 
combined_s = pd.concat([s22, s23, s24, s25], ignore_index=True)

# Save to a new CSV
combined_s.to_csv('Combined_Samsung_S.csv', index=False)