Shardul369-coder · Shardul369-coder · Oct 27, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -96,3 +96,4 @@ jupyterlite_contents
 
 # file recognised by vscode IDEs containing env variables
 .env
+scikit-learn/
diff --git a/build_tools/codespell_ignore_words.txt b/build_tools/codespell_ignore_words.txt
@@ -1,63 +1,63 @@
 achin
-aggresive
+aggressive
 aline
 ba
-basf
-boun
+base
+bound
 bre
 bu
 cach
-cant
-chanel
+can't
+channel
 complies
-coo
-copys
-datas
-deine
-didi
-feld
+coup
+copies
+data
+define
+did
+field
 fo
 fpr
 fro
-fwe
-gool
+few
+ghoul
 hart
-heping
+helping
 hist
-ines
+lines
 inout
 ist
-jaques
-lene
-lamas
+jacques
+lens
+llamas
 linke
 lod
-mange
-mape
+manage
+map
 mis
-mor
+more
 nd
-nmae
-ocur
+name
+occur
 pullrequest
-repid
+rapid
 ro
-ser
+set
 soler
 staps
-suh
-suprised
+such
+surprised
 te
-technic
-teh
+technique
+the
 theis
 thi
-usal
-vie
-vor
+usual
+via
+for
 wan
 whis
 wil
-winn
+win
 whis
 yau
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
@@ -613,3 +613,12 @@ As an alternative, the HTML can be written to a file using
 
 * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py`
 * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`
+
+.. _column_transformer_passthrough:
+
+ColumnTransformer with ``remainder='passthrough'`` and Pandas
+-------------------------------------------------------------
+
+.. literalinclude:: ../../examples/compose/plot_column_transformer_passthrough.py
+   :language: python
+   :lines: 14-
diff --git a/examples/compose/plot_column_transformer_passthrough.py b/examples/compose/plot_column_transformer_passthrough.py
@@ -0,0 +1,82 @@
+# examples/compose/plot_column_transformer_passthrough.py
+# -*- coding: utf-8 -*-
+
+"""
+=============================================================
+ColumnTransformer with remainder='passthrough' and Pandas
+=============================================================
+
+This example shows how to keep columns untouched with
+``remainder='passthrough'`` while transforming others.
+The input is a pandas DataFrame – the most common real-world case.
+"""
+
+import matplotlib.pyplot as plt
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, classification_report
+
+# -------------------------------------------------
+# 1. Create a realistic Pandas DataFrame
+# -------------------------------------------------
+data = pd.DataFrame({
+    "age":      [25, 30, 35, 40, 45, 50, 22, 33],
+    "salary":   [50000, 60000, 70000, 80000, 90000, 100000, 45000, 55000],
+    "city":     ["NY", "LA", "NY", "SF", "LA", "NY", "SF", "LA"],
+    "is_senior":[0, 0, 0, 1, 1, 1, 0, 0],
+    "target":   [0, 1, 0, 1, 1, 0, 0, 1]
+})
+
+X = data.drop("target", axis=1)
+y = data["target"]
+
+# -------------------------------------------------
+# 2. ColumnTransformer – scale numeric, encode city,
+#     passthrough the binary column `is_senior`
+# -------------------------------------------------
+ct = ColumnTransformer(
+    [
+        ("scale", StandardScaler(), ["age", "salary"]),
+        ("encode", OneHotEncoder(drop="first", sparse_output=False), ["city"]),
+    ],
+    remainder="passthrough",   # <-- keeps `is_senior` unchanged
+)
+
+# -------------------------------------------------
+# 3. Full pipeline + LogisticRegression
+# -------------------------------------------------
+pipe = Pipeline(
+    [
+        ("transform", ct),
+        ("clf", LogisticRegression(max_iter=1000)),
+    ]
+)
+
+# -------------------------------------------------
+# 4. Train / test split & evaluation
+# -------------------------------------------------
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42, stratify=y
+)
+
+pipe.fit(X_train, y_train)
+y_pred = pipe.predict(X_test)
+
+print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
+print(classification_report(y_test, y_pred))
+
+# -------------------------------------------------
+# 5. Visualise the transformed feature matrix
+# -------------------------------------------------
+transformed = ct.fit_transform(X)  # refit on the full data, for display only
+# Take the column names from the fitted transformer rather than hard-coding
+# them: OneHotEncoder already emits "city_<category>" names, so prefixing
+# them again would give "city_city_<category>". The names returned here are
+# prefixed with the step name, e.g. "scale__age" or "remainder__is_senior".
+cols = ct.get_feature_names_out()
+print("\nTransformed features (first 5 rows):")
+print(pd.DataFrame(transformed, columns=cols).head())
diff --git a/sklearn/feature_extraction/_stop_words.py b/sklearn/feature_extraction/_stop_words.py
@@ -25,7 +25,7 @@
         "am",
         "among",
         "amongst",
-        "amoungst",
+        "amongst",
         "amount",
         "an",
         "and",
@@ -69,7 +69,7 @@
         "co",
         "con",
         "could",
-        "couldnt",
+        "couldn't",
         "cry",
         "de",
         "describe",
@@ -119,7 +119,7 @@
         "go",
         "had",
         "has",
-        "hasnt",
+        "hasn't",
         "have",
         "he",
         "hence",
@@ -183,7 +183,7 @@
         "no",
         "nobody",
         "none",
-        "noone",
+        "no one",
         "nor",
         "not",
         "nothing",
Original file line number	Diff line number	Diff line change
Expand Up		@@ -96,3 +96,4 @@ jupyterlite_contents

		# file recognised by vscode IDEs containing env variables
		.env
		scikit-learn/