In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Load the iris dataset and hold out a stratified 20% test split.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise the features, then fit a logistic-regression classifier.
steps = [
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000)),
]
pipe = Pipeline(steps)
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Hold-out evaluation.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Confusion matrix:\n", cm)
print("\nClassification report:\n", report)

# 5-fold cross-validation of the whole pipeline on the full dataset, so the
# scaler is re-fitted inside every fold.
scores = cross_val_score(pipe, X, y, cv=5)
cv_mean = scores.mean().round(3)
cv_std = scores.std().round(3)
print("CV accuracy:", cv_mean, "+/-", cv_std)


In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Fetch the OpenML "mnist_784" dataset as plain arrays (as_frame=False).
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

# Rescale the features by 1/255 (presumably 8-bit pixel intensities -> [0, 1];
# confirm against the dataset description) and turn the labels into integers.
X = X / 255.0
y = y.astype("int")

# Stratified 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# `multi_class` is deprecated in scikit-learn >= 1.5; lbfgs already fits a
# multinomial model for multiclass targets by default, so it is not passed.
# `n_jobs` is dropped as well: it only parallelises one-vs-rest fits and had no
# effect on a multinomial fit.
clf = LogisticRegression(max_iter=1000, solver="lbfgs")
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Built-in round() works whether the metric comes back as a NumPy scalar or a
# plain Python float; the `.round()` method only exists on the former.
print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
print("\nClassification report:\n", classification_report(y_test, y_pred))


In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Load the California housing data as arrays plus the list of feature names.
housing = fetch_california_housing()
X, y = housing.data, housing.target
feature_names = housing.feature_names

# Plain (unstratified) 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Single-feature baseline: keep only the "MedInc" column. Indexing with a
# one-element list keeps the arrays 2-D, as the estimator expects.
medinc_idx = feature_names.index("MedInc")
X_train_simple = X_train[:, [medinc_idx]]
X_test_simple = X_test[:, [medinc_idx]]

simple_model = LinearRegression()
simple_model.fit(X_train_simple, y_train)
y_pred_simple = simple_model.predict(X_test_simple)

# Full model: all features.
multi_model = LinearRegression()
multi_model.fit(X_train, y_train)
y_pred_multi = multi_model.predict(X_test)

# Report both models with the same metrics. Built-in round() is used instead of
# the NumPy-only `.round()` method so this works whether the metric functions
# return NumPy scalars or plain Python floats.
for label, y_hat in (("Simple LR", y_pred_simple), ("Multiple LR", y_pred_multi)):
    print(f"{label} — MAE:", round(mean_absolute_error(y_test, y_hat), 3),
          "| R^2:", round(r2_score(y_test, y_hat), 3))


In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# Tunable settings (previously repeated as bare literals throughout the cell).
N_LAGS = 5       # number of previous closes used as features
TEST_SIZE = 60   # number of most recent rows held out for evaluation

# Expects a CSV with at least "Date" and "Close" columns.
df = pd.read_csv("stocks.csv", parse_dates=["Date"])
df = df.sort_values("Date")

# lag_k holds the close from k rows earlier (rows are in date order).
lag_cols = [f"lag_{lag}" for lag in range(1, N_LAGS + 1)]
for lag, col in enumerate(lag_cols, start=1):
    df[col] = df["Close"].shift(lag)

# Drops every row containing a NaN in any column, which includes the first
# N_LAGS rows whose lag features are undefined.
df = df.dropna().reset_index(drop=True)

# Without this guard an undersized file produces an empty training set and an
# opaque error from the estimator.
if len(df) <= TEST_SIZE:
    raise ValueError(
        f"Need more than {TEST_SIZE} usable rows after building lags, got {len(df)}"
    )

X = df[lag_cols]
y = df["Close"]

# Chronological split: no shuffling, the last TEST_SIZE rows are the test set.
X_train, X_test = X.iloc[:-TEST_SIZE], X.iloc[-TEST_SIZE:]
y_train, y_test = y.iloc[:-TEST_SIZE], y.iloc[-TEST_SIZE:]
test_dates = df["Date"].iloc[-TEST_SIZE:]

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
print("Linear Regression MAE:", round(mae, 3))

# Plot actual vs. predicted closes over the held-out window.
plt.figure(figsize=(10,5))
plt.plot(test_dates, y_test, label="Actual")
plt.plot(test_dates, y_pred, label="Predicted")
plt.title("Stock Price Forecast (Linear Regression)")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

rng = np.random.RandomState(42)
n = 5000

# Synthetic predictors. The order of the rng calls below fixes the random
# stream, so it must not be rearranged.
humidity = rng.uniform(10, 100, n)
pressure = rng.uniform(980, 1030, n)
wind = rng.uniform(0, 20, n)
cloud = rng.uniform(0, 100, n)

# Targets: linear functions of the predictors plus Gaussian noise.
temp_noise = rng.normal(0, 1, n)
temp = 30 - 0.2*humidity + 0.03*(pressure-1000) - 0.1*wind + 0.05*cloud + temp_noise
precip_noise = rng.normal(0, 0.5, n)
# Negative precipitation values are clipped to zero.
precip = np.clip(0.02*humidity + 0.01*cloud - 0.3*wind + precip_noise, 0, None)

predictors = (humidity, pressure, wind, cloud)
targets = (temp, precip)
X = np.column_stack(predictors)
Y = np.column_stack(targets)

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# One random forest is fitted per target column.
base_forest = RandomForestRegressor(random_state=42)
model = MultiOutputRegressor(base_forest)
pred = model.fit(X_train, y_train).predict(X_test)

# Column 0 is temperature, column 1 is precipitation (see `targets` above).
mae_temp, mae_prec = (
    mean_absolute_error(y_test[:, k], pred[:, k]) for k in range(2)
)
print("MAE Temperature:", round(mae_temp, 3))
print("MAE Precipitation:", round(mae_prec, 3))


In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the breast-cancer dataset; `data` is kept because the report below
# needs its class names.
data = load_breast_cancer()
X = data.data
y = data.target

# Stratified 80/20 hold-out split.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise, then classify with logistic regression.
pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000)),
])
pipe.fit(X_tr, y_tr)

y_pred = pipe.predict(X_te)
report = classification_report(y_te, y_pred, target_names=data.target_names)
print(report)


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
n_customers = 1000

# Synthetic RFM features; the draw order determines the random stream.
recency = rng.gamma(shape=2.0, scale=10.0, size=n_customers) * 10
frequency = rng.gamma(shape=2.0, scale=2.0, size=n_customers)
monetary = rng.lognormal(mean=3.5, sigma=0.7, size=n_customers)

rfm_columns = ["Recency", "Frequency", "Monetary"]
df = pd.DataFrame(dict(zip(rfm_columns, (recency, frequency, monetary))))

# Standardise so every feature contributes on the same scale to the
# distances k-means uses.
scaler = StandardScaler()
X = scaler.fit_transform(df)

kmeans = KMeans(n_clusters=5, n_init=20, random_state=42)
labels = kmeans.fit(X).labels_

# Attach the cluster id and summarise each segment by its per-feature median
# (computed on the original, unscaled values).
df = df.assign(Segment=labels)
segment_medians = (
    df.groupby("Segment")
    .agg({column: "median" for column in rfm_columns})
    .sort_index()
)
print(segment_medians)


In [None]:
import numpy as np
import random

# Grid-world layout. Cell codes as used by the rest of this cell:
#   3 = start, 2 = goal, 1 = wall (moves into it are rejected), 0 = free.
grid = np.array([
    [3,0,1,0,0,0],
    [0,0,1,0,1,0],
    [1,0,0,0,1,0],
    [0,1,1,0,0,0],
    [0,0,0,1,0,2],
])

# Convert the coordinates to built-in ints. np.argwhere yields NumPy integer
# scalars, which would propagate through every state and make the printed path
# read like "(np.int64(0), np.int64(0))" under NumPy 2.
start = tuple(int(v) for v in np.argwhere(grid == 3)[0])
goal = tuple(int(v) for v in np.argwhere(grid == 2)[0])
n_rows, n_cols = grid.shape

# Moves as (row delta, col delta): right, left, down, up.
actions = [(0,1),(0,-1),(1,0),(-1,0)]
n_actions = len(actions)

def step(state, action_idx):
    """Apply one move in the grid world.

    Args:
        state: current (row, col) position.
        action_idx: index into the module-level `actions` list.

    Returns:
        (next_state, reward, done). A move that leaves the grid or hits a wall
        (cell value 1) keeps the agent in place with reward -1. Reaching the
        goal gives reward 10 and done=True. Any other move costs -0.04.
    """
    row, col = state
    d_row, d_col = actions[action_idx]
    new_row, new_col = row + d_row, col + d_col

    inside = 0 <= new_row < n_rows and 0 <= new_col < n_cols
    # Short-circuiting keeps the grid lookup from running on out-of-range
    # (possibly negative, hence wrapping) indices.
    if not inside or grid[new_row, new_col] == 1:
        return state, -1, False

    target = (new_row, new_col)
    if target == goal:
        return target, 10, True
    return target, -0.04, False

# Q-learning hyperparameters.
# Step size applied to the temporal-difference error in the Q update.
alpha = 0.1
# Discount factor applied to the best Q-value of the next state.
gamma = 0.95
# Probability of picking a random action; starts fully exploratory.
epsilon = 1.0
# Lower bound epsilon is never decayed below.
epsilon_min = 0.05
# Epsilon is multiplied by this factor after every episode.
epsilon_decay = 0.995
# Number of training episodes.
episodes = 2000
# Q-table: one value per (row, col, action), initialised to zero.
Q = np.zeros((n_rows, n_cols, n_actions))

def choose_action(state):
    """Pick an action index for `state` with an epsilon-greedy rule.

    With probability `epsilon` (read from module scope at call time) a
    uniformly random action index is returned; otherwise the index of the
    largest Q-value for this state (first one on ties).
    """
    explore = random.random() < epsilon
    if explore:
        return random.randrange(n_actions)
    row, col = state
    return int(Q[row, col].argmax())

# Seed the stdlib RNG that choose_action draws from, so re-running this cell
# reproduces the same Q-table (the other stochastic cells pin their seeds too).
random.seed(42)

# Tabular Q-learning: each episode starts at `start` and runs until the goal
# is reached.
for ep in range(episodes):
    state = start
    done = False
    while not done:
        a = choose_action(state)
        next_state, reward, done = step(state, a)
        r, c = state
        nr, nc = next_state
        # A terminal transition has no future value, so do not bootstrap from
        # it. (The goal's Q row is never updated and stays zero, so this leaves
        # the learned values unchanged; it only makes the update explicit.)
        best_next = 0.0 if done else np.max(Q[nr, nc])
        td_target = reward + gamma * best_next
        Q[r, c, a] += alpha * (td_target - Q[r, c, a])
        state = next_state
    # Decay exploration once per episode, never below epsilon_min.
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

# Follow the greedy policy from the start cell and record the cells visited.
state = start
path = [state]
visited = {state}

steps_left = 200  # hard cap on rollout length
while steps_left > 0:
    steps_left -= 1
    row, col = state
    greedy_action = int(np.argmax(Q[row, col]))
    state, _reward, done = step(state, greedy_action)
    # Landing on an already-seen cell (which includes a rejected move that
    # leaves the agent in place) means the greedy policy is cycling; stop.
    if state in visited:
        break
    visited.add(state)
    path.append(state)
    if done:
        break

print("Start:", start, "| Goal:", goal)
print("Path length:", len(path))
print("Path:", path)


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import fetch_openml

# Fetch the OpenML "titanic" dataset as a DataFrame.
titanic = fetch_openml("titanic", version=1, as_frame=True)
df = titanic.frame

# Keep only the modelling columns and make the target an integer.
target = "survived"
features = ["pclass", "sex", "age", "sibsp", "parch", "fare", "embarked"]
df = df[features + [target]].copy()
df[target] = df[target].astype(int)

num_features = ["age", "sibsp", "parch", "fare"]
cat_features = ["pclass", "sex", "embarked"]

# Numeric columns: fill gaps with the median, then standardise. Scaling puts
# the features on a comparable footing for the L2-regularised logistic
# regression below and matches the other logistic-regression pipelines in
# this notebook.
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are encoded as all zeros.
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

preprocess = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_features),
        ("cat", categorical_transformer, cat_features),
    ]
)

# Preprocessing lives inside the pipeline so it is fitted on training data only.
clf = Pipeline(steps=[
    ("preprocess", preprocess),
    ("model", LogisticRegression(max_iter=1000)),
])

X = df[features]
y = df[target]

# Stratified 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix


# Load the real dataset if present; it must provide "text" and "label" columns.
# Otherwise fall back to a tiny built-in sample so the cell still runs.
try:
    df = pd.read_csv("spam.csv")
except FileNotFoundError:
    using_fallback = True
    # Five messages per class: a stratified 20% split of 10 rows yields 2 test
    # rows, the minimum for two classes. The previous 4-row sample produced a
    # single test row and train_test_split raised ValueError.
    fallback_rows = [
        ("Congratulations! You've won a free lottery. Call now!", "spam"),
        ("Meeting at 10am tomorrow. Please confirm.", "ham"),
        ("URGENT! Your account is compromised. Click the link to verify.", "spam"),
        ("Are we still on for lunch today?", "ham"),
        ("Win a free prize today! Click the link to claim now.", "spam"),
        ("Can you send me the report before the meeting tomorrow?", "ham"),
        ("You have been selected for a free cash reward. Call now to claim.", "spam"),
        ("Running late, see you at lunch in ten minutes.", "ham"),
        ("Limited offer: verify your account now to win a free phone.", "spam"),
        ("Thanks for confirming, talk to you tomorrow.", "ham"),
    ]
    df = pd.DataFrame(fallback_rows, columns=["text", "label"])
else:
    using_fallback = False

X_train, X_test, y_train, y_test = train_test_split(
    df["text"], df["label"], test_size=0.2, random_state=42, stratify=df["label"]
)

# min_df=2 discards terms seen in fewer than two documents. On the handful of
# fallback messages that can prune the whole vocabulary and make the
# vectorizer fail, so every term is kept there.
min_df = 1 if using_fallback else 2

pipe = Pipeline([
    ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1,2), min_df=min_df)),
    ("clf", MultinomialNB())
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification report:\n", classification_report(y_test, y_pred))
