<a href="https://colab.research.google.com/github/MayankHQ/machine_learning/blob/main/GradientDescent_GradientBoosting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1. The Function: y = x^2 (A simple bowl shape)
# The derivative (slope) is 2x
def gradient(x):
    """Return the slope of the curve y = x**2 at the point ``x``.

    The derivative of x**2 with respect to x is 2x, so the slope is simply
    twice the input. A positive result means the curve rises to the right;
    a negative result means it rises to the left.
    """
    slope = 2 * x
    return slope

# 2. Setup
current_x = 10       # Starting position: x = 10, high up on the side of the bowl
learning_rate = 0.1  # Fraction of the slope we move by on every step
steps = []           # Positions visited, recorded *before* each update

# 3. The Loop (Walking down the hill)
for i in range(10):
    # Evaluate the slope where we currently stand and remember that position.
    slope = gradient(current_x)
    steps.append(current_x)

    # --- THE UPDATE RULE ---
    # new_x = old_x - learning_rate * slope(old_x)
    # Moving against the slope always heads towards the minimum at x = 0.
    current_x -= learning_rate * slope

    # The x printed here is the position AFTER the update; the slope is the
    # one measured at the previous position (the value that drove the update).
    print(f"Step {i}: x = {current_x:.2f}, Slope = {slope:.2f}")

# With learning_rate = 0.1 each step multiplies x by 0.8, so x shrinks towards 0.

Step 0: x = 8.00, Slope = 20.00
Step 1: x = 6.40, Slope = 16.00
Step 2: x = 5.12, Slope = 12.80
Step 3: x = 4.10, Slope = 10.24
Step 4: x = 3.28, Slope = 8.19
Step 5: x = 2.62, Slope = 6.55
Step 6: x = 2.10, Slope = 5.24
Step 7: x = 1.68, Slope = 4.19
Step 8: x = 1.34, Slope = 3.36
Step 9: x = 1.07, Slope = 2.68


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1. Create Dummy Data
# The points lie exactly on the line y = 2x + 3, so a successful fit should
# recover a slope m close to 2 and an intercept c close to 3.
X = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 9, 11, 13])

# 2. Initialize Parameters (Start from scratch)
m_current = 0
c_current = 0

learning_rate = 0.01  # Step size
epochs = 1000         # Number of gradient-descent updates
n = len(X)            # Number of data points

# 3. The Training Loop (Gradient Descent)
# The quantity being minimised is the mean squared error:
#     MSE(m, c) = (1/n) * sum((y - (m*X + c))**2)
for i in range(epochs):
    # A. Make a prediction with current m and c
    y_predicted = (m_current * X) + c_current

    # B. Calculate the Derivatives (The Slope of the Error)
    # The residuals are shared by both partial derivatives, so compute them once.
    residuals = y - y_predicted

    # dMSE/dm = (-2/n) * sum(X * residuals)   -> how much is 'm' wrong?
    dm = (-2 / n) * np.sum(X * residuals)

    # dMSE/dc = (-2/n) * sum(residuals)       -> how much is 'c' wrong?
    dc = (-2 / n) * np.sum(residuals)

    # C. Update the values (Move down the hill)
    m_current = m_current - (learning_rate * dm)
    c_current = c_current - (learning_rate * dc)

    # Print progress every 100 steps
    if i % 100 == 0:
        print(f"Epoch {i}: m = {m_current:.4f}, c = {c_current:.4f}")

print("------------------------------------------------")
print(f"Final Result: y = {m_current:.2f}x + {c_current:.2f}")
print("Actual Answer: y = 2.00x + 3.00")

Epoch 0: m = 0.6200, c = 0.1800
Epoch 100: m = 2.4470, c = 1.3864
Epoch 200: m = 2.3186, c = 1.8499
Epoch 300: m = 2.2270, c = 2.1803
Epoch 400: m = 2.1618, c = 2.4158
Epoch 500: m = 2.1153, c = 2.5836
Epoch 600: m = 2.0822, c = 2.7032
Epoch 700: m = 2.0586, c = 2.7885
Epoch 800: m = 2.0418, c = 2.8493
Epoch 900: m = 2.0298, c = 2.8926
------------------------------------------------
Final Result: y = 2.02x + 2.92
Actual Answer: y = 2.00x + 3.00


### **Gradient Boosting (scikit-learn and XGBoost)**

In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# 1. Load Data
# Hold out 20% of the digits samples for testing; the fixed random_state
# makes the split repeatable.
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2. Initialize Gradient Boosting
gb_model = GradientBoostingClassifier(
    n_estimators=100,   # number of boosting stages, i.e. 100 sequential "shots" (trees)
    learning_rate=0.1,  # shrinks each tree's correction (the gradient-descent step size)
    max_depth=3,        # depth limit of every individual tree
    random_state=42,    # fixed seed so the fit is repeatable
)

# 3. Train (trees are built one after another, so this is slower than a Random Forest)
print("Training Gradient Boosting... (Sequential = Slow)")
gb_model.fit(X_train, y_train)

# 4. Predict & Evaluate on the held-out test set
y_pred = gb_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Gradient Boosting Accuracy: {acc * 100:.2f}%")

Training Gradient Boosting... (Sequential = Slow)
Gradient Boosting Accuracy: 96.94%


In [None]:
import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 1. Load Data
# Same digits data and the same 80/20 split (random_state=42) as the
# scikit-learn cell, so the two accuracies are directly comparable.
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2. Initialize XGBoost
model = xgb.XGBClassifier(
    n_estimators=100,        # number of boosting rounds (trees)
    learning_rate=0.1,       # shrinkage applied to each tree's contribution
    eval_metric='mlogloss',  # multiclass log loss, set explicitly (some older
                             # XGBoost releases warn when it is left implicit)
)

# 3. Train
print("Training XGBoost...")
model.fit(X_train, y_train)

# 4. Predict & Evaluate on the held-out test set
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"XGBoost Accuracy: {acc * 100:.2f}%")

Training XGBoost...
XGBoost Accuracy: 96.11%
