# Source A Gulli
https://docs.google.com/document/d/1GWShQ74DwZRUVs4e0yoS3rYmBxUVR-x4N_Xt5xl5dtE/edit?tab=t.kivct9twtv3c

## Traditional Programming -- Rule-Based Classification

In [0]:
# We define a function using the 'def' keyword.
# This function takes one input, 'number'.
def classify_number(number):
    """
    Label a number as "small", "medium", or "large".

    The thresholds are hand-written rules: anything below 10 is "small",
    anything below 100 (but not below 10) is "medium", and everything
    else is "large". The number being analyzed is also printed.
    """
    print(f"Analyzing the number: {number}")

    # Each 'return' leaves the function immediately, so a later check
    # only runs when every earlier check was false.
    if number < 10:
        return "small"
    if number < 100:
        return "medium"

    # None of the rules above matched.
    return "large"

In [0]:
# Now, let's test our function with one number for each possible label.
result1 = classify_number(5)
print(f"The result is: {result1}")

result2 = classify_number(50)
print(f"The result is: {result2}")

result3 = classify_number(500)
# Print the third result as well, so the "large" rule is shown like the others.
print(f"The result is: {result3}")

## Traditional Programming: Rule-based Clustering

In [0]:
# Our unlabeled list of words: nothing here says which group a word belongs to.
words = [
    "apple",
    "banana",
    "ant",
    "boat",
    "car",
    "cat",
    "anchor",
]


In [0]:
# A bare name on the last line of a notebook cell displays its current value.
words

In [0]:
# We will store our groups in a dictionary that maps a first letter to the
# list of words starting with that letter, like {"a": ["apple", "ant"]}.
grouped_words = {}

# Visit every word in our list, one at a time.
for word in words:
    # The group a word belongs to is decided by its first letter.
    first_letter = word[0]

    # 'setdefault' returns the list already stored for this letter, or stores
    # (and returns) a new empty list the first time the letter is seen.
    # Either way, the current word is then appended to that list.
    grouped_words.setdefault(first_letter, []).append(word)


In [0]:
# Display the finished groups: each key is a first letter, each value its list of words.
grouped_words

## Machine Learning -- Statistical Model Classification

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# The Features (X): one row per movie, written as [Action Score, Comedy Score].
X = np.array([
    # Liked movies
    [9, 2],
    [3, 8],
    [8, 1],
    [4, 9],
    # Disliked movies
    [2, 1],
    [1, 3],
    [4, 2],
    [3, 3],
])

# The Labels (y): 1 for "Liked", 0 for "Disliked" -- one label per row of X,
# in the same order (four liked movies first, then four disliked movies).
y = np.array([1] * 4 + [0] * 4)

In [0]:
import pandas as pd

In [0]:
# Build a table with one row per movie: the two feature columns come from X
# (column 0 and column 1) and the verdict column comes from the labels y.
data = {
    'Action Score': X[:, 0],
    'Comedy Score': X[:, 1],
    "Sarah's Verdict": y,
}
df = pd.DataFrame(data)

# Show the table.
df

In [0]:
# Scores of the new, unknown movie we want to classify.
action_score, comedy_score = 7, 6

# Pack them in the same [Action Score, Comedy Score] order used for the rows of X.
new_movie = np.array([action_score, comedy_score])
new_movie

## How do we know what to classify it as?

In [0]:


# --- Visualization ---
# Scatter plot of the movies Sarah has rated, plus the new unrated movie.
plt.figure(figsize=(8, 6))

# Plot the "Liked" movies in green.
# 'y == 1' picks the rows of X whose label is 1; column 0 is the action
# score (x-axis) and column 1 is the comedy score (y-axis).
plt.scatter(X[y == 1, 0], X[y == 1, 1], c='green', marker='o', label='Liked')

# Plot the "Disliked" movies in red
plt.scatter(X[y == 0, 0], X[y == 0, 1], c='red', marker='x', label='Disliked')

# Plot the new movie as a blue star.
# Flatten new_movie first so this cell works whether it is the 1-D array
# [7, 6] or the 2-D array [[7, 6]] that the prediction cell assigns to the
# same name. Without this, re-running the plot after the prediction cell
# fails with an IndexError on new_movie[1].
new_action, new_comedy = np.ravel(new_movie)
plt.scatter(new_action, new_comedy, c='blue', marker='*', s=150, label='New Movie')

plt.title("Sarah's Movie Tastes")
plt.xlabel("Action Score")
plt.ylabel("Comedy Score")
plt.legend()
plt.grid(True)
plt.show()


In [0]:
# --- The Three Magic Lines for KNN ---

# 1. Import the model we want to use.
from sklearn.neighbors import KNeighborsClassifier

# 2. Instantiate the model.
# K is the number of neighbors the model looks at; we choose 3.
n_neighbors = 3
model = KNeighborsClassifier(n_neighbors=n_neighbors)

# 3. Fit the model to our data (features X, labels y).
# For KNN, this step simply memorizes the data's location.
model.fit(X, y)

print("Training complete! The KNN model has stored the data.")


In [0]:
# Our new movie data, reshaped for Scikit-learn: the outer brackets make it
# a table with one row (one movie) and two columns (its two scores).
new_movie = np.array([[7, 6]])

# Ask the trained model for a label for that row.
prediction = model.predict(new_movie)

# Turn the numeric label of the first (and only) row into a friendly
# message: 1 means "Liked", anything else means "Disliked".
if prediction[0] != 1:
    print("Prediction: Sarah will DISLIKE this movie.")
else:
    print("Prediction: Sarah will LIKE this movie.")


In [0]:
# (This code assumes you have run the previous examples:
#  it uses X, y, model and new_movie.)
from matplotlib.colors import ListedColormap

# Create a color map for the background
cmap_light = ListedColormap(['#FFCCCC', '#CCFFCC']) # Red and Green background colors

# Create a grid of points to classify.
# The grid extends 1 unit beyond the smallest and largest score on each
# axis, with a point every 0.1 units.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Get predictions for every point on the grid.
# 'ravel' flattens each grid into one long list and 'np.c_' pairs them up
# as [action, comedy] rows, the same layout the model was fitted on.
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the predictions back into the shape of the grid for plotting.
Z = Z.reshape(xx.shape)

# --- Plot the Decision Boundary ---
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap=cmap_light)

# Plot the original data points on top
plt.scatter(X[y == 1, 0], X[y == 1, 1], c='green', marker='o', label='Liked')
plt.scatter(X[y == 0, 0], X[y == 0, 1], c='red', marker='x', label='Disliked')

# Flatten new_movie so the star is drawn whether new_movie is currently the
# 2-D array [[7, 6]] or the 1-D array [7, 6] defined in the earlier cell;
# indexing it as new_movie[0, 0] would fail for the 1-D form.
new_action, new_comedy = np.ravel(new_movie)
plt.scatter(new_action, new_comedy, c='blue', marker='*', s=150, label='New Movie')

plt.title("KNN Decision Boundary (K=3)")
plt.xlabel("Action Score")
plt.ylabel("Comedy Score")
plt.legend()
plt.grid(True)
plt.show()
