Title: Regression Models

Linear Regression (Simple & Multiple)

Task 1: Differentiate between a labeled dataset of image classifications and an unlabeled dataset for clustering customer segments.

In [2]:
# Write your code here
import pandas as pd

# Labeled dataset example: Image classifications (Supervised Learning)
# Each image is associated with a label like 'cat', 'dog', etc.
labeled_images = pd.DataFrame({
    "ImageID": [101, 102, 103, 104],
    "PixelData": ["img_101.jpg", "img_102.jpg", "img_103.jpg", "img_104.jpg"],
    "Label": ["Cat", "Dog", "Cat", "Dog"]
})

print("Labeled Image Classification Dataset:")
print(labeled_images)

# Unlabeled dataset example: Customer data used for clustering (Unsupervised Learning)
# No labels, used to find patterns or groupings.
unlabeled_customers = pd.DataFrame({
    "CustomerID": [1, 2, 3, 4],
    "AnnualIncome": [45000, 54000, 32000, 72000],
    "SpendingScore": [39, 81, 6, 77]
})

print("\nUnlabeled Customer Segmentation Dataset:")
print(unlabeled_customers)


Labeled Image Classification Dataset:
   ImageID    PixelData Label
0      101  img_101.jpg   Cat
1      102  img_102.jpg   Dog
2      103  img_103.jpg   Cat
3      104  img_104.jpg   Dog

Unlabeled Customer Segmentation Dataset:
   CustomerID  AnnualIncome  SpendingScore
0           1         45000             39
1           2         54000             81
2           3         32000              6
3           4         72000             77


Task 2: Examine a problem statement and determine which learning type is applicable.<br>

In [3]:
# Write your code here
def determine_learning_type(problem_statement):
    """
    Determine the type of machine learning based on the problem statement.
    """
    # Keywords indicating supervised learning problems
    supervised_keywords = ["predict", "classify", "regression", "classification", "label", "target", "output", "supervised"]
    
    # Keywords indicating unsupervised learning problems
    unsupervised_keywords = ["cluster", "group", "segment", "association", "unsupervised", "pattern", "anomaly", "reduce dimensionality"]
    
    problem_lower = problem_statement.lower()
    
    if any(word in problem_lower for word in supervised_keywords):
        return "Supervised Learning"
    elif any(word in problem_lower for word in unsupervised_keywords):
        return "Unsupervised Learning"
    else:
        return "Cannot determine learning type from the statement"

# Examples of problem statements
problem1 = "Predict whether a customer will churn based on their service usage."
problem2 = "Group customers into distinct segments based on their purchase behavior."
problem3 = "Discover patterns in user browsing data."

print(f"Problem: {problem1}\nLearning Type: {determine_learning_type(problem1)}\n")
print(f"Problem: {problem2}\nLearning Type: {determine_learning_type(problem2)}\n")
print(f"Problem: {problem3}\nLearning Type: {determine_learning_type(problem3)}\n")


Problem: Predict whether a customer will churn based on their service usage.
Learning Type: Supervised Learning

Problem: Group customers into distinct segments based on their purchase behavior.
Learning Type: Unsupervised Learning

Problem: Discover patterns in user browsing data.
Learning Type: Unsupervised Learning



Task 3: Identify whether facial recognition systems use supervised or unsupervised learning and justify.

In [4]:
# Write your code here
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Simulated feature vectors for faces (e.g., extracted embeddings)
X = np.array([
    [0.1, 0.2, 0.1],  # Person A
    [0.2, 0.1, 0.2],  # Person A
    [0.9, 0.8, 0.7],  # Person B
    [0.8, 0.9, 0.6],  # Person B
    [0.4, 0.5, 0.4],  # Person C
    [0.5, 0.4, 0.5]   # Person C
])

# Labels corresponding to person identities
y = np.array(['Person A', 'Person A', 'Person B', 'Person B', 'Person C', 'Person C'])

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Train a classifier (SVM here)
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.0
Classification Report:
               precision    recall  f1-score   support

    Person A       0.00      0.00      0.00       2.0
    Person C       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
