Title: Popular Classification Algorithms


K Nearst Neighbors (KNN)

Task 1: Classify fruits based on weight and color.

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Step 1: Create example dataset
data = {
    'Weight': [150, 170, 140, 130, 180, 160, 120, 110, 100, 90],
    'Color': ['Red', 'Red', 'Green', 'Green', 'Yellow', 'Yellow', 'Red', 'Green', 'Yellow', 'Red'],
    'Fruit': ['Apple', 'Apple', 'Apple', 'Apple', 'Banana', 'Banana', 'Apple', 'Apple', 'Banana', 'Apple']
}

df = pd.DataFrame(data)

# Step 2: Encode color (categorical) to numeric
le_color = LabelEncoder()
df['Color_encoded'] = le_color.fit_transform(df['Color'])

# Step 3: Features and labels
X = df[['Weight', 'Color_encoded']]
y = df['Fruit']

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Train Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 6: Predict on test set
y_pred = clf.predict(X_test)

# Step 7: Evaluate
print("Classification Report:\n", classification_report(y_test, y_pred))

# Optional: Show some predictions
print("\nTest set predictions:")
for i, pred in enumerate(y_pred):
    print(f"Input: Weight={X_test.iloc[i, 0]}, Color={le_color.inverse_transform([X_test.iloc[i,1]])[0]} --> Predicted Fruit: {pred}")


Classification Report:
               precision    recall  f1-score   support

       Apple       0.00      0.00      0.00       1.0
      Banana       0.00      0.00      0.00       2.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0


Test set predictions:
Input: Weight=100, Color=Yellow --> Predicted Fruit: Apple
Input: Weight=170, Color=Red --> Predicted Fruit: Banana
Input: Weight=160, Color=Yellow --> Predicted Fruit: Apple


Task 2: Predict customer clothing size based on height and weight.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np

# Example dataset
data = {
    'Height_cm': [160, 170, 180, 155, 165, 175, 185, 150, 172, 168],
    'Weight_kg': [55, 65, 80, 50, 60, 75, 85, 45, 68, 63],
    'Size': ['S', 'M', 'L', 'S', 'M', 'L', 'L', 'S', 'M', 'M']
}

df = pd.DataFrame(data)

# Features and labels
X = df[['Height_cm', 'Weight_kg']]
y = df['Size']

# Encode target labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

# Train classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on test set
y_pred = clf.predict(X_test)

# Fix: Get unique labels in test set
unique_labels = np.unique(y_test)

# Print classification report with matching labels and target_names
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=unique_labels,
    target_names=le.inverse_transform(unique_labels)
))

# Optional: Show predictions
print("\nTest set predictions:")
for i in range(len(y_pred)):
    print(f"Height: {X_test.iloc[i]['Height_cm']} cm, Weight: {X_test.iloc[i]['Weight_kg']} kg --> Predicted Size: {le.inverse_transform([y_pred[i]])[0]}")


Classification Report:
              precision    recall  f1-score   support

           L       1.00      1.00      1.00         1
           M       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Test set predictions:
Height: 172 cm, Weight: 68 kg --> Predicted Size: M
Height: 170 cm, Weight: 65 kg --> Predicted Size: M
Height: 175 cm, Weight: 75 kg --> Predicted Size: L


Task 3: Determine optimal movie recommendation based on viewer preferences.

In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Sample movie dataset with features
movies = pd.DataFrame({
    'MovieID': [1, 2, 3, 4, 5],
    'Title': ['Action Movie', 'Romantic Comedy', 'Sci-Fi Epic', 'Drama Film', 'Horror Thriller'],
    'Action': [1, 0, 1, 0, 0],
    'Romance': [0, 1, 0, 1, 0],
    'SciFi': [0, 0, 1, 0, 0],
    'Drama': [0, 0, 0, 1, 0],
    'Horror': [0, 0, 0, 0, 1]
})

# User preferences: importance scores for each genre (scale 0-5)
user_preferences = pd.DataFrame({
    'Action': [5],
    'Romance': [2],
    'SciFi': [4],
    'Drama': [1],
    'Horror': [0]
})

# Step 1: Extract movie feature vectors
movie_features = movies.loc[:, 'Action':'Horror']

# Step 2: Scale features (optional but recommended)
scaler = StandardScaler()
movie_features_scaled = scaler.fit_transform(movie_features)
user_pref_scaled = scaler.transform(user_preferences)

# Step 3: Compute cosine similarity between user preferences and each movie
similarity_scores = cosine_similarity(user_pref_scaled, movie_features_scaled)

# Step 4: Add similarity scores to movies DataFrame
movies['Similarity'] = similarity_scores.flatten()

# Step 5: Recommend movies sorted by similarity score
recommended_movies = movies.sort_values(by='Similarity', ascending=False)

print("Movies recommended based on viewer preferences:")
print(recommended_movies[['Title', 'Similarity']])


Movies recommended based on viewer preferences:
             Title  Similarity
2      Sci-Fi Epic    0.754387
0     Action Movie    0.140372
3       Drama Film   -0.116060
1  Romantic Comedy   -0.386024
4  Horror Thriller   -0.508699
