Title: Popular Classification Algorithms


K-Nearest Neighbors (KNN)

Task 1: Classify fruits based on weight and color.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression  # no longer used in this cell; left in place in case other cells rely on it
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# Task 1: classify a fruit (apple / banana) from its weight and color with
# K-Nearest Neighbors, then predict the class of one unseen fruit.

# Number of neighbors that vote on each prediction. Must not exceed the number
# of training rows (6 with the split below).
N_NEIGHBORS = 3

# Sample dataset
data = {
    'weight': [150, 170, 140, 130, 180, 120, 160, 110],
    'color': ['red', 'green', 'red', 'yellow', 'green', 'yellow', 'red', 'green'],
    'fruit': ['apple', 'apple', 'apple', 'banana', 'apple', 'banana', 'apple', 'banana']
}

df = pd.DataFrame(data)

# Encode color and fruit labels.
# LabelEncoder numbers the classes in sorted (alphabetical) order.
le_color = LabelEncoder()
le_fruit = LabelEncoder()

df['color_encoded'] = le_color.fit_transform(df['color'])   # green=0, red=1, yellow=2
df['fruit_encoded'] = le_fruit.fit_transform(df['fruit'])   # apple=0, banana=1

# Features and target
# NOTE: the integer color code implies an ordering between colors that has no
# real meaning; that is acceptable for this toy example, but one-hot encoding
# would be the better choice for a distance-based model on real data.
X = df[['weight', 'color_encoded']]
y = df['fruit_encoded']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train a K-Nearest Neighbors classifier.
# KNN is distance-based, so the features are standardized first; otherwise
# weight (110-180) would swamp the color code (0-2) in every distance.
model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=N_NEIGHBORS))
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
# Passing `labels` explicitly keeps the report aligned with `target_names`
# even when the tiny test split happens to contain only one of the classes
# (without it classification_report raises a ValueError in that case).
fruit_labels = list(range(len(le_fruit.classes_)))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    y_test,
    y_pred,
    labels=fruit_labels,
    target_names=le_fruit.classes_,
    zero_division=0,
))

# Optional: predict on a new fruit
# The color must be one the encoder saw during fitting; le_color.transform
# raises a ValueError for an unseen color.
new_data = pd.DataFrame({'weight': [145], 'color': ['yellow']})
new_data['color_encoded'] = le_color.transform(new_data['color'])
prediction = model.predict(new_data[['weight', 'color_encoded']])
predicted_fruit = le_fruit.inverse_transform(prediction)
print("Predicted Fruit:", predicted_fruit[0])



Task 2: Predict customer clothing size based on height and weight.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Task 2: predict a customer's clothing size from height and weight, then
# predict the size for one new customer.

# Sizes from smallest to largest. Both the encoding and the decoding map are
# derived from this single list so they can never drift apart.
SIZE_ORDER = ['S', 'M', 'L', 'XL']
size_to_code = {size: code for code, size in enumerate(SIZE_ORDER)}   # 'S' -> 0, ..., 'XL' -> 3
size_map = {code: size for size, code in size_to_code.items()}        # 0 -> 'S', ..., 3 -> 'XL'

# Sample dataset: height (cm), weight (kg), and clothing size
data = {
    'height': [160, 165, 170, 175, 180, 185, 150, 155, 190, 195],
    'weight': [55, 60, 65, 70, 75, 80, 45, 50, 90, 95],
    'size': ['S', 'S', 'M', 'M', 'L', 'L', 'S', 'S', 'XL', 'XL']
}

df = pd.DataFrame(data)

# Encode clothing size labels
df['size_encoded'] = df['size'].map(size_to_code)
# Series.map yields NaN for any label missing from the mapping; fail loudly
# here instead of letting NaN targets reach the classifier.
assert df['size_encoded'].notna().all(), "dataset contains a size not listed in SIZE_ORDER"

# Features and target
X = df[['height', 'weight']]
y = df['size_encoded']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)

# Decode predicted size
decoded_pred = [size_map[i] for i in y_pred]
decoded_true = [size_map[i] for i in y_test]

# Evaluation
# List the sizes that actually occur in the test split in natural size order
# (S < M < L < XL) rather than the alphabetical order used by default.
sizes_present = set(decoded_true) | set(decoded_pred)
report_labels = [size for size in SIZE_ORDER if size in sizes_present]
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(
    decoded_true,
    decoded_pred,
    labels=report_labels,
    zero_division=0,
))

# Optional: Predict a new customer's size
new_customer = pd.DataFrame({'height': [172], 'weight': [68]})
predicted_size = size_map[clf.predict(new_customer)[0]]
print("Predicted clothing size for customer:", predicted_size)



Task 3: Determine optimal movie recommendation based on viewer preferences.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Task 3: recommend the movie whose genre list is closest (by cosine
# similarity of TF-IDF vectors) to the viewer's stated genre preferences.

# Sample movie dataset
data = {
    'title': [
        'The Matrix', 'Inception', 'The Godfather', 'The Dark Knight',
        'Pulp Fiction', 'Interstellar', 'The Hangover', 'Titanic'
    ],
    'genre': [
        'Action Sci-Fi', 'Action Sci-Fi Thriller', 'Crime Drama', 'Action Crime Drama',
        'Crime Drama', 'Adventure Drama Sci-Fi', 'Comedy', 'Romance Drama'
    ]
}

df = pd.DataFrame(data)

# Viewer preference (this would usually come from user profile or input)
viewer_preferences = "Action Sci-Fi Thriller"

# Convert genres and preferences into TF-IDF vectors.
# Each whitespace-separated genre is one token. The default token pattern
# splits on punctuation, which turns "Sci-Fi" into the two tokens "sci" and
# "fi" and so counts that single genre twice in every similarity.
vectorizer = TfidfVectorizer(token_pattern=r"\S+")
genre_matrix = vectorizer.fit_transform(df['genre'])

# Transform viewer preference using the same vectorizer
preference_vector = vectorizer.transform([viewer_preferences])

# Compute cosine similarity between viewer preference and each movie
similarities = cosine_similarity(preference_vector, genre_matrix).flatten()

# Get the top recommended movie
top_index = similarities.argmax()

# Display result
# If the preferences share no genre with any movie, every similarity is 0 and
# argmax falls back to row 0; report that as "no match" instead of presenting
# the first movie as a recommendation.
if similarities[top_index] > 0:
    recommended_movie = df.iloc[top_index]['title']
    print("🎬 Recommended Movie:", recommended_movie)
    print("🔍 Similarity Score:", similarities[top_index])
else:
    recommended_movie = None
    print("No movie matches the viewer preferences:", viewer_preferences)

