In [None]:
#1. Why does this feature selection with chi2 fail?
from sklearn.feature_selection import SelectKBest, chi2

# Toy data: 4 samples x 3 features. Note the negative entries (-1 and -6).
X = [[-1, 2, 3], [4, 0, -6], [7, 8, 9], [1, 2, 3]]
y = [0, 1, 0, 1]

selector = SelectKBest(score_func=chi2, k=2)
# Expected to fail here: chi2 requires non-negative feature values.
X_new = selector.fit_transform(X, y)


In [9]:
# Answer: chi2 only accepts non-negative feature values, so rescale X first.
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import MinMaxScaler

X = [
    [-1, 2, 3],
    [4, 0, -6],
    [7, 8, 9],
    [1, 2, 3],
]
y = [0, 1, 0, 1]

# Min-max scaling maps each column onto [0, 1], which removes the negatives.
X_scaled = MinMaxScaler().fit(X).transform(X)

# Fit the selector on the rescaled data, then keep its two chosen columns.
selector = SelectKBest(chi2, k=2).fit(X_scaled, y)
X_new = selector.transform(X_scaled)

print(X_new)


[[0.25 0.6 ]
 [0.   0.  ]
 [1.   1.  ]
 [0.25 0.6 ]]


In [None]:
#2. Why does this cosine similarity call fail?
from sklearn.metrics.pairwise import cosine_similarity

# Error: strings instead of numerical features
user1 = ['action', 'comedy', 'romance']
user2 = ['action', 'thriller', 'comedy']

# Expected to fail here: the genre names are raw strings, not numeric vectors.
similarity = cosine_similarity([user1], [user2])


In [10]:
# ✅ Fix: Use binary/encoded vectors
from sklearn.metrics.pairwise import cosine_similarity

# One 0/1 flag per genre slot replaces the raw genre strings.
# Judging by the per-row comments the slots are action, comedy, romance,
# thriller; the fifth slot is 0 for both users (assumed unused - confirm).
user1 = [1, 1, 1, 0, 0]  # action, comedy, romance
user2 = [1, 1, 0, 1, 0]  # action, comedy, thriller

# Each user is wrapped in a list so both arguments are one-row 2D inputs.
similarity = cosine_similarity(
    X=[user1],
    Y=[user2],
)
print(similarity)


[[0.66666667]]


In [None]:
#3. Why does this PCA fit fail?
from sklearn.decomposition import PCA

# Toy data: 3 samples x 2 features.
X = [[1, 2], [3, 4], [5, 6]]
pca = PCA(n_components=5)  # Error: 5 components requested, but X has only 3 samples and 2 features
X_new = pca.fit_transform(X)


In [11]:
# ✅ Fix: n_components must be <= min(n_samples, n_features)
from sklearn.decomposition import PCA

# 3 samples x 2 features, so at most min(3, 2) = 2 components can be requested.
X = [[1, 2], [3, 4], [5, 6]]
pca = PCA(n_components=2)
X_new = pca.fit_transform(X)


In [None]:
#4. Why does this cosine similarity call fail?
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Two users' ratings for three items; np.nan marks an item the user did not rate.
ratings = np.array([[5, 3, np.nan], [4, np.nan, 3]])

# Expected to fail here: the missing values (NaN) have not been filled.
similarity = cosine_similarity(ratings)


In [12]:
# ✅ Fix: Fill missing values before similarity
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

ratings = np.array([
    [5, 3, np.nan],
    [4, np.nan, 3],
])

# Swap every NaN for 0.0 so the matrix holds only finite numbers.
ratings_filled = np.where(np.isnan(ratings), 0.0, ratings)

# Single-argument form: pairwise similarity between the rows of the matrix.
similarity = cosine_similarity(X=ratings_filled)


In [None]:
#5. Why does this RFE fit fail?
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

selector = RFE(estimator=LogisticRegression(), n_features_to_select=5)
# NOTE(review): if earlier cells have already run, X and y may still exist in
# the kernel and a different failure could appear - confirm on a fresh kernel.
selector.fit(X, y)  # Error: X and y not defined


In [13]:
# ✅ Fix: Define X and y before using selector
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Toy data: 4 samples x 3 features with binary labels.
X = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [2, 3, 4],
]
y = [0, 1, 0, 1]

# Build and fit the selector in one step; fit() hands back the fitted selector.
selector = RFE(LogisticRegression(), n_features_to_select=2).fit(X, y)

# Boolean mask over the input columns: True marks a selected feature.
print(selector.support_)


[False  True  True]


In [None]:
#6. Why does this cosine similarity call fail?
# The import sits on its own line so it actually runs; it used to be fused
# into the "#6." comment and was therefore never executed.
from sklearn.metrics.pairwise import cosine_similarity

items = ["action movie", "romantic comedy", "sci-fi thriller"]
# Expected to fail here: raw text is passed where numeric vectors are required.
similarity = cosine_similarity([items[0]], [items[1]])


In [14]:
# ✅ Fix: Convert text to vector using TF-IDF
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

items = [
    "action movie",
    "romantic comedy",
    "sci-fi thriller",
]

# Turn each description into one numeric TF-IDF row.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(items)

# Row slices (not bare integer indexes) keep both operands two-dimensional.
similarity = cosine_similarity(
    X=tfidf_matrix[:1],
    Y=tfidf_matrix[1:2],
)
print(similarity)


[[0.]]


In [None]:
#7. Why does this feature selection fail?
from sklearn.feature_selection import SelectKBest, f_classif

# X has 3 samples but y below carries only 2 labels.
X = [[1, 2], [3, 4], [5, 6]]
y = [0, 1]  # Error: mismatched dimensions
selector = SelectKBest(score_func=f_classif, k=1)
X_new = selector.fit_transform(X, y)


In [15]:
# ✅ Fix: Length of y must match X
from sklearn.feature_selection import SelectKBest, f_classif

# Three samples, three labels: one label per row of X.
X = [
    [1, 2],
    [3, 4],
    [5, 6],
]
y = [0, 1, 0]

# Fit the single-feature selector first, then apply it to the same data.
selector = SelectKBest(f_classif, k=1).fit(X, y)
X_new = selector.transform(X)


In [None]:
#8. Why does this cosine similarity call fail?
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# A single user's ratings stored as a flat 1-D array.
ratings = np.array([5, 4, 3])
# Expected to fail here: the input is 1-D and has to be reshaped to a 2D array.
cosine_similarity(ratings, ratings)


In [16]:
# ✅ Fix: Reshape to 2D array
# Imports are repeated here so this cell runs on its own, like the other fix
# cells, instead of relying on names left in the kernel by earlier cells.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# reshape(1, -1) turns the flat vector into a single row (1 sample x 3 features).
ratings = np.array([5, 4, 3]).reshape(1, -1)
cosine_similarity(ratings, ratings)


array([[1.]])

In [None]:
#9. Why does this pipeline fit fail?
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2

# The second row holds None, i.e. a missing value.
X = [[1, 2], [3, None], [4, 5]]
y = [0, 1, 0]

# Single-step pipeline: keep the one feature with the best chi2 score.
pipeline = Pipeline([
    ('select', SelectKBest(score_func=chi2, k=1))
])

# Expected to fail here: the missing value has to be filled or removed first.
pipeline.fit(X, y)


In [17]:
# ✅ Fix: Fill or remove missing values
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2
import numpy as np

# Same data as the failing cell, with the missing entry written as NaN so it
# can be detected and filled numerically.
X_raw = np.array([[1, 2], [3, np.nan], [4, 5]], dtype=float)

# Fill each missing entry with the mean of the observed values in its column
# (here the second column's mean of 2 and 5, i.e. 3.5).
column_means = np.nanmean(X_raw, axis=0)
X = np.where(np.isnan(X_raw), column_means, X_raw)
y = [0, 1, 0]

# Single-step pipeline: keep the one feature with the best chi2 score.
pipeline = Pipeline([
    ('select', SelectKBest(score_func=chi2, k=1))
])

pipeline.fit(X, y)


Pipeline(steps=[('select',
                 SelectKBest(k=1,
                             score_func=<function chi2 at 0x000001E204A83948>))])

In [None]:
#10. Why does this feature selection on sparse data fail?
from scipy.sparse import csr_matrix
from sklearn.feature_selection import SelectKBest, chi2

# Sparse 3 x 3 matrix; the middle column contains negative entries (-1).
X = csr_matrix([[0, -1, 3], [0, 2, 1], [3, -1, 0]])
y = [0, 1, 0]

selector = SelectKBest(score_func=chi2, k=2)
# Expected to fail here: chi2 requires non-negative input.
X_new = selector.fit_transform(X, y)


In [18]:
# ✅ Fix: chi2 requires non-negative input
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2

# Start from the same sparse matrix as the failing cell, negatives included,
# so the fix is applied to the original input rather than to a dense copy.
X = csr_matrix([[0, -1, 3], [0, 2, 1], [3, -1, 0]])
y = [0, 1, 0]

# MinMaxScaler does not accept sparse input, so densify before scaling.
# The scaled values land in [0, 1], which satisfies chi2.
X_scaled = MinMaxScaler().fit_transform(X.toarray())
selector = SelectKBest(score_func=chi2, k=2)
X_new = selector.fit_transform(X_scaled, y)
