In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Build a reproducible toy dataset: uniform [0, 1) features and labels drawn
# at random from two class names. Features and labels are sampled
# independently of each other.
N_SAMPLES, N_FEATURES = 100, 5
np.random.seed(0)  # fix the legacy global NumPy RNG so reruns give the same draw
X = np.random.random_sample((N_SAMPLES, N_FEATURES))
y = np.random.choice(['class1', 'class2'], size=N_SAMPLES)

In [4]:
# One-hot encode the target labels (one indicator column per distinct label).
# The encoder is built with its default settings and the result is densified
# with .toarray(): the `sparse=False` keyword used previously was deprecated in
# scikit-learn 1.2 and removed in 1.4 (TypeError), while this form works on
# every release.
# NOTE(review): y_encoded is not consumed by any later cell visible here; the
# classifiers below are trained on the original string labels in `y`.
encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y.reshape(-1, 1)).toarray()



In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Fit a k-nearest-neighbours classifier (no hyperparameters overridden) on the
# training split, then label the held-out rows.
knn = KNeighborsClassifier().fit(X_train, y_train)  # fit() returns the estimator
knn_predictions = knn.predict(X_test)
knn_predictions  # bare last expression: rendered as the cell's output

array(['class1', 'class2', 'class2', 'class1', 'class2', 'class1',
       'class2', 'class2', 'class2', 'class1', 'class1', 'class2',
       'class1', 'class2', 'class2', 'class2', 'class1', 'class1',
       'class1', 'class2'], dtype='<U6')

In [7]:
# Share of held-out labels that the k-NN predictions match exactly.
knn_accuracy = accuracy_score(y_true=y_test, y_pred=knn_predictions)
print(f'k-NN Accuracy: {knn_accuracy}')

k-NN Accuracy: 0.55


In [8]:
# Per-class precision / recall / F1 summary for the k-NN predictions,
# printed under a heading line.
print(
    'k-NN Classification Report:',
    classification_report(y_test, knn_predictions),
    sep='\n',
)

k-NN Classification Report:
              precision    recall  f1-score   support

      class1       0.67      0.50      0.57        12
      class2       0.45      0.62      0.53         8

    accuracy                           0.55        20
   macro avg       0.56      0.56      0.55        20
weighted avg       0.58      0.55      0.55        20



In [9]:
# Fit a logistic-regression model (no hyperparameters overridden) on the
# training split and predict labels for the held-out rows.
log_reg = LogisticRegression().fit(X_train, y_train)  # fit() returns the estimator
log_reg_predictions = log_reg.predict(X_test)

In [10]:
# Fit a decision-tree classifier (no hyperparameters overridden, so no explicit
# random_state) on the training split and predict the held-out rows.
decision_tree = DecisionTreeClassifier().fit(X_train, y_train)  # fit() returns the estimator
decision_tree_predictions = decision_tree.predict(X_test)

In [11]:
# 5-fold cross-validation of the decision tree. Note that it is run over the
# complete dataset (X, y), not just the training split; one score per fold.
cv_scores = cross_val_score(estimator=decision_tree, X=X, y=y, cv=5)
print(f'Cross-Validation Scores for Decision Tree: {cv_scores}')
print(f'Mean CV Score: {cv_scores.mean()}')

Cross-Validation Scores for Decision Tree: [0.35 0.6  0.55 0.5  0.4 ]
Mean CV Score: 0.48


In [12]:
# Demonstrate one-hot encoding on a feature: draw a single categorical column
# with three possible values and expand it into one indicator column per value.
X_new = np.random.choice(['A', 'B', 'C'], size=(100, 1))  # New categorical feature
# Default-constructed encoder + .toarray(): the `sparse=False` keyword used
# previously was deprecated in scikit-learn 1.2 and removed in 1.4 (TypeError);
# densifying the default sparse result works on every release.
# Note: this rebinds `encoder`, replacing any encoder stored under that name.
encoder = OneHotEncoder()
X_encoded = encoder.fit_transform(X_new).toarray()
print('One-Hot Encoded Features:')
print(X_encoded)

One-Hot Encoded Features:
[[0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [

