# Import Required Libraries
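Import the core libraries used throughout the notebook: pandas, numpy, and scikit-learn. Section-specific libraries (for example OpenCV, scikit-image, and Keras) are imported in the cells that use them.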

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load Dataset
Load the dataset that will be used for feature extraction.

In [None]:
# Location of the CSV file to analyse; replace this placeholder with a real path.
dataset_path = 'path/to/your/dataset.csv'
data = pd.read_csv(dataset_path)

# Preview the first rows as a quick sanity check of the loaded table.
data.head()

# Extract Numerical Features
Extract numerical features using techniques such as normalization, standardization, and polynomial features.

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Restrict the work to the numeric columns of the dataset.
numerical_cols = data.select_dtypes(include=np.number).columns
numeric_data = data[numerical_cols]

# Normalization: min-max scaling of every numeric column.
min_max_scaler = MinMaxScaler()
normalized_features = min_max_scaler.fit_transform(numeric_data)
normalized_df = pd.DataFrame(normalized_features, columns=numerical_cols)

# Standardization: centre and scale every numeric column.
standard_scaler = StandardScaler()
standardized_features = standard_scaler.fit_transform(numeric_data)
standardized_df = pd.DataFrame(standardized_features, columns=numerical_cols)

# Polynomial expansion up to degree 2, without the constant bias column.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(numeric_data)
poly_feature_names = poly.get_feature_names_out(numerical_cols)
poly_df = pd.DataFrame(poly_features, columns=poly_feature_names)

# Display a preview of each derived feature table.
normalized_df.head(), standardized_df.head(), poly_df.head()

# Extract Categorical Features
Extract categorical features using techniques such as one-hot encoding, label encoding, and frequency encoding.

In [None]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Object-dtype columns are treated as the categorical features.
categorical_cols = data.select_dtypes(include=[object]).columns

# One-hot encoding into a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and later
# removed the old name, so try the current keyword first and fall back to the
# legacy one only on older installations.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)
one_hot_encoded_features = one_hot_encoder.fit_transform(data[categorical_cols])
one_hot_encoded_df = pd.DataFrame(
    one_hot_encoded_features,
    columns=one_hot_encoder.get_feature_names_out(categorical_cols),
    # Keep the source index so this frame lines up row-for-row with the
    # label- and frequency-encoded frames below (both inherit data's index).
    index=data.index,
)

# Label encoding: each column is mapped to integer codes independently.
# NOTE: the same encoder object is refit for every column, so after this line
# `label_encoder` only remembers the classes of the last column processed.
label_encoder = LabelEncoder()
label_encoded_df = data[categorical_cols].apply(label_encoder.fit_transform)

# Frequency encoding: replace each category by its share of the rows.
frequency_encoded_df = data[categorical_cols].apply(lambda x: x.map(x.value_counts()) / len(x))

# Display a preview of each encoding.
one_hot_encoded_df.head(), label_encoded_df.head(), frequency_encoded_df.head()


# Extract Text Features
Extract text features using TF-IDF weighting and n-gram counts. (Word embeddings are another common technique, but they are not demonstrated in the cell below.)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Object-dtype columns are the text candidates; only the first one is
# vectorized below, so at least one such column must exist.
text_cols = data.select_dtypes(include=[object]).columns

# The vectorizers reject NaN and non-string entries, which are common in CSV
# text columns. Represent missing values as empty documents and coerce the
# rest to str so fitting does not fail on them.
text_series = data[text_cols[0]].fillna('').astype(str)

# TF-IDF weights over the vocabulary of the text column.
tfidf_vectorizer = TfidfVectorizer()
tfidf_features = tfidf_vectorizer.fit_transform(text_series)
tfidf_df = pd.DataFrame(
    tfidf_features.toarray(),
    columns=tfidf_vectorizer.get_feature_names_out(),
    index=text_series.index,  # keep rows aligned with `data`
)

# Raw counts of unigrams and bigrams.
ngram_vectorizer = CountVectorizer(ngram_range=(1, 2))
ngram_features = ngram_vectorizer.fit_transform(text_series)
ngram_df = pd.DataFrame(
    ngram_features.toarray(),
    columns=ngram_vectorizer.get_feature_names_out(),
    index=text_series.index,  # keep rows aligned with `data`
)

# Display a preview of both feature tables.
tfidf_df.head(), ngram_df.head()


# Extract Image Features
Extract image features using techniques such as edge detection, color histograms, HOG descriptors, and convolutional neural networks (a pretrained VGG16).

In [None]:
import cv2
import matplotlib.pyplot as plt
from skimage.feature import hog
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model

# Load the image. cv2.imread does not raise on a missing or unreadable file;
# it returns None, which would otherwise surface as a cryptic error in
# cvtColor. Fail early with a clear message instead.
img_path = 'path/to/your/image.jpg'
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image at {img_path!r}")

# OpenCV loads pixels in BGR order; convert to RGB for matplotlib and VGG16.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img_rgb)
plt.axis('off')
plt.show()

# Edge detection with the Canny detector (thresholds 100 and 200).
edges = cv2.Canny(img, 100, 200)

plt.imshow(edges, cmap='gray')
plt.axis('off')
plt.show()

# Joint 3-D color histogram with 8 bins per channel, normalized and
# flattened into a 512-element vector.
color_hist = cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
color_hist = cv2.normalize(color_hist, color_hist).flatten()

plt.plot(color_hist)
plt.show()

# HOG descriptor on the grayscale image (8x8-pixel cells, 2x2-cell blocks).
# The `multichannel` keyword has been removed from scikit-image; a 2-D
# grayscale input needs no channel argument, so it is simply omitted, which
# works on both old and new releases.
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
hog_features, hog_image = hog(gray_img, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)

plt.imshow(hog_image, cmap='gray')
plt.axis('off')
plt.show()

# Pretrained VGG16 without its classification head, used as a fixed feature
# extractor; the output is taken from the last remaining layer.
model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=model.inputs, outputs=model.layers[-1].output)

# Resize to 224x224, add a batch axis, and apply VGG16's input preprocessing.
img_resized = cv2.resize(img_rgb, (224, 224))
x = image.img_to_array(img_resized)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

# Run the network and flatten its feature maps into a single vector.
vgg16_features = model.predict(x)
vgg16_features = vgg16_features.flatten()

vgg16_features.shape
