In [8]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

# Sample housing dataset: one row per house, three numeric predictors.
data = {
    'Bedrooms': [2, 3, 4, 3, 5],
    'Bathrooms': [1, 2, 3, 2, 4],
    'Sq Footage': [1000, 1500, 2000, 1300, 2500]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Single source of truth for the columns to expand, so the column selection
# and the generated feature names can never drift out of sync.
feature_cols = ['Bedrooms', 'Bathrooms', 'Sq Footage']

# degree=2 produces the original features, their squares and all pairwise
# products; include_bias=False leaves out the constant column of ones.
poly = PolynomialFeatures(degree=2, include_bias=False)

# Apply polynomial transformation to the selected columns
poly_feature = poly.fit_transform(df[feature_cols])

# Get the feature names after polynomial transformation
poly_feature_name = poly.get_feature_names_out(feature_cols)

# Convert the transformed data back to a DataFrame with appropriate column
# names. Reusing df's index keeps the rows aligned with the source frame, so
# a later join/concat with df matches rows correctly even if df is re-indexed.
poly_df = pd.DataFrame(poly_feature, columns=poly_feature_name, index=df.index)
poly_df


# The original columns are already present in the transformed output, so joining
# them back in would only add redundancy to the dataset; it is better to drop the
# duplicated original columns.
# Join the original data with the new polynomial features (adding a suffix to avoid overlapping column names):
#df_combine = df.join(poly_df, rsuffix = '_poly')

#df_combine
 

Unnamed: 0,Bedrooms,Bathrooms,Sq Footage,Bedrooms^2,Bedrooms Bathrooms,Bedrooms Sq Footage,Bathrooms^2,Bathrooms Sq Footage,Sq Footage^2
0,2.0,1.0,1000.0,4.0,2.0,2000.0,1.0,1000.0,1000000.0
1,3.0,2.0,1500.0,9.0,6.0,4500.0,4.0,3000.0,2250000.0
2,4.0,3.0,2000.0,16.0,12.0,8000.0,9.0,6000.0,4000000.0
3,3.0,2.0,1300.0,9.0,6.0,3900.0,4.0,2600.0,1690000.0
4,5.0,4.0,2500.0,25.0,20.0,12500.0,16.0,10000.0,6250000.0


In [None]:
#Interaction Features Using Python

In [9]:
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd

# Toy structured dataset: two numeric predictors, three rows.
data = dict(Bedrooms=[2, 3, 4], Bathrooms=[1, 2, 3])
df = pd.DataFrame(data)

# interaction_only=True keeps the original columns and adds only the
# cross-product term (no squared terms); include_bias=False drops the
# constant column.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
poly.fit(df)
X_interact = poly.transform(df)

# Label the transformed matrix using the input column names.
columns = poly.get_feature_names_out(list(df.columns))
df_interact = pd.DataFrame(data=X_interact, columns=columns)

print("🔹 Feature Creation Result (with Interaction):")
print(df_interact)


🔹 Feature Creation Result (with Interaction):
   Bedrooms  Bathrooms  Bedrooms Bathrooms
0       2.0        1.0                 2.0
1       3.0        2.0                 6.0
2       4.0        3.0                12.0


In [10]:
#Feature Extraction (from raw text data)

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Three short free-text documents that serve as the corpus to vectorize.
text_data = [
    "Machine learning is amazing",
    "Deep learning and machine learning",
    "Natural language processing is part of AI"
]

# Fit the vectorizer on the corpus and encode the same documents in one call.
vectorizer = TfidfVectorizer()
x_tfidv = vectorizer.fit_transform(text_data)

# Convert the result to a dense array and label each column with the
# vocabulary term it represents.
df_tfidv = pd.DataFrame(
    data=x_tfidv.toarray(),
    columns=vectorizer.get_feature_names_out(),
)

print("Feature Extraction Result (TF-IDF):")
print(df_tfidv)

Feature Extraction Result (TF-IDF):
         ai   amazing       and      deep        is  language  learning  \
0  0.000000  0.604652  0.000000  0.000000  0.459854  0.000000  0.459854   
1  0.000000  0.000000  0.452123  0.452123  0.000000  0.000000  0.687703   
2  0.389888  0.000000  0.000000  0.000000  0.296520  0.389888  0.000000   

    machine   natural        of      part  processing  
0  0.459854  0.000000  0.000000  0.000000    0.000000  
1  0.343851  0.000000  0.000000  0.000000    0.000000  
2  0.000000  0.389888  0.389888  0.389888    0.389888  
