## Dataset Creation

In [5]:
import pandas as pd
import numpy as np

# Creating a sample dataset
# A seeded generator makes every run of the notebook produce the same synthetic
# data, so the tables shown below can be reproduced after a kernel restart.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

data = {
    'feature1': rng.normal(loc=50, scale=10, size=N_SAMPLES),   # Normally distributed data (mean 50, std 10)
    'feature2': rng.uniform(low=0, high=100, size=N_SAMPLES),   # Uniformly distributed data on [0, 100)
    'feature3': rng.integers(low=1, high=10, size=N_SAMPLES),   # Discrete data: integers 1..9 (high is exclusive)
    'feature4': rng.exponential(scale=2, size=N_SAMPLES),       # Exponentially distributed data
}

df = pd.DataFrame(data)
df.head()

Unnamed: 0,feature1,feature2,feature3,feature4
0,48.415547,19.319199,8,1.916139
1,37.457147,11.007735,9,3.310267
2,45.723769,56.941749,5,1.348817
3,46.220445,37.645629,4,0.035996
4,53.885336,58.753865,3,0.049964


## Log Transformation

In [6]:
# Log transformation
# Each feature is shifted so that its minimum maps to 0, then log(1 + x) is
# applied; the minimum row therefore becomes exactly 0.0 in the new column.
# np.log1p is used rather than np.log(x + 1) because it stays accurate when the
# shifted value is very small (adding 1 first would round the small part away).
for column in ('feature1', 'feature4'):
    shifted = df[column] - df[column].min()
    df[f'log_{column}'] = np.log1p(shifted)

In [7]:
# Preview the first five rows, including the two log-transformed columns.
df.head(n=5)

Unnamed: 0,feature1,feature2,feature3,feature4,log_feature1,log_feature4
0,48.415547,19.319199,8,1.916139,3.17728,1.05784
1,37.457147,11.007735,9,3.310267,2.566719,1.452613
2,45.723769,56.941749,5,1.348817,3.058221,0.838468
3,46.220445,37.645629,4,0.035996,3.081283,0.0
4,53.885336,58.753865,3,0.049964,3.382735,0.013871


## Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

# Standardise the four raw features and store the results next to the
# originals under a 'scaled_' prefix. The scaler is fitted on the same rows
# it transforms.
scaler = StandardScaler()
raw_columns = ['feature1', 'feature2', 'feature3', 'feature4']
scaled_columns = [f'scaled_{name}' for name in raw_columns]
df[scaled_columns] = scaler.fit_transform(df[raw_columns])


In [9]:
# Preview the first five rows, including the scaled columns.
df.head(n=5)

Unnamed: 0,feature1,feature2,feature3,feature4,log_feature1,log_feature4,scaled_feature1,scaled_feature2,scaled_feature3,scaled_feature4
0,48.415547,19.319199,8,1.916139,3.17728,1.05784,-0.179342,-1.017223,1.050594,-0.01221
1,37.457147,11.007735,9,3.310267,2.566719,1.452613,-1.210347,-1.301358,1.424471,0.831275
2,45.723769,56.941749,5,1.348817,3.058221,0.838468,-0.432594,0.268937,-0.071037,-0.355456
3,46.220445,37.645629,4,0.035996,3.081283,0.0,-0.385865,-0.390718,-0.444913,-1.149749
4,53.885336,58.753865,3,0.049964,3.382735,0.013871,0.335275,0.330886,-0.81879,-1.141298


## Engineer New Features

In [10]:
# Combination feature: the row-wise sum of feature1 and feature2.
df['feature1_plus_feature2'] = df['feature1'].add(df['feature2'])

In [12]:
# Extracting relevant information
# Derived feature: the element-wise square root of feature2.
df['sqrt_feature2'] = df['feature2'].pipe(np.sqrt)

In [13]:
# Preview the first five rows, including the two engineered columns.
df.head(n=5)

Unnamed: 0,feature1,feature2,feature3,feature4,log_feature1,log_feature4,scaled_feature1,scaled_feature2,scaled_feature3,scaled_feature4,feature1_plus_feature2,sqrt_feature2
0,48.415547,19.319199,8,1.916139,3.17728,1.05784,-0.179342,-1.017223,1.050594,-0.01221,67.734746,4.395361
1,37.457147,11.007735,9,3.310267,2.566719,1.452613,-1.210347,-1.301358,1.424471,0.831275,48.464883,3.317791
2,45.723769,56.941749,5,1.348817,3.058221,0.838468,-0.432594,0.268937,-0.071037,-0.355456,102.665518,7.545976
3,46.220445,37.645629,4,0.035996,3.081283,0.0,-0.385865,-0.390718,-0.444913,-1.149749,83.866074,6.135603
4,53.885336,58.753865,3,0.049964,3.382735,0.013871,0.335275,0.330886,-0.81879,-1.141298,112.639201,7.665107
