In [6]:
import pandas as pd
import numpy as np

# Feature engineering pipeline: load the cleaned sheet, derive new columns,
# discard the columns that are no longer needed, and write the result out.
clean_df = (
    pd.read_excel('./SpotifyDataCleaned.xlsx')
    # 1. Parse 'release_date' so its calendar components can be extracted.
    .assign(release_date=lambda frame: pd.to_datetime(frame['release_date']))
    .assign(
        # Calendar parts of the release date (pandas numbers dayofweek
        # with Monday=0 through Sunday=6).
        release_year=lambda frame: frame['release_date'].dt.year,
        release_month=lambda frame: frame['release_date'].dt.month,
        release_day=lambda frame: frame['release_date'].dt.day,
        release_dayofweek=lambda frame: frame['release_date'].dt.dayofweek,
        # 2. Log transform of 'loudness'. Subtracting the column minimum and
        #    adding 1 shifts the smallest value to 1, so the argument of the
        #    log is never zero or negative.
        loudness_log=lambda frame: np.log(
            frame['loudness'] - frame['loudness'].min() + 1
        ),
        # 3. Interaction feature: element-wise product of 'energy' and
        #    'danceability'.
        energy_danceability_interaction=lambda frame: (
            frame['energy'] * frame['danceability']
        ),
    )
    # Drop 'release_date' and 'collab': the date has been replaced by the
    # derived columns above, and the original note states 'collab' is not
    # used in the project.
    .drop(columns=['release_date', 'collab'])
)

# Save the engineered dataset (without the index column).
clean_df.to_excel('SpotifyDataEngineer.xlsx', index=False)
