## 7. Transforming Variables in the Bike Sharing Dataset
   - Task: Apply log, square-root, and Box-Cox transformations to the skewed variables in the Bike Sharing dataset.
   - Dataset: Bike Sharing Dataset



## Data Loading



In [None]:
import pandas as pd
import numpy as np
from scipy.stats import boxcox
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the day-level CSV from the Kaggle input directory into `df`, then
# preview the first rows as the cell's output.
day_csv_path = '/kaggle/input/bike-sharing/day.csv'
df = pd.read_csv(day_csv_path)
df.head()

In [None]:
# Show the column labels of the loaded frame so the names used in the
# following cells can be checked against it.
df.columns

## Identify Skewed Variables

In [None]:
# Draw one 30-bin histogram per candidate column so the shape of each
# distribution can be inspected visually.
skewed_vars = [
    'temp', 'atemp', 'hum', 'windspeed',
    'casual', 'registered', 'cnt',
]
df.hist(column=skewed_vars, bins=30, figsize=(15, 10))
plt.tight_layout()  # adjust subplot spacing before rendering
plt.show()


## Apply Transformations

In [None]:
# Log-transform the casual, registered and cnt columns.
# np.log1p evaluates log(1 + x), so a value of 0 maps to 0 instead of -inf.
log_columns = {
    'log_casual': 'casual',
    'log_registered': 'registered',
    'log_cnt': 'cnt',
}
for target_col, source_col in log_columns.items():
    df[target_col] = np.log1p(df[source_col])


In [None]:
# Square-root-transform the casual, registered and cnt columns, storing each
# result in a new `sqrt_`-prefixed column next to the original.
sqrt_columns = {
    'sqrt_casual': 'casual',
    'sqrt_registered': 'registered',
    'sqrt_cnt': 'cnt',
}
for target_col, source_col in sqrt_columns.items():
    df[target_col] = np.sqrt(df[source_col])


In [None]:
# Apply Box-Cox transformation to windspeed and humidity.
#
# scipy's boxcox requires strictly positive input, so each column is shifted
# by 1 before fitting to keep zero values valid. Called without `lmbda`, it
# returns the transformed values together with the fitted lambda.
#
# The fitted lambdas are kept under descriptive names rather than unpacked
# into `_`: they are needed to invert the transform or to apply the same
# transform to new data, and binding `_` in a notebook shadows IPython's
# last-output variable.
df['windspeed_boxcox'], windspeed_lambda = boxcox(df['windspeed'] + 1)
df['hum_boxcox'], hum_lambda = boxcox(df['hum'] + 1)


In [None]:
# Draw one 30-bin histogram per derived column (log, square-root and Box-Cox
# outputs) for comparison with the untransformed distributions.
transformed_vars = [
    'log_casual', 'log_registered', 'log_cnt',
    'sqrt_casual', 'sqrt_registered', 'sqrt_cnt',
    'windspeed_boxcox', 'hum_boxcox',
]
df.hist(column=transformed_vars, bins=30, figsize=(15, 10))
plt.tight_layout()  # adjust subplot spacing before rendering
plt.show()
