In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Location of the daily bike-sharing CSV.
# NOTE(review): this is an absolute local path, so the cell only runs on a
# machine where this exact file exists — consider a configurable data dir.
DATA_PATH = r"D:\Rehan Working\Working\Datasets\bike_sharing_daily.csv"

# Read the CSV into the working frame that the later cells operate on.
df = pd.read_csv(DATA_PATH)

# Preview the first three rows as the cell's displayed result.
df.head(3)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349


In [8]:
# Parse the "dteday" column with pandas' datetime parser and write the
# result back under the same column name, so the .dt accessor is usable
# in the following cells.
df["dteday"] = df["dteday"].pipe(pd.to_datetime)

# Show the first two rows of the frame after the conversion.
df.head(2)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801


In [9]:
# Derive calendar features from the datetime column through its .dt accessor:
# the weekday name, the month number and the calendar year.
date_parts = df["dteday"].dt
df["day_of_week"] = date_parts.day_name()
df["month"] = date_parts.month
df["year"] = date_parts.year

# Print the first three rows of the newly added columns.
derived_cols = ["day_of_week", "month", "year"]
print("New features Derived from dtetime Column: \n", df[derived_cols].head(3))

New features Derived from dtetime Column: 
   day_of_week  month  year
0    Saturday      1  2011
1      Sunday      1  2011
2      Monday      1  2011


In [10]:
# Print a structural summary of the frame (index range, column names,
# non-null counts and dtypes) to check the derived columns and the
# datetime conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   instant      731 non-null    int64         
 1   dteday       731 non-null    datetime64[ns]
 2   season       731 non-null    int64         
 3   yr           731 non-null    int64         
 4   mnth         731 non-null    int64         
 5   holiday      731 non-null    int64         
 6   weekday      731 non-null    int64         
 7   workingday   731 non-null    int64         
 8   weathersit   731 non-null    int64         
 9   temp         731 non-null    float64       
 10  atemp        731 non-null    float64       
 11  hum          731 non-null    float64       
 12  windspeed    731 non-null    float64       
 13  casual       731 non-null    int64         
 14  registered   731 non-null    int64         
 15  cnt          731 non-null    int64         
 16  day_of_w

In [11]:
# Predictor and target: "temp" is selected with a list so X stays a
# one-column DataFrame (2-D), while y is the "cnt" Series.
feature_cols = ["temp"]
X = df[feature_cols]
y = df["cnt"]

# Expand the single predictor into degree-2 polynomial terms.
# include_bias=False omits the constant column, leaving temp and temp^2.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Wrap the transformed array in a labelled frame and print its first rows.
poly_preview = pd.DataFrame(X_poly, columns=["temp", "temp^2"])
print("Original and Polynomial Feature: ")
print(poly_preview.head())

Original and Polynomial Feature: 
       temp    temp^2
0  0.344167  0.118451
1  0.363478  0.132116
2  0.196364  0.038559
3  0.200000  0.040000
4  0.226957  0.051509


In [13]:
# Split the raw features, the polynomial features and the target in ONE call.
# train_test_split applies the same shuffled row indices to every array it is
# given, so X_train, X_train_poly and y_train are guaranteed to describe the
# same rows. (Two separate calls only stay aligned as long as both use the
# same random_state and the same number of rows — easy to break silently.)
X_train, X_test, X_train_poly, X_test_poly, y_train, y_test = train_test_split(
    X, X_poly, y, test_size=0.2, random_state=42
)

# Baseline: linear regression on the original feature only.
model_orig = LinearRegression()
model_orig.fit(X_train, y_train)
y_pred_orig = model_orig.predict(X_test)
mse_orig = mean_squared_error(y_test, y_pred_orig)

# Same estimator on the polynomial features, scored on the same test rows.
model_poly = LinearRegression()
model_poly.fit(X_train_poly, y_train)
y_pred_poly = model_poly.predict(X_test_poly)
mse_poly = mean_squared_error(y_test, y_pred_poly)

# Compare test-set mean squared error of the two models (lower is better).
print(f"MSE Original: {mse_orig:.2f}")
print(f"MSE with Polynomial Feature: {mse_poly:.2f}")

MSE Original: 2391051.89
MSE with Polynomial Feature: 2431396.49
