# Bike Sharing Market Research

Dataset source: [UCI Machine Learning Repository — Bike Sharing Dataset](https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset)

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

# Show floats with thousands separators and three decimals in pandas output.
pd.set_option("display.float_format", '{:,.3f}'.format)
# Use the dark Plotly template for every figure created below.
pio.templates.default = "plotly_dark"

In [2]:
def polynomial_regression_predict(x, y, degree, num_points=200):
    """Fit a least-squares polynomial to ``(x, y)`` and evaluate it on an even grid.

    Pairs in which either value is NaN or infinite are ignored, so a few
    missing observations do not turn the whole trend line into NaN.

    Parameters
    ----------
    x, y : array-like
        Paired one-dimensional numeric observations.
    degree : int
        Degree of the polynomial handed to ``np.polyfit``.
    num_points : int, default 200
        Number of evenly spaced grid points between the smallest and the
        largest usable ``x`` value.

    Returns
    -------
    tuple of np.ndarray
        ``(x_range, y_pred)``: the evaluation grid and the fitted polynomial's
        values on that grid.

    Notes
    -----
    Errors raised by ``np.polyfit`` itself (for example for empty or
    mismatched inputs) are propagated unchanged.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)

    # Only filter when the inputs are proper 1-D pairs; any other shape is
    # passed straight to np.polyfit so it reports the problem as before.
    if x_values.ndim == 1 and x_values.shape == y_values.shape:
        usable = np.isfinite(x_values) & np.isfinite(y_values)
        x_values, y_values = x_values[usable], y_values[usable]

    poly_fn = np.poly1d(np.polyfit(x_values, y_values, deg=degree))

    # Evaluate only inside the observed range; no extrapolation.
    x_range = np.linspace(np.min(x_values), np.max(x_values), num_points)
    return x_range, poly_fn(x_range)

In [3]:
# Column names of the workbook, bound to variables so later cells can refer to
# them without repeating string literals.
date, season, year, month = 'date', 'season', 'year', 'month'
holiday, weekday, workingday = 'holiday', 'weekday', 'workingday'
weather_situation = 'weather situation'
temperature, feel_temperature = 'temperature', 'feel temperature'
humidity, windspeed = 'humidity', 'windspeed'
casual, registered, total = 'casual', 'registered', 'total'

# Read the workbook and make sure the date column holds datetimes.
filepath = "../data/Bike_sharing_market_research.xlsx"
data = pd.read_excel(filepath)
data[date] = pd.to_datetime(data[date])

In [4]:
# Derive the covered years from the data instead of hard-coding them, so the
# sentence stays correct if the workbook is replaced or extended.
first_year = data[date].min().year
last_year = data[date].max().year
print(f"The bike rentals data spans from {first_year} to {last_year}")

# Busiest and quietest day: report the count and the calendar date it fell on.
busiest_idx = data[total].idxmax()
quietest_idx = data[total].idxmin()
print(f"The maximum total rentals in a day is {data[total].max():,} on {data.loc[busiest_idx, date].date()}")
print(f"The minimum total rentals in a day is {data[total].min():,} on {data.loc[quietest_idx, date].date()}")

The bike rentals data spans from 2011 to 2012
The maximum total rentals in a day is 8,714 on 2012-09-15
The minimum total rentals in a day is 22 on 2012-10-29


In [5]:
# Daily total rentals over the whole period. The figure is the cell's last
# expression, so the notebook renders it directly.
px.line(data, x=date, y=total, title="Total Bike Rentals Over Time", width=2400, height=450)

In [6]:
# One colour per rider type; the keys are the two columns that get plotted.
color_map = dict(zip((casual, registered), ('skyblue', 'darkcyan')))

# Wide-form line chart: one trace for each rider-type column.
fig = px.line(
    data, x=date, y=list(color_map),
    title="Total Bike Rentals by User Type Over Time",
    color_discrete_map=color_map,
    width=2400, height=450,
)

print("This shows that most rentals are from registered users.")
fig.show()

This shows that most rentals are from registered users.


In [7]:
# 10-day trailing moving average of the daily totals. With pandas' default
# min_periods the first window - 1 rows have no full window and stay NaN.
moving_avg_total = data[total].rolling(window=10).mean()
moving_avg_total.name = 'moving_avg_total'

# Readable legend entries, keyed by the trace names Plotly Express derives
# from the column name and the Series name.
legend_names = {total: 'Total Rentals', 'moving_avg_total': 'Moving Average'}

fig = px.line(
    data,
    x=date,
    y=[total, moving_avg_total],
    title="Total Bike Rentals Over Time (Moving Average)",
    height=450,
    width=2400,
    color_discrete_map={total: 'blue', 'moving_avg_total': 'cyan'}
)

# Apply the readable names. This runs after px.line because the colour map is
# keyed by the raw names; without it the legend shows the raw identifiers.
fig.for_each_trace(
    lambda trace: trace.update(name=legend_names.get(trace.name, trace.name))
)

fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Total Rentals",
)


print("The seasonality pattern holds for both 2011 and 2012.\nThe number of rentals are low during the winter months December to early March.")

fig.show()

The seasonality pattern holds for both 2011 and 2012.
The number of rentals are low during the winter months December to early March.


In [8]:
# Yearly rental totals, selected with boolean masks.
rentals_2011 = data.loc[data[year] == 2011, total].sum()
rentals_2012 = data.loc[data[year] == 2012, total].sum()

# Relative growth from 2011 to 2012, in percent.
pct_change = ((rentals_2012 - rentals_2011) / rentals_2011) * 100

# Semi-transparent area chart of the moving average computed in the previous cell.
fig = px.area(
    data, x=date, y=[moving_avg_total],
    title="Total Bike Rentals Over Time (Area Chart)",
    color_discrete_sequence=["#FF6347"],  # tomato
    width=2400, height=450,
)
fig.update_traces(opacity=0.5)

print(f"Percentage change in total rentals from 2011 to 2012: {pct_change:.2f}%")
fig.show()

Percentage change in total rentals from 2011 to 2012: 64.88%


In [9]:
# Calendar order for the weekday labels; groupby alone would sort them alphabetically.
day_order = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']

# Total rentals per weekday, as a one-column frame in calendar order.
total_rental_per_weekday = (
    data.groupby(weekday)[[total]]
    .sum()
    .reindex(day_order)
)

fig = px.bar(
    total_rental_per_weekday,
    x=total_rental_per_weekday.index, y=total,
    text=total_rental_per_weekday[total],
    title="Total Bike Rentals by Weekday",
    color_discrete_sequence=['brown'],
    width=500,
)
# The y-axis is deliberately zoomed into 440k-490k, so the bars do not start at zero.
fig.update_layout(yaxis_range=[440_000, 490_000])
fig.update_traces(textposition="outside")

print("The rentals increase from weekday to weekend, with the highest rentals on Thursday to Saturday.")
fig.show()

The rentals increase from weekday to weekend, with the highest rentals on Thursday to Saturday.


In [10]:
# Total rentals for each weather situation; the group labels become the index.
weather_situation_total = data.groupby(weather_situation)[total].sum()

# Share of all rentals per weather situation. Colours are assigned in the order
# of the grouped index.
fig = px.pie(
    weather_situation_total,
    names=weather_situation_total.index,
    values=weather_situation_total.values,
    color_discrete_sequence=['green', 'orange', 'red'],
    title="Proportion of Total Bike Rentals by Weather Situation",
    width=500,
)
fig.show()

In [11]:
# Rentals summed per weekday (rows) and weather situation (columns), with the
# rows put into calendar order. `.loc` raises if a weekday label is missing.
weekday_by_weather = data.pivot_table(
    index=weekday, columns=weather_situation, values=total, aggfunc='sum'
)
total_rentals_weekday_weather = weekday_by_weather.loc[day_order]

In [12]:
# Grouped bars: for every weekday, one bar per weather-situation column.
fig = px.bar(
    total_rentals_weekday_weather,
    y=total_rentals_weekday_weather.columns,
    title="Total Rentals by Weekday and Weather",
    barmode='group',
    color_discrete_sequence=['green', 'orange', 'red'],
    labels={"value": "Total Rentals", "variable": "Weather Situation"},
)

print("the weekday to weekend pattern depends on the weather situation")
fig.show()

the weekday to weekend pattern depends on the weather situation


In [13]:
# Rentals summed per season (rows) and weather situation (columns).
season_weather_rentals = pd.pivot_table(
    data,
    index=season,
    columns=weather_situation,
    values=total,
    aggfunc='sum',
)

# Two-stop colour scale running from light blue to dark navy.
color_scale_rentals_season_weather = ['skyblue', '#000064']

fig = px.imshow(
    season_weather_rentals,
    color_continuous_scale=color_scale_rentals_season_weather,
    text_auto=True,
    width=700,
    height=600,
)

# Put the x-axis on top and pin one tick to each weather-situation column.
fig.update_xaxes(
    side="top",
    tickmode='array',
    tickvals=season_weather_rentals.columns,
    ticktext=season_weather_rentals.columns,
)
fig.update_layout(coloraxis_colorbar={"title": "Total Rentals"})

print("Non-winter months and good weather situation have the highest rentals")
fig.show()

Non-winter months and good weather situation have the highest rentals


In [14]:
# Degree-2 polynomial trend of total rentals against temperature.
temperature_total_trend_line = polynomial_regression_predict(
    data[temperature], data[total], degree=2
)
x, y = temperature_total_trend_line

# Scatter of the daily observations.
fig = px.scatter(
    data, x=temperature, y=total,
    title="Temperature vs Total Rentals",
    labels={"temperature": "Temperature", "total": "Total Rentals"},
    color_discrete_sequence=['blue'],
    width=1400, height=600,
)

# Draw the fitted curve on top of the points.
fig.add_trace(
    go.Scatter(
        x=x, y=y,
        name='Trend Line (Poly Deg 2)',
        mode='lines',
        line={'color': 'cyan', 'width': 4},
    )
)

print("in-general the higher the temperature, the more the rentals")
fig.show()

in-general the higher the temperature, the more the rentals


In [15]:
# Degree-2 polynomial trend of total rentals against humidity, returned as an
# (x grid, predicted y) pair.
humidity_total_trend_line = polynomial_regression_predict(
    data[humidity], data[total], degree=2
)

# Daily observations coloured by weather situation.
# NOTE(review): color_continuous_scale only takes effect if the weather
# situation column is numeric — confirm against the workbook.
fig = px.scatter(
    data, x=humidity, y=total,
    color=weather_situation,
    color_continuous_scale=['green', 'orange', 'red'],
    title="Humidity vs Total Rentals",
    labels={"humidity": "Humidity", "total": "Total Rentals"},
    width=1200, height=500,
)

# Draw the fitted curve on top of the points.
fig.add_trace(
    go.Scatter(
        x=humidity_total_trend_line[0],
        y=humidity_total_trend_line[1],
        name='Trend Line (Poly Deg 2)',
        mode='lines',
        line={'color': 'cyan', 'width': 4},
    )
)

print("high humidity corresponds to worst weather situation so low rentals")
print("humidity between 0.4 to 0.8 has the better rentals")
fig.show()

high humidity corresponds to worst weather situation so low rentals
humidity between 0.4 to 0.8 has the better rentals
