<a href="https://colab.research.google.com/github/Shaswattiwari/Regression---Bike-Sharing-Demand-Prediction/blob/main/demand_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install required packages
!pip install pandas
!pip install Tensorflow
!pip install sciket-learn
!pip install keras
!pip install matplotlib
!pip install Funcyimpute
!pip install shap
!pip install PyOD
!pip install eli5 scikit-learn
!pip install statsmodels
!pip install altair_viewer


In [3]:
#Suppress warnings for cleaner output
import warnings
warnings.simplefilter("ignore")
warnings.simplefilter("ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=FutureWarning)
warnings.filterwarnings("ignore")

In [4]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D,GRU,Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from pyod.models.auto_encoder import AutoEncoder
import altair as alt

In [5]:
#Load the dataset
df = pd.read_csv("SeoulBikeData.CSV.csv", encoding='latin1')

In [None]:
df

Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,30/11/2018,1003,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,Autumn,No Holiday,Yes
8756,30/11/2018,764,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8757,30/11/2018,694,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8758,30/11/2018,712,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,Autumn,No Holiday,Yes


In [7]:
# Copy the data for manipulation
data = df.copy()

In [8]:
# Convert 'Date' column to datetime format
data['Date'] = pd.to_datetime(data['Date'],format='%d/%m/%Y')

In [9]:
# Aggregate data on a daily basis
data = data.groupby('Date').agg({
    'Rented Bike Count': 'sum',
    'Hour': 'mean',  # You might want to use another aggregation method for 'Hour'
    'Temperature(°C)': 'mean',
    'Humidity(%)': 'mean',
    'Wind speed (m/s)': 'mean',
    'Visibility (10m)': 'mean',
    'Dew point temperature(°C)': 'mean',
    'Solar Radiation (MJ/m2)': 'mean',
    'Rainfall(mm)': 'sum',
    'Snowfall (cm)': 'sum',
    'Seasons': 'first',  # Assuming seasons don't change within a day
    'Holiday': 'first'   # Assuming holiday doesn't change within a day
}).reset_index()


In [10]:
# Round numerical values to two decimal places
data=data.round(2)

In [11]:
# Explore the time series data
train_dates = pd.to_datetime((data['Date']))

In [12]:
train_dates.tail()

360   2018-11-26
361   2018-11-27
362   2018-11-28
363   2018-11-29
364   2018-11-30
Name: Date, dtype: datetime64[ns]

In [13]:
# Extract relevant columns for modeling
working_df = data.drop(columns=["Date","Hour"], axis=1)

In [14]:
working_df.head()

Unnamed: 0,Rented Bike Count,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday
0,9539,-2.45,45.88,1.54,1870.75,-13.55,0.25,0.0,0.0,Winter,No Holiday
1,8523,1.32,61.96,1.71,1471.08,-5.72,0.26,0.0,0.0,Winter,No Holiday
2,7222,4.88,81.54,1.61,455.75,1.88,0.13,4.0,0.0,Winter,No Holiday
3,8729,-0.3,52.5,3.45,1362.83,-9.93,0.28,0.1,0.0,Winter,No Holiday
4,8307,-4.46,36.42,1.11,1959.46,-17.42,0.04,0.0,0.0,Winter,No Holiday


In [15]:
# explore data statistics
working_df.isnull().sum()

Rented Bike Count            0
Temperature(°C)              0
Humidity(%)                  0
Wind speed (m/s)             0
Visibility (10m)             0
Dew point temperature(°C)    0
Solar Radiation (MJ/m2)      0
Rainfall(mm)                 0
Snowfall (cm)                0
Seasons                      0
Holiday                      0
dtype: int64

In [16]:
working_df.describe()

Unnamed: 0,Rented Bike Count,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm)
count,365.0,365.0,365.0,365.0,365.0,365.0,365.0,365.0,365.0
mean,16910.449315,12.882904,58.226301,1.724685,1436.82589,4.073726,0.569151,3.568493,1.801644
std,10258.639202,11.555965,14.673402,0.601652,491.249692,12.808962,0.31437,11.84693,8.664456
min,0.0,-14.74,22.25,0.66,214.29,-27.75,0.03,0.0,0.0
25%,6500.0,3.81,47.92,1.3,1087.54,-4.86,0.29,0.0,0.0
50%,17730.0,13.84,57.21,1.64,1557.75,5.01,0.57,0.0,0.0
75%,26075.0,22.42,67.54,1.95,1877.25,14.57,0.82,0.5,0.0
max,36149.0,33.74,95.88,4.0,2000.0,25.04,1.22,95.5,78.7


# I observe the following insights:

**High correlation between temperature and bike rentals:** The data suggests that there is a direct relationship between bike rentals and temperature, with more bikes being rented as the temperature increases.

**Moderate correlation between humidity and bike rentals:** Humidity is negatively correlated with bike rentals, meaning that the higher the humidity, the fewer bike rentals occur.

**Low correlation between wind speed and bike rentals:** The data shows little correlation between wind speed and bike rentals.

**Visibility has a weak positive correlation with bike rentals:** As visibility increases, there is a slight uptick in bike rentals, but this relationship is not consistent.

**Dew point temperature has a moderate negative correlation with bike rentals:** A higher dew point temperature seems to reduce bike rentals.

**Solar radiation has a strong positive correlation with bike rentals:** As solar radiation increases, bike rentals also increase significantly.

**Rainfall and snowfall have a strong negative correlation with bike rentals:** Unsurprisingly, more rain and snow lead to fewer bike rentals.

Overall, the weather conditions significantly affect bike rentals, and temperature, solar radiation, rainfall, and snowfall appear to be key factors in determining bike rental demand. This information can be useful for improving demand forecasting and resource planning in bike-sharing systems.

In [17]:
working_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 11 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Rented Bike Count          365 non-null    int64  
 1   Temperature(°C)            365 non-null    float64
 2   Humidity(%)                365 non-null    float64
 3   Wind speed (m/s)           365 non-null    float64
 4   Visibility (10m)           365 non-null    float64
 5   Dew point temperature(°C)  365 non-null    float64
 6   Solar Radiation (MJ/m2)    365 non-null    float64
 7   Rainfall(mm)               365 non-null    float64
 8   Snowfall (cm)              365 non-null    float64
 9   Seasons                    365 non-null    object 
 10  Holiday                    365 non-null    object 
dtypes: float64(8), int64(1), object(2)
memory usage: 31.5+ KB


In [18]:
# Explore distributions using Altair charts
columns_to_explore = ['Rented Bike Count', 'Temperature(°C)', 'Humidity(%)',
                      'Wind speed (m/s)', 'Visibility (10m)', 'Dew point temperature(°C)',
                      'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)']

# Convert 'Seasons' and 'Holiday' columns to categorical type
working_df['Seasons'] = pd.Categorical(working_df['Seasons'], categories=['Spring', 'Summer', 'Fall', 'Winter'], ordered=True)
working_df['Holiday'] = pd.Categorical(working_df['Holiday'])

# Initialize a list to store Altair charts
charts = []

# Generate charts for each column
for col in columns_to_explore:
    chart = alt.Chart(working_df).mark_bar().encode(
        alt.X(col, bin=alt.Bin(maxbins=30)),
        y='count()',
        tooltip=['count()', alt.X(col, bin=alt.Bin(maxbins=30))]
    ).properties(
        width=200,
        height=150,
        title=f'Histogram of {col}'
    ).interactive()  # Add interactivity
    charts.append(chart)

# Create a grid layout of charts (3 by 3)
grid_chart = alt.vconcat(
    alt.hconcat(*charts[:3]),
    alt.hconcat(*charts[3:6]),
    alt.hconcat(*charts[6:9]),
    alt.hconcat(*charts[9:]),
    spacing=20  # Adjust spacing between charts
)

# Show the grid chart in the notebook
grid_chart


In [19]:
# Enable Altair's built-in renderer for Jupyter Notebook
alt.renderers.enable('default')


# Select numeric columns (excluding 'Rented Bike Count')
numeric_cols = ['Temperature(°C)', 'Humidity(%)', 'Wind speed (m/s)',
                'Visibility (10m)', 'Dew point temperature(°C)',
                'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)']

# Initialize a list to store Altair charts
scatter_plots = []

# Create scatter plots for 'Rented Bike Count' vs each numeric column
for col in numeric_cols:
    scatter_plot = alt.Chart(working_df).mark_circle().encode(
        x=alt.X(col, type='quantitative', title=col),
        y=alt.Y('Rented Bike Count', type='quantitative', title='Rented Bike Count'),
        tooltip=['Rented Bike Count', alt.Tooltip(col, type='quantitative')]
    ).properties(
        width=200,
        height=150,
        title=f'Scatter Plot: Rented Bike Count vs {col}'
    ).interactive()  # Add interactivity
    scatter_plots.append(scatter_plot)

# Combine scatter plots into a 2 by 4 grid
scatter_grid = alt.vconcat(alt.hconcat(*scatter_plots[:4]), alt.hconcat(*scatter_plots[4:]))

# Show the scatter plot grid in the notebook
scatter_grid


# I can make the following observations:

**Rented Bike Count vs Temperature (°C):** There is a general trend of increasing rented bike count as the temperature rises, with most bike rentals occurring between 10°C and 25°C.

**Rented Bike Count vs Humidity (%):** There is no clear trend between rented bike count and humidity. However, bike rentals are less frequent when humidity is above 80%.

**Rented Bike Count vs Dew point temperature (°C)**: There is no clear trend between rented bike count and dew point temperature. There are fewer bike rentals when the dew point temperature is below -10°C or above 20°C.

**Rented Bike Count vs Solar Radiation (MJ/m2):** There is no clear trend between rented bike count and solar radiation. Bike rentals are less frequent when solar radiation is above 0.8 MJ/m2.

**Rented Bike Count vs Wind speed (m/s):** There is no clear trend between rented bike count and wind speed. However, bike rentals are less frequent when wind speed is above 1.2 m/s.

**Rented Bike Count vs Rainfall (mm):** There is a decreasing trend in bike rentals as rainfall increases. There are very few bike rentals when rainfall is above 60 mm.

**Rented Bike Count vs Visibility (m):** There is a general trend of increasing bike rentals as visibility improves. However, there is a drop in bike rentals when visibility is above 2,000m.

**Rented Bike Count vs Snowfall (cm):** There is a decreasing trend in bike rentals as snowfall increases. There are very few bike rentals when snowfall is above 3 cm.

In [22]:
# Enable Altair's built-in renderer for Jupyter Notebook
alt.renderers.enable('default')

# Select numeric columns
numeric_cols = ['Rented Bike Count', 'Temperature(°C)', 'Humidity(%)',
                'Wind speed (m/s)', 'Visibility (10m)', 'Dew point temperature(°C)',
                'Solar Radiation (MJ/m2)', 'Rainfall(mm)', 'Snowfall (cm)']

# Calculate correlation matrix
correlation_matrix = working_df[numeric_cols].corr()

# Reshape the correlation matrix into long format
correlation_long = correlation_matrix.stack().reset_index()
correlation_long.columns = ['variable1', 'variable2', 'correlation']

# Create a heatmap using Altair
heatmap = alt.Chart(correlation_long).mark_rect().encode(
    x='variable1:N',
    y='variable2:N',
    color='correlation:Q',
    tooltip=['variable1', 'variable2', 'correlation']
).properties(
    width=500,
    height=500,
    title='Correlation Heatmap'
)

# Show the heatmap in the notebook
heatmap


#  I can observe the following correlation coefficients:

Rented Bike Count vs Temperature (°C): ~0.6 (positive correlation)
Rented Bike Count vs Humidity (%): ~-0.2 (weak negative correlation)
Rented Bike Count vs Dew point temperature (°C): ~0.4 (positive correlation)
Rented Bike Count vs Solar Radiation (MJ/m2): ~0.3 (weak positive correlation)
Rented Bike Count vs Wind speed (m/s): ~-0.2 (weak negative correlation)
Rented Bike Count vs Rainfall (mm): ~-0.7 (strong negative correlation)
Rented Bike Count vs Snowfall (cm): ~-0.8 (strong negative correlation)
Rented Bike Count vs Visibility (10m): ~0.4 (positive correlation)

In [None]:
working_df.isnull().sum()

Rented Bike Count             0
Temperature(°C)               0
Humidity(%)                   0
Wind speed (m/s)              0
Visibility (10m)              0
Dew point temperature(°C)     0
Solar Radiation (MJ/m2)       0
Rainfall(mm)                  0
Snowfall (cm)                 0
Seasons                      91
Holiday                       0
dtype: int64

In [None]:
working_df=working_df.dropna()

In [None]:
working_df

In [None]:
categorical_features = working_df[['Seasons','Holiday']].columns


In [None]:
categorical_features

Index(['Seasons', 'Holiday'], dtype='object')

In [None]:
# One-hot encode categorical columns
encoder = OneHotEncoder()  # Initialize OneHotEncoder
encoded_categorical = encoder.fit_transform(working_df[categorical_features])  # Fit and transform categorical features
encoded_categorical_df = pd.DataFrame(encoded_categorical.toarray(), columns=encoder.get_feature_names_out())  # Convert encoded categorical features to DataFrame
encoded = pd.concat([working_df.drop(categorical_features, axis=1), encoded_categorical_df], axis=1)  # Concatenate encoded categorical features with original DataFrame


In [None]:
encoded.head()

Unnamed: 0,Rented Bike Count,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons_Spring,Seasons_Summer,Seasons_Winter,Holiday_Holiday,Holiday_No Holiday
0,9539,-2.45,45.88,1.54,1870.75,-13.55,0.25,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,8523,1.32,61.96,1.71,1471.08,-5.72,0.26,0.0,0.0,0.0,0.0,1.0,0.0,1.0
2,7222,4.88,81.54,1.61,455.75,1.88,0.13,4.0,0.0,0.0,0.0,1.0,0.0,1.0
3,8729,-0.3,52.5,3.45,1362.83,-9.93,0.28,0.1,0.0,0.0,0.0,1.0,0.0,1.0
4,8307,-4.46,36.42,1.11,1959.46,-17.42,0.04,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [None]:
autoencoder = AutoEncoder(hidden_neurons=[2, 1, 1, 2], epochs=5, batch_size=1, contamination=0.1)  # Initialize AutoEncoder with specified parameters
autoencoder.fit(encoded)  # Fit the AutoEncoder model on the encoded data

# Anomaly detection
outlier_scores = autoencoder.decision_function(encoded)  # Calculate outlier scores using the AutoEncoder model
threshold = np.percentile(outlier_scores, 95)  # Set the threshold for outlier detection
outliers = outlier_scores > threshold  # Identify outliers based on the threshold

# Count the number of anomalies and outliers detected
num_anomalies = sum(outliers)  # Count the number of anomalies detected
num_outliers = len(outliers) - num_anomalies  # Calculate the number of outliers detected

# Print the number of anomalies and outliers detected
print(f"Number of anomalies detected: {num_anomalies}")
print(f"Number of outliers detected: {num_outliers}")

# Remove outliers
cleaned_data_scaled = encoded[~outliers]  # Remove outliers from the encoded data


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 14)                210       
                                                                 
 dropout_11 (Dropout)        (None, 14)                0         
                                                                 
 dense_13 (Dense)            (None, 14)                210       
                                                                 
 dropout_12 (Dropout)        (None, 14)                0         
                                                                 
 dense_14 (Dense)            (None, 2)                 30        
                                                                 
 dropout_13 (Dropout)        (None, 2)                 0         
                                                                 
 dense_15 (Dense)            (None, 1)                



Number of anomalies detected: 14
Number of outliers detected: 260


In [None]:
scaler = MinMaxScaler()  # Initialize MinMaxScaler
scaled_numerical = scaler.fit_transform(cleaned_data_scaled)  # Scale the cleaned data using MinMaxScaler


In [None]:
def timestep(data, time_steps):
    """
    Generate sequences of input-output pairs for time series forecasting.

    Parameters:
    - data: numpy array, the input time series data
    - time_steps: int, the number of time steps to consider for each sequence

    Returns:
    - X: numpy array, input sequences of shape (num_samples, time_steps, num_features)
    - y: numpy array, output sequences of shape (num_samples, num_features)
    """
    X = []  # Initialize list to store input sequences
    y = []  # Initialize list to store output sequences
    for i in range(len(data) - time_steps):
        X.append(data[i:i + time_steps])  # Append input sequence of length 'time_steps'
        y.append(data[i + time_steps])    # Append output sequence corresponding to next time step
    return np.array(X), np.array(y)  # Return input and output sequences as numpy arrays

# Generate input-output pairs using timestep function
X, y = timestep(scaled_numerical, 5)  # Generate sequences with a time window of 5 time steps


Data Preprocessing:
    Handle missing values and outliers

In [None]:
gru = Sequential()  # Initialize a sequential model

# Add GRU layers with specified configurations
gru.add(GRU(128, activation='relu', input_shape=(X.shape[1], X.shape[2]), return_sequences=True))  # GRU layer with 128 units, relu activation, and return sequences
gru.add(Dropout(0.3))  # Dropout layer with dropout rate of 0.3
gru.add(GRU(64, activation='relu'))  # GRU layer with 64 units and relu activation
gru.add(Dropout(0.3))  # Dropout layer with dropout rate of 0.3
gru.add(Dense(32, activation="relu"))  # Dense layer with 32 units and relu activation
gru.add(Dense(y.shape[1]))  # Dense output layer with number of units equal to number of output features

# Compile the model
gru.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')  # Compile the model with Adam optimizer and mean squared error loss

# Train the model
history_gru = gru.fit(X, y, epochs=10)  # Fit the model to the input-output pairs for 10 epochs


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


no missing vals
using pyod for outliers

In [None]:
cnn = Sequential()  # Initialize a sequential model

# Add Convolutional layers with specified configurations
cnn.add(Conv1D(128, activation='relu', kernel_size=3, input_shape=(X.shape[1], X.shape[2])))  # Convolutional layer with 128 filters, relu activation, kernel size of 3, and input shape
cnn.add(Dropout(0.3))  # Dropout layer with dropout rate of 0.3
cnn.add(Conv1D(64, kernel_size=3, activation='relu'))  # Convolutional layer with 64 filters, relu activation, and kernel size of 3
cnn.add(Dropout(0.3))  # Dropout layer with dropout rate of 0.3
cnn.add(Dense(32, activation='relu'))  # Dense layer with 32 units and relu activation
cnn.add(Dense(y.shape[1]))  # Dense output layer with number of units equal to number of output features

# Compile the model
cnn.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')  # Compile the model with Adam optimizer and mean squared error loss

# Train the model
history_cnn = cnn.fit(X, y, epochs=10)  # Fit the model to the input-output pairs for 10 epochs


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
rnn = Sequential()  # Initialize a sequential model

# Add LSTM layers with specified configurations
rnn.add(LSTM(64, activation="relu", input_shape=(X.shape[1], X.shape[2]), return_sequences=True))  # LSTM layer with 64 units, relu activation, input shape, and return sequences
rnn.add(LSTM(32, activation="relu", return_sequences=False))  # LSTM layer with 32 units, relu activation, and return single output
rnn.add(Dropout(0.2))  # Dropout layer with dropout rate of 0.2
rnn.add(Dense(y.shape[1]))  # Dense output layer with number of units equal to number of output features

# Compile the model
rnn.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')  # Compile the model with Adam optimizer and mean squared error loss

# Train the model
history_rnn = rnn.fit(X, y, epochs=10)  # Fit the model to the input-output pairs for 10 epochs


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
day_in_future = 30

In [None]:
forecast_dates = (pd.date_range(list(train_dates)[-1], periods=day_in_future, freq='1d').strftime('%d/%m/%Y'))

In [None]:
forecast_dates

Index(['30/11/2018', '01/12/2018', '02/12/2018', '03/12/2018', '04/12/2018',
       '05/12/2018', '06/12/2018', '07/12/2018', '08/12/2018', '09/12/2018',
       '10/12/2018', '11/12/2018', '12/12/2018', '13/12/2018', '14/12/2018',
       '15/12/2018', '16/12/2018', '17/12/2018', '18/12/2018', '19/12/2018',
       '20/12/2018', '21/12/2018', '22/12/2018', '23/12/2018', '24/12/2018',
       '25/12/2018', '26/12/2018', '27/12/2018', '28/12/2018', '29/12/2018'],
      dtype='object')

In [None]:
# Make predictions using each model
predictions_model1 = gru.predict(X[-day_in_future:])
predictions_model2 = cnn.predict(X[-day_in_future:])
predictions_model3 = rnn.predict(X[-day_in_future:])



Inverse transform the scaled predictions to original scale using the scaler

In [None]:
predictions_model3=scaler.inverse_transform(predictions_model3)


In [None]:
predictions_model1=scaler.inverse_transform(predictions_model1)


In [None]:
predictions_model2=scaler.inverse_transform(np.squeeze(predictions_model2))


In [None]:
# Convert them into pandas DataFrame objects
df1 = pd.DataFrame(predictions_model1).iloc[:,:1].round(0)
df2 = pd.DataFrame(predictions_model2).iloc[:,:1].round(0)
df3 = pd.DataFrame(predictions_model3).iloc[:,:1].round(0)


In [None]:
combined_df = pd.concat([df1, df2, df3],axis=1)

In [None]:
combined_df

Unnamed: 0,0,0.1,0.2
0,23027.0,16591.0,25520.0
1,23221.0,16741.0,25498.0
2,23391.0,16682.0,25937.0
3,23397.0,17063.0,26032.0
4,23378.0,16607.0,26074.0
5,23401.0,16719.0,26011.0
6,23404.0,16495.0,25865.0
7,23405.0,16651.0,26055.0
8,23446.0,16631.0,26116.0
9,23434.0,16233.0,26274.0


In [None]:
predictions = combined_df.mean(axis=1).round(0)

In [None]:
predictions.head()

0    21713.0
1    21820.0
2    22003.0
3    22164.0
4    22020.0
dtype: float32

In [None]:
known_data=data[['Date','Rented Bike Count']]
known_data.head()

Unnamed: 0,Date,Rented Bike Count
0,2017-12-01,9539
1,2017-12-02,8523
2,2017-12-03,7222
3,2017-12-04,8729
4,2017-12-05,8307


In [None]:
# prompt: Using dataframe known_data: line

import altair as alt
alt.Chart(known_data).mark_line().encode(
    alt.X('Date:T'),
    alt.Y('Rented Bike Count:Q'),
).properties(
    width=600
)


In [None]:
forecast_data=pd.DataFrame({'Date':forecast_dates,'value':predictions})

In [None]:
known_data["Date"] = pd.to_datetime(known_data["Date"],format='%d/%m/%Y')
forecast_data["Date"] = pd.to_datetime(forecast_data["Date"],format='%d/%m/%Y')

In [None]:
forecast_data = forecast_data.iloc[1:]

In [None]:
forecast_data.head()

Unnamed: 0,Date,value
1,2018-12-01,21820.0
2,2018-12-02,22003.0
3,2018-12-03,22164.0
4,2018-12-04,22020.0
5,2018-12-05,22044.0


In [None]:


# Enable the dark theme


# Create line charts for known and forecasted data
known_line = alt.Chart(known_data).mark_line(color='light blue').encode(
    x='Date:T',
    y='Rented Bike Count:Q',
    tooltip=['Date', 'Rented Bike Count']
)

forecasted_line = alt.Chart(forecast_data).mark_line(color='gray').encode(
    x='Date:T',
    y='value:Q',
    tooltip=['Date', 'value']
)

# Combine the line charts and set properties
combined_chart = (known_line + forecasted_line).properties(
    width=800,
    height=400,
    title='Predicted Values'
)

# Show the combined chart
combined_chart.interactive()
