> **Citation**: The data used in this exercise is derived from [Capital Bikeshare](https://www.capitalbikeshare.com/system-data) and is used in accordance with the published [license agreement](https://www.capitalbikeshare.com/data-license-agreement).


> **Citation**: Some of the code used in this exercise was derived from examples on the Microsoft Learn website.


### Exploratory Data Analysis

The first step is to explore the data used for training in order to understand the relationship between the features (X) and the label being predicted (Y).

During this stage you might carry out the following:
- Data cleaning
- Handling missing values
- Feature engineering
- Normalization
- Data encoding

In [None]:
# Load the bike data into a pandas DataFrame
import pandas as pd
bike_share_data = pd.read_csv('daily-bike-share.csv')
bike_share_data.head()

In [None]:
bike_share_data.dtypes

In [None]:
bike_share_data['day'] = pd.DatetimeIndex(bike_share_data['dteday']).day
bike_share_data.head(3)

In [None]:
features_label = ['temp','atemp','hum','windspeed','rentals']
bike_share_data[features_label].describe()

In [None]:
# Visualize using matplotlib
import matplotlib.pyplot as plt

# Display the plots inside Jupyter notebook
%matplotlib inline

# Get a series of rentals
rentals = bike_share_data['rentals']

# A figure with 2 rows and 1 column to plot 2 graphs
figure, axes = plt.subplots(2,1,figsize = (9,12))

# Histogram plot
axes[0].hist(rentals, bins=100)
axes[0].set_ylabel('Frequency of rentals')

# Add lines for descriptive statistics
mean, median = rentals.mean(), rentals.median()
axes[0].axvline(mean, color='purple', linestyle='dashed', linewidth=2)
axes[0].axvline(median, color='green', linestyle='dashed', linewidth=2)

# Boxplot
axes[1].boxplot(rentals, vert=False)
axes[1].set_xlabel('Rentals')

figure.suptitle('Rental distribution')
figure.show()

In [None]:
# Separate features and labels
X, y = bike_share_data[['season','mnth', 'holiday','weekday','workingday','weathersit','temp', 'atemp', 'hum', 'windspeed']].values, bike_share_data['rentals'].values
print('Features:',X[:10], '\nLabels:', y[:10], sep='\n')

In [None]:
%pip install scikit-learn

In [None]:
from sklearn.model_selection import \
    train_test_split as tts

#Split the dataset into 
# 80% for the training and 20% for validation
X_train, X_test, y_train, y_test = \
tts(X,y, test_size = 0.20, random_state = 0)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit the linear regression model on the training dataset
model = LinearRegression().fit(X_train, y_train)
print(model)

In [None]:
# Evaluate the model with test data
import numpy as np

predicted_rentals = model.predict(X_test)
print('Predicted rentals: ', np.round(predicted_rentals)[:5])
print('Actual rentals: ', np.round(y_test)[:5])

In [None]:
# Plot a trendline to compare the output
import matplotlib.pyplot as plt

%matplotlib inline

plt.scatter(y_test, predicted_rentals)
plt.xlabel('Actual Rentals')
plt.ylabel('Predicted Rentals')
plt.title('Daily Bike Share Predictions')

z = np.polyfit(y_test, predicted_rentals, 1)
p = np.poly1d(z)
plt.plot(y_test,p(y_test), color='green')
plt.show()

In [None]:
# Quantify errors using evaluation metrics
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, predicted_rentals)
print("MSE:", mse)

rmse = np.sqrt(mse)
print("RMSE:", rmse)

r2 = r2_score(y_test, predicted_rentals)
print("R2:", r2)

In [5]:
5//1.005

4.0

In [17]:
a = 5
b = 10  

print('A =',id(a))
print('B =',id(b))
b = a

print('A =',id(a))
print('B =',id(b))

A = 1480675098992
B = 1480675099152
A = 1480675098992
B = 1480675098992


In [23]:
names=["Carol", "Bob", "Ted", "Alice"]  
actors=["Natalie Wood", "Robert Culp",   
	"Elliot Gould", "Dyan Cannon"]  
role=zip(actors, names)
for person in role:  
	print(person)


('Natalie Wood', 'Carol')
('Robert Culp', 'Bob')
('Elliot Gould', 'Ted')
('Dyan Cannon', 'Alice')


In [27]:
names=(["Carol"], "Bob", "Ted", "Alice")

In [28]:
names[0][0] = 10

In [29]:
names

([10], 'Bob', 'Ted', 'Alice')

In [30]:

glob = 1

def foo():
    loc = 5
    
print('loc in foo():', \
      'loc' in locals())
foo()
print('loc in global:', \
      'loc' in globals())

print('glob in global:', \
      'foo' in globals())


loc in foo(): False
loc in global: False
glob in global: True
