In [None]:
# Chargement des modules pour l'algo et calcul
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt

# Read the CSV file into a DataFrame and preview its first five rows
# (head() with no argument returns five rows).
csv_path = 'daily-bike-share.csv'
bike_data = pd.read_csv(csv_path)
bike_data.head()

In [None]:
# Parse the 'dteday' column as dates and store the day-of-month in a new
# 'day' column.
rental_dates = pd.DatetimeIndex(bike_data['dteday'])
bike_data['day'] = rental_dates.day

# Show the first 32 rows.
# NOTE(review): presumably 32 was chosen so the 'day' value can be seen
# rolling over into the next month — confirm.
bike_data.head(32)

In [None]:
# Split the frame into a feature matrix X and a label vector y (NumPy arrays).
feature_columns = [
    'season', 'mnth', 'holiday', 'weekday', 'workingday',
    'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
]
X = bike_data[feature_columns].values
y = bike_data['rentals'].values

# Preview the first ten entries of each; sep='\n' puts every printed
# argument on its own line.
print('Variables:', X[:10], '\nEtiquettes:', y[:10], sep='\n')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing and train on the remaining 70%.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

# Report how many rows ended up in each subset.
split_report = 'Données entrainement: %d rows\nDonnées de test: %d rows'
print(split_report % (X_train.shape[0], X_test.shape[0]))

In [None]:
# Train a random-forest regressor on the training split.
# random_state is pinned so that re-running the notebook rebuilds the same
# forest: the train/test split is already seeded, but an unseeded forest
# would still give a different model (and R2) on every run.
model = RandomForestRegressor(random_state=0).fit(X_train, y_train)
print(model, "\n")

# Evaluate the model on the held-out test split.
predictions = model.predict(X_test)

# R2 score of the predictions against the actual test labels.
r2 = r2_score(y_test, predictions)
print("R2:", r2)

In [None]:
# Fit a degree-1 polynomial (straight line) to the predictions as a function
# of the actual labels; poly1d wraps the coefficients in a callable.
fit_coefficients = np.polyfit(y_test, predictions, 1)
trend_line = np.poly1d(fit_coefficients)

# Predicted vs. actual scatter plot.
plt.scatter(y_test, predictions)
plt.title('Prédictions quotidiennes de vélo en libre-service')
plt.xlabel('Étiquettes réelles')
plt.ylabel('Libellés prédits')

# Overlay the fitted line on the scatter plot.
plt.plot(y_test, trend_line(y_test), color='magenta')
plt.show()

In [None]:
import joblib

# Persist the trained model to disk. The dump call is kept as the cell's last
# expression, so the notebook displays whatever joblib.dump returns.
filename = './velos_Algo2.pkl'
joblib.dump(value=model, filename=filename)

In [None]:
# Reload the model from the file it was saved to.
loaded_model = joblib.load(filename)

# One new observation as a 1x10 float64 array (described in the original
# comment as, for example, tomorrow's seasonal and weather information).
# NOTE(review): presumably the ten values follow the same column order as the
# features used for training — confirm.
X_new = np.array(
    [[1, 1, 0, 3, 1, 1, 0.226957, 0.22927, 0.436957, 0.1869]],
    dtype='float64',
)
print('Nouvel échantillon : {}'.format(list(X_new[0])))

# Predict with the reloaded model and print the rounded first result.
result = loaded_model.predict(X_new)
print('Prédiction: {:.0f} locations'.format(np.round(result[0])))

In [None]:
# Five observations, one per row (described in the original comment as
# features based on a five-day weather forecast).
# NOTE(review): the first column is 0 in every row here, whereas the
# single-observation example uses 1 in that position — confirm 0 is a value
# the model saw during training.
X_new = np.array([
    [0, 1, 1, 0, 0, 1, 0.344167, 0.363625, 0.805833, 0.160446],
    [0, 1, 0, 1, 0, 1, 0.363478, 0.353739, 0.696087, 0.248539],
    [0, 1, 0, 2, 0, 1, 0.196364, 0.189405, 0.437273, 0.248309],
    [0, 1, 0, 3, 0, 1, 0.2, 0.212122, 0.590435, 0.160296],
    [0, 1, 0, 4, 0, 1, 0.226957, 0.22927, 0.436957, 0.1869],
])

# Predict all five rows in one call, then print each rounded prediction on
# its own line.
results = loaded_model.predict(X_new)
print('Prédictions de location sur 5 jours :')
for rounded_prediction in np.round(results):
    print(rounded_prediction)