## CitiBike data is collected for 1 year 2018
https://ride.citibikenyc.com/system-data

Data Schema:
Trip Duration (seconds)

*   Start Time and Date
*   Stop Time and Date
*   Start Station Name
*   End Station Name
*   Station ID
*   Station Lat/Long
*   Bike ID
*   User Type (Customer = 24-hour pass or 3-day pass user; Subscriber = Annual    Member)
*   Gender (Zero=unknown; 1=male; 2=female)
*   Year of Birth
 

In [1]:
import pandas as pd
import numpy as np
import haversine as hs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline
from glob import iglob

import warnings
warnings.filterwarnings("ignore")


In [2]:
#!pip install haversine

In [3]:
path = r'D:\Beuth Sems\Thesis\dataset\JC-2018*.csv'

all_rec = iglob(path, recursive=True)     
dataframes = (pd.read_csv(f) for f in all_rec)
tripdata = pd.concat(dataframes, ignore_index=True)

  

In [4]:
tripdata.describe()

Unnamed: 0,tripduration,start station id,start station latitude,start station longitude,end station id,end station latitude,end station longitude,bikeid,birth year,gender
count,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0
mean,673.393,3264.996606,40.722724,-74.046039,3258.408418,40.722326,-74.045505,29452.498808,1980.387875,1.152388
std,7004.022,138.429108,0.007251,0.010755,147.610023,0.007095,0.010759,2529.992371,10.268528,0.500198
min,61.0,3183.0,40.69264,-74.096937,127.0,40.679331,-74.096937,14697.0,1887.0,0.0
25%,228.0,3192.0,40.718211,-74.050444,3186.0,40.717732,-74.049968,26315.0,1974.0,1.0
50%,335.0,3205.0,40.721525,-74.043845,3203.0,40.721124,-74.043117,29493.0,1983.0,1.0
75%,549.0,3272.0,40.727224,-74.038051,3272.0,40.727224,-74.037683,29679.0,1988.0,1.0
max,2061932.0,3694.0,40.748716,-74.032108,3694.0,40.814326,-73.947821,35009.0,2002.0,2.0


In [5]:
tripdata.isnull().sum()

tripduration               0
starttime                  0
stoptime                   0
start station id           0
start station name         0
start station latitude     0
start station longitude    0
end station id             0
end station name           0
end station latitude       0
end station longitude      0
bikeid                     0
usertype                   0
birth year                 0
gender                     0
dtype: int64

In [6]:
tripdata.shape

(353892, 15)

In [7]:
tripdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353892 entries, 0 to 353891
Data columns (total 15 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   tripduration             353892 non-null  int64  
 1   starttime                353892 non-null  object 
 2   stoptime                 353892 non-null  object 
 3   start station id         353892 non-null  int64  
 4   start station name       353892 non-null  object 
 5   start station latitude   353892 non-null  float64
 6   start station longitude  353892 non-null  float64
 7   end station id           353892 non-null  int64  
 8   end station name         353892 non-null  object 
 9   end station latitude     353892 non-null  float64
 10  end station longitude    353892 non-null  float64
 11  bikeid                   353892 non-null  int64  
 12  usertype                 353892 non-null  object 
 13  birth year               353892 non-null  int64  
 14  gend

### calculate travelled distances based on lat and lon


In [8]:
#!pip install haversine

In [9]:
from haversine import Unit
import haversine as hs

def find_distance(slat, slon, elat, elon):
    loc1=(slat,slon)
    loc2=(elat, elon)
    return round(hs.haversine(loc1,loc2,unit=Unit.MILES),1)

In [10]:

tripdata['dist'] = tripdata.apply(lambda r: find_distance(r['start station latitude'],r['start station longitude'],r['end station latitude'],r['end station longitude']) , axis=1)

In [11]:
tripdata.head(5)

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender,dist
0,932,2018-01-01 02:06:17.5410,2018-01-01 02:21:50.0270,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31929,Subscriber,1992,1,0.9
1,550,2018-01-01 12:06:18.0390,2018-01-01 12:15:28.4430,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31845,Subscriber,1969,2,0.9
2,510,2018-01-01 12:06:56.9780,2018-01-01 12:15:27.8100,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31708,Subscriber,1946,1,0.9
3,354,2018-01-01 14:53:10.1860,2018-01-01 14:59:05.0960,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,-74.038526,31697,Subscriber,1994,1,0.4
4,250,2018-01-01 17:34:30.1920,2018-01-01 17:38:40.9840,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,-74.034234,31861,Subscriber,1991,1,0.2


## Explode the start and end date into different columns

In [12]:
tripdata['starttime'] = pd.to_datetime(tripdata['starttime'])
tripdata['stoptime'] = pd.to_datetime(tripdata['stoptime'])
tripdata['year'] = tripdata["starttime"].dt.year
tripdata["start_month"] = tripdata["starttime"].dt.month_name()
tripdata["end_month"] = tripdata["stoptime"].dt.month_name()
tripdata['start_day'] = tripdata["starttime"].dt.day_name()
tripdata['end_day'] = tripdata["starttime"].dt.day_name()
tripdata["start_hour"] = tripdata["starttime"].dt.hour
tripdata["end_hour"] = tripdata["stoptime"].dt.hour
tripdata["start_minute"] = tripdata["starttime"].dt.minute
tripdata["end_minute"] = tripdata["stoptime"].dt.minute

In [13]:
tripdata[tripdata["end_month"] != tripdata["start_month"]]

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,dist,year,start_month,end_month,start_day,end_day,start_hour,end_hour,start_minute,end_minute
29147,159351,2018-03-31 14:31:51.868,2018-04-02 10:47:43.781,3184,Paulus Hook,40.714145,-74.033552,3183,Exchange Place,40.716247,...,0.1,2018,March,April,Saturday,Saturday,14,10,31,47
32263,196050,2018-03-31 11:36:00.512,2018-04-02 18:03:31.456,3187,Warren St,40.721124,-74.038051,3184,Paulus Hook,40.714145,...,0.5,2018,March,April,Saturday,Saturday,11,18,36,3
36486,270188,2018-03-30 15:59:08.517,2018-04-02 19:02:16.778,3203,Hamilton Park,40.727596,-74.044247,3214,Essex Light Rail,40.712774,...,1.1,2018,March,April,Friday,Friday,15,19,59,2
42038,120931,2018-03-31 08:13:15.850,2018-04-01 17:48:47.152,3273,Manila & 1st,40.721651,-74.042884,3186,Grove St PATH,40.719586,...,0.1,2018,March,April,Saturday,Saturday,8,17,13,48
59727,235117,2018-04-29 17:50:47.757,2018-05-02 11:09:25.359,3212,Christ Hospital,40.734786,-74.050444,3677,Glenwood Ave,40.727551,...,1.2,2018,April,May,Sunday,Sunday,17,11,50,9
63194,571578,2018-04-26 18:49:23.200,2018-05-03 09:35:41.241,3270,Jersey & 6th St,40.725289,-74.045572,3207,Oakland Ave,40.737604,...,0.9,2018,April,May,Thursday,Thursday,18,9,49,35
68219,375,2018-04-30 23:55:57.124,2018-05-01 00:02:12.886,3640,Journal Square,40.73367,-74.0625,3678,Fairmount Ave,40.725726,...,0.7,2018,April,May,Monday,Monday,23,0,55,2
71072,32293,2018-05-31 21:18:25.283,2018-06-01 06:16:38.412,3184,Paulus Hook,40.714145,-74.033552,3268,Lafayette Park,40.713464,...,1.5,2018,May,June,Thursday,Thursday,21,6,18,16
77331,917900,2018-05-26 18:29:16.344,2018-06-06 09:27:36.708,3192,Liberty Light Rail,40.711242,-74.055701,3426,JCBS Depot,40.709651,...,0.7,2018,May,June,Saturday,Saturday,18,9,29,27
87404,1307,2018-05-31 23:57:44.904,2018-06-01 00:19:31.997,3209,Brunswick St,40.724176,-74.050656,3186,Grove St PATH,40.719586,...,0.5,2018,May,June,Thursday,Thursday,23,0,57,19


In [14]:
tripdata[tripdata["start_day"] != tripdata["end_day"]]

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,dist,year,start_month,end_month,start_day,end_day,start_hour,end_hour,start_minute,end_minute


In [15]:
tripdata[tripdata["start_hour"] != tripdata["end_hour"]]

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,dist,year,start_month,end_month,start_day,end_day,start_hour,end_hour,start_minute,end_minute
8,318,2018-01-02 13:55:58.243,2018-01-02 14:01:16.881,3183,Exchange Place,40.716247,-74.033459,3275,Columbus Drive,40.718355,...,0.3,2018,January,January,Tuesday,Tuesday,13,14,55,1
9,1852,2018-01-02 16:55:29.639,2018-01-02 17:26:22.305,3183,Exchange Place,40.716247,-74.033459,3281,Leonard Gordon Park,40.745910,...,2.4,2018,January,January,Tuesday,Tuesday,16,17,55,26
18,230,2018-01-02 17:58:24.636,2018-01-02 18:02:15.632,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,...,0.4,2018,January,January,Tuesday,Tuesday,17,18,58,2
40,5045,2018-01-03 16:26:50.169,2018-01-03 17:50:55.973,3183,Exchange Place,40.716247,-74.033459,3183,Exchange Place,40.716247,...,0.0,2018,January,January,Wednesday,Wednesday,16,17,26,50
49,12537,2018-01-03 17:29:09.311,2018-01-03 20:58:07.287,3183,Exchange Place,40.716247,-74.033459,3214,Essex Light Rail,40.712774,...,0.3,2018,January,January,Wednesday,Wednesday,17,20,29,58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353858,577,2018-12-09 10:56:42.298,2018-12-09 11:06:19.965,3694,Jackson Square,40.711130,-74.078900,3195,Sip Ave,40.730743,...,1.6,2018,December,December,Sunday,Sunday,10,11,56,6
353868,640,2018-12-12 11:54:45.529,2018-12-12 12:05:25.763,3694,Jackson Square,40.711130,-74.078900,3195,Sip Ave,40.730743,...,1.6,2018,December,December,Wednesday,Wednesday,11,12,54,5
353875,649,2018-12-16 11:55:07.882,2018-12-16 12:05:57.426,3694,Jackson Square,40.711130,-74.078900,3195,Sip Ave,40.730743,...,1.6,2018,December,December,Sunday,Sunday,11,12,55,5
353887,1081,2018-12-22 11:51:46.060,2018-12-22 12:09:47.473,3694,Jackson Square,40.711130,-74.078900,3269,Brunswick & 6th,40.726012,...,1.8,2018,December,December,Saturday,Saturday,11,12,51,9


In [16]:
#!pip install holidays

In [17]:
from datetime import date
import holidays

# Select country
us_holidays = holidays.US()
data=[]
for ptr in holidays.US(years = 2018).items():
    data.append([ptr[0],ptr[1]])
    
df_holiday = pd.DataFrame(data, columns=['date','holiday'])
  
# print dataframe.
df_holiday   

Unnamed: 0,date,holiday
0,2018-01-01,New Year's Day
1,2018-01-15,Martin Luther King Jr. Day
2,2018-02-19,Washington's Birthday
3,2018-05-28,Memorial Day
4,2018-07-04,Independence Day
5,2018-09-03,Labor Day
6,2018-10-08,Columbus Day
7,2018-11-11,Veterans Day
8,2018-11-12,Veterans Day (Observed)
9,2018-11-22,Thanksgiving


### Add holiday details to trip data

In [18]:
tripdata['date'] = tripdata['starttime'].dt.date

In [19]:
df = pd.merge(
    left=tripdata, 
    right=df_holiday, 
    on='date',
    how='left'
)

In [20]:
df['holiday'].fillna('No Holiday', inplace=True)

In [21]:
df.head(10)

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,start_month,end_month,start_day,end_day,start_hour,end_hour,start_minute,end_minute,date,holiday
0,932,2018-01-01 02:06:17.541,2018-01-01 02:21:50.027,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,January,January,Monday,Monday,2,2,6,21,2018-01-01,New Year's Day
1,550,2018-01-01 12:06:18.039,2018-01-01 12:15:28.443,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,January,January,Monday,Monday,12,12,6,15,2018-01-01,New Year's Day
2,510,2018-01-01 12:06:56.978,2018-01-01 12:15:27.810,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,January,January,Monday,Monday,12,12,6,15,2018-01-01,New Year's Day
3,354,2018-01-01 14:53:10.186,2018-01-01 14:59:05.096,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,...,January,January,Monday,Monday,14,14,53,59,2018-01-01,New Year's Day
4,250,2018-01-01 17:34:30.192,2018-01-01 17:38:40.984,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,...,January,January,Monday,Monday,17,17,34,38,2018-01-01,New Year's Day
5,613,2018-01-01 22:05:05.874,2018-01-01 22:15:19.419,3183,Exchange Place,40.716247,-74.033459,3203,Hamilton Park,40.727596,...,January,January,Monday,Monday,22,22,5,15,2018-01-01,New Year's Day
6,290,2018-01-02 12:13:51.794,2018-01-02 12:18:42.107,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,...,January,January,Tuesday,Tuesday,12,12,13,18,2018-01-02,No Holiday
7,381,2018-01-02 12:50:03.343,2018-01-02 12:56:24.644,3183,Exchange Place,40.716247,-74.033459,3205,JC Medical Center,40.71654,...,January,January,Tuesday,Tuesday,12,12,50,56,2018-01-02,No Holiday
8,318,2018-01-02 13:55:58.243,2018-01-02 14:01:16.881,3183,Exchange Place,40.716247,-74.033459,3275,Columbus Drive,40.718355,...,January,January,Tuesday,Tuesday,13,14,55,1,2018-01-02,No Holiday
9,1852,2018-01-02 16:55:29.639,2018-01-02 17:26:22.305,3183,Exchange Place,40.716247,-74.033459,3281,Leonard Gordon Park,40.74591,...,January,January,Tuesday,Tuesday,16,17,55,26,2018-01-02,No Holiday


In [22]:
df['date'] = pd.to_datetime(df['date'])

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 353892 entries, 0 to 353891
Data columns (total 27 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   tripduration             353892 non-null  int64         
 1   starttime                353892 non-null  datetime64[ns]
 2   stoptime                 353892 non-null  datetime64[ns]
 3   start station id         353892 non-null  int64         
 4   start station name       353892 non-null  object        
 5   start station latitude   353892 non-null  float64       
 6   start station longitude  353892 non-null  float64       
 7   end station id           353892 non-null  int64         
 8   end station name         353892 non-null  object        
 9   end station latitude     353892 non-null  float64       
 10  end station longitude    353892 non-null  float64       
 11  bikeid                   353892 non-null  int64         
 12  usertype        

## Read a weather data

In [24]:
weather_data = pd.read_csv("../dataset/nyc_weather_data.csv")
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   datetime        365 non-null    object 
 1   tempmax         365 non-null    float64
 2   tempmin         365 non-null    float64
 3   temp            365 non-null    float64
 4   dew             365 non-null    float64
 5   humidity        365 non-null    float64
 6   snow            365 non-null    float64
 7   snowdepth       365 non-null    float64
 8   windspeed       365 non-null    float64
 9   visibility      365 non-null    float64
 10  solarradiation  365 non-null    float64
 11  cloudcover      365 non-null    float64
 12  conditions      365 non-null    object 
 13  description     365 non-null    object 
dtypes: float64(11), object(3)
memory usage: 40.0+ KB


In [25]:
weather_data.isnull().sum()

datetime          0
tempmax           0
tempmin           0
temp              0
dew               0
humidity          0
snow              0
snowdepth         0
windspeed         0
visibility        0
solarradiation    0
cloudcover        0
conditions        0
description       0
dtype: int64

In [26]:
weather_data['date'] = pd.to_datetime(weather_data['datetime'])
weather_data.dtypes

datetime                  object
tempmax                  float64
tempmin                  float64
temp                     float64
dew                      float64
humidity                 float64
snow                     float64
snowdepth                float64
windspeed                float64
visibility               float64
solarradiation           float64
cloudcover               float64
conditions                object
description               object
date              datetime64[ns]
dtype: object

In [27]:
complete_df = pd.merge(
    left=df, 
    right=weather_data, 
    on='date',
    how='left'
)

In [28]:
complete_df.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,dew,humidity,snow,snowdepth,windspeed,visibility,solarradiation,cloudcover,conditions,description
0,932,2018-01-01 02:06:17.541,2018-01-01 02:21:50.027,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,-19.9,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.
1,550,2018-01-01 12:06:18.039,2018-01-01 12:15:28.443,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,-19.9,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.
2,510,2018-01-01 12:06:56.978,2018-01-01 12:15:27.810,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,-19.9,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.
3,354,2018-01-01 14:53:10.186,2018-01-01 14:59:05.096,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,...,-19.9,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.
4,250,2018-01-01 17:34:30.192,2018-01-01 17:38:40.984,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,...,-19.9,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.


In [29]:
complete_df.isnull().sum()

tripduration               0
starttime                  0
stoptime                   0
start station id           0
start station name         0
start station latitude     0
start station longitude    0
end station id             0
end station name           0
end station latitude       0
end station longitude      0
bikeid                     0
usertype                   0
birth year                 0
gender                     0
dist                       0
year                       0
start_month                0
end_month                  0
start_day                  0
end_day                    0
start_hour                 0
end_hour                   0
start_minute               0
end_minute                 0
date                       0
holiday                    0
datetime                   0
tempmax                    0
tempmin                    0
temp                       0
dew                        0
humidity                   0
snow                       0
snowdepth     

## add seasonality data
1. spring (March-May)
2. summer (June-August)
3. autumn (September-November) 
4. winter (December-February

In [30]:
def add_seasonality(month):
    if (month == 'March' or month == 'April' or month == 'May'):
        return 'spring'
    elif month == 'June' or month == 'July' or month == 'August':
        return 'summer'
    elif month == 'September' or month == 'October' or month == 'November':
        return 'autumn'
    elif month == 'December' or month == 'January' or month == 'February':
        return 'winter'

In [33]:

complete_df['seasons'] = complete_df.apply(lambda r: add_seasonality(r['start_month']) , axis=1)

In [34]:
complete_df

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,humidity,snow,snowdepth,windspeed,visibility,solarradiation,cloudcover,conditions,description,seasons
0,932,2018-01-01 02:06:17.541,2018-01-01 02:21:50.027,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.,winter
1,550,2018-01-01 12:06:18.039,2018-01-01 12:15:28.443,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.,winter
2,510,2018-01-01 12:06:56.978,2018-01-01 12:15:27.810,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,...,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.,winter
3,354,2018-01-01 14:53:10.186,2018-01-01 14:59:05.096,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,...,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.,winter
4,250,2018-01-01 17:34:30.192,2018-01-01 17:38:40.984,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,...,47.8,0.0,0.1,18.5,16.0,106.7,0.3,Clear,Clear conditions throughout the day.,winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353887,1081,2018-12-22 11:51:46.060,2018-12-22 12:09:47.473,3694,Jackson Square,40.711130,-74.078900,3269,Brunswick & 6th,40.726012,...,67.4,0.0,0.0,45.4,15.3,47.8,90.7,"Rain, Overcast",Cloudy skies throughout the day with early mor...,winter
353888,344,2018-12-25 21:40:09.866,2018-12-25 21:45:54.267,3694,Jackson Square,40.711130,-74.078900,3280,Astor Place,40.719282,...,52.8,0.0,0.0,24.9,16.0,86.5,33.7,Partially cloudy,Partly cloudy throughout the day.,winter
353889,1233,2018-12-29 12:55:45.969,2018-12-29 13:16:19.596,3694,Jackson Square,40.711130,-74.078900,3186,Grove St PATH,40.719586,...,63.9,0.0,0.0,39.2,15.8,94.2,64.6,Partially cloudy,Partly cloudy throughout the day.,winter
353890,1057,2018-12-30 15:32:09.332,2018-12-30 15:49:46.351,3694,Jackson Square,40.711130,-74.078900,3213,Van Vorst Park,40.718489,...,66.0,0.0,0.0,20.8,15.5,35.6,73.2,Partially cloudy,Partly cloudy throughout the day.,winter


In [35]:
complete_df['seasons'].unique()

array(['winter', 'spring', 'summer', 'autumn'], dtype=object)

In [36]:
complete_df.to_csv("../dataset/complete_bike_sharing.csv", index=False,)