## CitiBike data is collected for 1 year 2018
https://ride.citibikenyc.com/system-data

Data Schema:
Trip Duration (seconds)

*   Start Time and Date
*   Stop Time and Date
*   Start Station Name
*   End Station Name
*   Station ID
*   Station Lat/Long
*   Bike ID
*   User Type (Customer = 24-hour pass or 3-day pass user; Subscriber = Annual    Member)
*   Gender (Zero=unknown; 1=male; 2=female)
*   Year of Birth
 

In [128]:
import pandas as pd
import numpy as np
import haversine as hs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline
from glob import iglob

import warnings
warnings.filterwarnings("ignore")


In [129]:
#!pip install haversine

In [130]:
path = r'D:\Beuth Sems\Thesis\dataset\JC-2018*.csv'

all_rec = iglob(path, recursive=True)     
dataframes = (pd.read_csv(f) for f in all_rec)
tripdata = pd.concat(dataframes, ignore_index=True)  

In [131]:
tripdata.describe()

Unnamed: 0,tripduration,start station id,start station latitude,start station longitude,end station id,end station latitude,end station longitude,bikeid,birth year,gender
count,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0,353892.0
mean,673.393,3264.996606,40.722724,-74.046039,3258.408418,40.722326,-74.045505,29452.498808,1980.387875,1.152388
std,7004.022,138.429108,0.007251,0.010755,147.610023,0.007095,0.010759,2529.992371,10.268528,0.500198
min,61.0,3183.0,40.69264,-74.096937,127.0,40.679331,-74.096937,14697.0,1887.0,0.0
25%,228.0,3192.0,40.718211,-74.050444,3186.0,40.717732,-74.049968,26315.0,1974.0,1.0
50%,335.0,3205.0,40.721525,-74.043845,3203.0,40.721124,-74.043117,29493.0,1983.0,1.0
75%,549.0,3272.0,40.727224,-74.038051,3272.0,40.727224,-74.037683,29679.0,1988.0,1.0
max,2061932.0,3694.0,40.748716,-74.032108,3694.0,40.814326,-73.947821,35009.0,2002.0,2.0


In [132]:
tripdata.isnull().sum()

tripduration               0
starttime                  0
stoptime                   0
start station id           0
start station name         0
start station latitude     0
start station longitude    0
end station id             0
end station name           0
end station latitude       0
end station longitude      0
bikeid                     0
usertype                   0
birth year                 0
gender                     0
dtype: int64

In [133]:
tripdata.shape

(353892, 15)

In [134]:
tripdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353892 entries, 0 to 353891
Data columns (total 15 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   tripduration             353892 non-null  int64  
 1   starttime                353892 non-null  object 
 2   stoptime                 353892 non-null  object 
 3   start station id         353892 non-null  int64  
 4   start station name       353892 non-null  object 
 5   start station latitude   353892 non-null  float64
 6   start station longitude  353892 non-null  float64
 7   end station id           353892 non-null  int64  
 8   end station name         353892 non-null  object 
 9   end station latitude     353892 non-null  float64
 10  end station longitude    353892 non-null  float64
 11  bikeid                   353892 non-null  int64  
 12  usertype                 353892 non-null  object 
 13  birth year               353892 non-null  int64  
 14  gend

### Explode the start and end date into different columns


In [135]:
tripdata['starttime'] = pd.to_datetime(tripdata['starttime'])
tripdata['stoptime'] = pd.to_datetime(tripdata['stoptime'])
tripdata['date'] = tripdata['starttime'].dt.date
tripdata["month"] = tripdata["starttime"].dt.month
# tripdata['day'] = tripdata["starttime"].dt.day_name()
# tripdata["month"] = tripdata["starttime"].dt.month
tripdata['day'] = tripdata["starttime"].dt.day
tripdata["hour"] = tripdata["starttime"].dt.hour
tripdata["min"] = tripdata["starttime"].dt.minute
tripdata["year"] =tripdata['starttime'].dt.year

In [136]:
## Convert seconds tripduration into miles
tripdata['tripduration'] = np.round(tripdata['tripduration']/60,2)

## https://www.visualcrossing.com/weather/weather-data-services

temp ---	C
dew	 --- DC
feelslike --- C
precip --- mm
precipprob----%
snow----cm
snowdepth---cm
windspeed---kph
winddir---degrees
visibility---km
cloudcover---%
humidity---%
pressure----mb
solarradiation---W/m2
solarenergy---MJ/m2

In [137]:
data1 = pd.read_csv("../../dataset/weather/jan2018.csv")

In [138]:
data2 = pd.read_csv("../../dataset/weather/dec2018.csv")

In [139]:
data1.isnull().sum()

name                  0
datetime              0
temp                  0
feelslike             0
dew                   0
humidity              0
precip                0
precipprob            0
preciptype          638
snow                  0
snowdepth             0
windgust            383
windspeed             0
winddir               0
sealevelpressure      0
cloudcover            0
visibility            0
solarradiation        0
solarenergy         353
uvindex               0
severerisk          744
conditions            0
icon                  0
stations              0
dtype: int64

In [140]:
data2.isnull().sum()

name                   0
datetime               0
temp                   0
feelslike              0
dew                    0
humidity               0
precip                 0
precipprob             0
preciptype          6715
snow                   0
snowdepth              0
windgust            5709
windspeed              0
winddir                0
sealevelpressure       0
cloudcover             0
visibility             0
solarradiation         0
solarenergy         3334
uvindex                0
severerisk          8016
conditions             0
icon                   0
stations               0
dtype: int64

In [141]:
data1.drop(['preciptype', 'windgust','solarenergy','severerisk'], axis=1, inplace=True)
data2.drop(['preciptype', 'windgust','solarenergy','severerisk'], axis=1, inplace=True)

In [142]:
d_con = [data1, data2]
weather_data = pd.concat(d_con)

In [143]:
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8760 entries, 0 to 8015
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              8760 non-null   object 
 1   datetime          8760 non-null   object 
 2   temp              8760 non-null   float64
 3   feelslike         8760 non-null   float64
 4   dew               8760 non-null   float64
 5   humidity          8760 non-null   float64
 6   precip            8760 non-null   float64
 7   precipprob        8760 non-null   int64  
 8   snow              8760 non-null   float64
 9   snowdepth         8760 non-null   float64
 10  windspeed         8760 non-null   float64
 11  winddir           8760 non-null   float64
 12  sealevelpressure  8760 non-null   float64
 13  cloudcover        8760 non-null   float64
 14  visibility        8760 non-null   float64
 15  solarradiation    8760 non-null   float64
 16  uvindex           8760 non-null   int64  


In [144]:
weather_data['datetime'] = pd.to_datetime(weather_data['datetime'])
weather_data['date'] = weather_data['datetime'].dt.date
# data["month"] = data["datetime"].dt.month_name()
weather_data['day'] = weather_data["datetime"].dt.day
weather_data["month"] = weather_data["datetime"].dt.month
# data['day'] = data["datetime"].dt.day_name()
weather_data["hour"] = weather_data["datetime"].dt.hour
weather_data["min"] = weather_data["datetime"].dt.minute
weather_data["year"] = weather_data['datetime'].dt.year

In [145]:
## Convert windspeed from kph to mph
weather_data['windspeed'] = np.round(weather_data['windspeed']/1.609344)

In [146]:
## Convert visibility km to miles
##1 km is equals to 0.62137119 miles
weather_data['visibility'] = np.round(weather_data['visibility']*0.62137119)

In [147]:
tripdata.drop(["stoptime","year","date"], axis=1, inplace=True)

In [148]:
weather_data.drop(["name","datetime","year","stations", "icon", "min"], axis=1, inplace=True)

In [149]:
result = pd.merge(tripdata, weather_data, on=["month", "day", "hour"], how="left")

In [150]:
result.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 353897 entries, 0 to 353896
Data columns (total 35 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   tripduration             353897 non-null  float64       
 1   starttime                353897 non-null  datetime64[ns]
 2   start station id         353897 non-null  int64         
 3   start station name       353897 non-null  object        
 4   start station latitude   353897 non-null  float64       
 5   start station longitude  353897 non-null  float64       
 6   end station id           353897 non-null  int64         
 7   end station name         353897 non-null  object        
 8   end station latitude     353897 non-null  float64       
 9   end station longitude    353897 non-null  float64       
 10  bikeid                   353897 non-null  int64         
 11  usertype                 353897 non-null  object        
 12  birth year      

In [151]:
result.head(3)

Unnamed: 0,tripduration,starttime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,...,snowdepth,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,uvindex,conditions,date
0,15.53,2018-01-01 02:06:17.541,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,0.0,3.0,295.0,1026.8,0.0,6.0,0.0,0,Clear,2018-01-01
1,9.17,2018-01-01 12:06:18.039,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,0.0,4.0,302.0,1028.0,0.2,6.0,493.0,5,Clear,2018-01-01
2,8.5,2018-01-01 12:06:56.978,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,0.0,4.0,302.0,1028.0,0.2,6.0,493.0,5,Clear,2018-01-01


manhatten distnace calculated using :
https://medium.com/@simplyjk/why-manhattan-distance-formula-doesnt-apply-to-manhattan-7db0ebb1c5f6


In [152]:
def haversine(lat1, lon1, lat2, lon2):     
    R = 3958.76 # Earth radius in miles          
    dLat, dLon, lat1, lat2 = np.radians(lat2 - lat1), np.radians(lon2 - lon1),  np.radians(lat1), np.radians(lat2)     
    
    a =  np.sin(dLat/2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dLon/2) ** 2     
    c = 2 * np.arcsin(np.sqrt(a))          
    return R * c

In [153]:
def manhattan_dist(lat1, lon1, lat2, lon2):
    
    # Pickup coordinates
    p = np.stack([lat1, lon1], axis = 1)
    
    # Dropoff coordinates
    d = np.stack([lat2, lon2], axis = 1)
    
    theta1 = np.radians(-28.904)
    theta2 = np.radians(28.904)
    
    ## Rotation matrix
    R1 = np.array([[np.cos(theta1), np.sin(theta1)], 
                   [-np.sin(theta1), np.cos(theta1)]]
                 )
    R2 = np.array([[np.cos(theta2), np.sin(theta2)], 
                   [-np.sin(theta2), np.cos(theta2)]]
                 )
    
    # Rotate Pickup and Dropoff coordinates by -29 degress in World2
    pT = R1 @ p.T  
    dT = R1 @ d.T  
    
    # Coordinates of Hinge point in the rotated world 
    vT = np.stack((pT[0,:], dT[1,:]))
    # Coordinates of Hinge point in the real world 
    v = R2 @ vT
    """ Finally,
    
    Manhattan distance 
            = 
    Haversine dist between Pickup & Hingept
            +
    Haversine dist between Hinge pt & Dropoff location 
    """
    
    return (haversine(p.T[0], p.T[1], v[0], v[1]) + 
            haversine(v[0], v[1], d.T[0], d.T[1])
           )

In [154]:
result["dist"] = manhattan_dist(
    result["start station latitude"], result["start station longitude"],
    result["end station latitude"], result["end station longitude"]
)

In [155]:
result["dist"]

0         1.084267
1         1.084267
2         1.084267
3         0.415696
4         0.240932
            ...   
353892    1.872980
353893    0.828647
353894    2.657139
353895    2.315132
353896    0.902881
Name: dist, Length: 353897, dtype: float64

In [156]:
result.head(5)

Unnamed: 0,tripduration,starttime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,...,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,uvindex,conditions,date,dist
0,15.53,2018-01-01 02:06:17.541,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,3.0,295.0,1026.8,0.0,6.0,0.0,0,Clear,2018-01-01,1.084267
1,9.17,2018-01-01 12:06:18.039,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,4.0,302.0,1028.0,0.2,6.0,493.0,5,Clear,2018-01-01,1.084267
2,8.5,2018-01-01 12:06:56.978,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,4.0,302.0,1028.0,0.2,6.0,493.0,5,Clear,2018-01-01,1.084267
3,5.9,2018-01-01 14:53:10.186,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,-74.038526,...,0.0,354.0,1027.0,0.2,6.0,392.0,4,Clear,2018-01-01,0.415696
4,4.17,2018-01-01 17:34:30.192,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,-74.034234,...,4.0,300.0,1027.1,0.2,6.0,10.0,0,Clear,2018-01-01,0.240932


## Explode the start and end date into different columns

In [157]:
result.columns

Index(['tripduration', 'starttime', 'start station id', 'start station name',
       'start station latitude', 'start station longitude', 'end station id',
       'end station name', 'end station latitude', 'end station longitude',
       'bikeid', 'usertype', 'birth year', 'gender', 'month', 'day', 'hour',
       'min', 'temp', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob',
       'snow', 'snowdepth', 'windspeed', 'winddir', 'sealevelpressure',
       'cloudcover', 'visibility', 'solarradiation', 'uvindex', 'conditions',
       'date', 'dist'],
      dtype='object')

In [158]:
result['birthyear'] = pd.to_numeric(result['birth year'], downcast='integer')
result['years_old'] = 2018 - result['birthyear'] 
Age_Groups = ["<20", "20-29", "30-39", "40-49", "50-59", "60-64","65+"]
Age_Groups_Limits = [0, 20, 30, 40, 50, 60, 65, np.inf]
Age_Min = 0
Age_Max = 100
result["age_group"] = pd.cut(result["years_old"], Age_Groups_Limits, labels=Age_Groups)

In [159]:
#!pip install holidays

In [160]:
from datetime import date
import holidays

# Select country
Germany_holidays = holidays.NYSE()
data=[]
for ptr in holidays.NYSE(years = 2018).items():
    data.append([ptr[0],ptr[1]])
    
df_holiday = pd.DataFrame(data, columns=['date','holiday'])
  
# print dataframe.
df_holiday 

Unnamed: 0,date,holiday
0,2018-01-01,New Year's Day
1,2018-01-15,Martin Luther King Jr. Day
2,2018-02-19,Washington's Birthday
3,2018-03-30,Good Friday
4,2018-05-28,Memorial Day
5,2018-07-04,Independence Day
6,2018-09-03,Labor Day
7,2018-11-22,Thanksgiving Day
8,2018-12-25,Christmas Day


### Add holiday details to trip data

In [161]:
df = pd.merge(
    left=result, 
    right=df_holiday, 
    on='date',
    how='left'
)

In [162]:
df['holiday'].fillna('No Holiday', inplace=True)

In [163]:
df.head(3)

Unnamed: 0,tripduration,starttime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,...,visibility,solarradiation,uvindex,conditions,date,dist,birthyear,years_old,age_group,holiday
0,15.53,2018-01-01 02:06:17.541,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,6.0,0.0,0,Clear,2018-01-01,1.084267,1992,26,20-29,New Year's Day
1,9.17,2018-01-01 12:06:18.039,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,6.0,493.0,5,Clear,2018-01-01,1.084267,1969,49,40-49,New Year's Day
2,8.5,2018-01-01 12:06:56.978,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,6.0,493.0,5,Clear,2018-01-01,1.084267,1946,72,65+,New Year's Day


In [164]:
df.drop(['day','month'], axis=1, inplace=True)

In [165]:
df['starttime'] = pd.to_datetime(df['starttime'])
df['day'] = df["starttime"].dt.day_name()
df["month"] = df["starttime"].dt.month_name()

## add seasonality data
1. spring (March-May)
2. summer (June-August)
3. autumn (September-November) 
4. winter (December-February

In [166]:
def add_seasonality(month):
    if (month == 'March' or month == 'April' or month == 'May'):
        return 'spring'
    elif month == 'June' or month == 'July' or month == 'August':
        return 'summer'
    elif month == 'September' or month == 'October' or month == 'November':
        return 'autumn'
    elif month == 'December' or month == 'January' or month == 'February':
        return 'winter'

In [167]:
df['month'].unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype=object)

In [168]:
df['seasons'] = df.apply(lambda r: add_seasonality(r['month']) , axis=1)

In [169]:
df.head(5)

Unnamed: 0,tripduration,starttime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,...,conditions,date,dist,birthyear,years_old,age_group,holiday,day,month,seasons
0,15.53,2018-01-01 02:06:17.541,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,Clear,2018-01-01,1.084267,1992,26,20-29,New Year's Day,Monday,January,winter
1,9.17,2018-01-01 12:06:18.039,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,Clear,2018-01-01,1.084267,1969,49,40-49,New Year's Day,Monday,January,winter
2,8.5,2018-01-01 12:06:56.978,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,...,Clear,2018-01-01,1.084267,1946,72,65+,New Year's Day,Monday,January,winter
3,5.9,2018-01-01 14:53:10.186,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,-74.038526,...,Clear,2018-01-01,0.415696,1994,24,20-29,New Year's Day,Monday,January,winter
4,4.17,2018-01-01 17:34:30.192,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,-74.034234,...,Clear,2018-01-01,0.240932,1991,27,20-29,New Year's Day,Monday,January,winter


In [170]:
df['seasons'].unique()

array(['winter', 'spring', 'summer', 'autumn'], dtype=object)

In [171]:
## Convert gender into categorical value
##Gender (Zero=unknown; 1=male; 2=female)
def add_gender_cat_values(gender):
    if (gender == 0):
        return 'unknown'
    elif gender == 1:
        return 'male'
    elif gender == 2:
        return 'female'

In [172]:
df['gender_cat'] = df.apply(lambda r: add_gender_cat_values(r['gender']) , axis=1)

In [173]:
df['gender_cat'].unique()

array(['male', 'female', 'unknown'], dtype=object)

In [174]:
df.tail(2)

Unnamed: 0,tripduration,starttime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,...,date,dist,birthyear,years_old,age_group,holiday,day,month,seasons,gender_cat
353895,17.62,2018-12-30 15:32:09.332,3694,Jackson Square,40.71113,-74.0789,3213,Van Vorst Park,40.718489,-74.047727,...,2018-12-30,2.315132,1991,27,20-29,No Holiday,Sunday,December,winter,female
353896,5.02,2018-12-31 16:34:11.934,3694,Jackson Square,40.71113,-74.0789,3277,Communipaw & Berry Lane,40.714358,-74.066611,...,2018-12-31,0.902881,1991,27,20-29,No Holiday,Monday,December,winter,male


In [175]:
df.columns

Index(['tripduration', 'starttime', 'start station id', 'start station name',
       'start station latitude', 'start station longitude', 'end station id',
       'end station name', 'end station latitude', 'end station longitude',
       'bikeid', 'usertype', 'birth year', 'gender', 'hour', 'min', 'temp',
       'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'snow',
       'snowdepth', 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover',
       'visibility', 'solarradiation', 'uvindex', 'conditions', 'date', 'dist',
       'birthyear', 'years_old', 'age_group', 'holiday', 'day', 'month',
       'seasons', 'gender_cat'],
      dtype='object')

In [176]:
df.drop(['gender','starttime'], axis=1, inplace=True)

In [177]:
df.columns

Index(['tripduration', 'start station id', 'start station name',
       'start station latitude', 'start station longitude', 'end station id',
       'end station name', 'end station latitude', 'end station longitude',
       'bikeid', 'usertype', 'birth year', 'hour', 'min', 'temp', 'feelslike',
       'dew', 'humidity', 'precip', 'precipprob', 'snow', 'snowdepth',
       'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'uvindex', 'conditions', 'date', 'dist', 'birthyear',
       'years_old', 'age_group', 'holiday', 'day', 'month', 'seasons',
       'gender_cat'],
      dtype='object')

In [178]:
df.rename(columns={"start station id": "start_station_id", "start station name": "start_station_name", "start station latitude": "start_lat",
                  "start station longitude":"start_lon","end station id":"end_station_id","end station name":"end_station_name","end station latitude":"end_station_name",
                  "end station longitude":"end_lon","end station latitude":"end_lat", "gender_cat": "gender"},inplace=True)

In [179]:
df.drop(['birth year'], axis=1, inplace=True)

In [180]:
df.columns

Index(['tripduration', 'start_station_id', 'start_station_name', 'start_lat',
       'start_lon', 'end_station_id', 'end_station_name', 'end_lat', 'end_lon',
       'bikeid', 'usertype', 'hour', 'min', 'temp', 'feelslike', 'dew',
       'humidity', 'precip', 'precipprob', 'snow', 'snowdepth', 'windspeed',
       'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'uvindex', 'conditions', 'date', 'dist', 'birthyear',
       'years_old', 'age_group', 'holiday', 'day', 'month', 'seasons',
       'gender'],
      dtype='object')

### convert start_station_id, end_station_id and bikeidas categorical values

In [181]:
def convert_to_categorical_end_station_id(x):
    val = "es_"+ str(x)
    return val
    
df['end_station_id'] = df['end_station_id'].apply(convert_to_categorical)


In [182]:
def convert_to_categorical_start_station_id(x):
    val = "ss_"+ str(x)
    return val
    
df['start_station_id'] = df['start_station_id'].apply(convert_to_categorical_start_station_id)


In [183]:
def convert_to_categorical_bikeid(x):
    val = "b_"+ str(x)
    return val
    
df['bikeid'] = df['bikeid'].apply(convert_to_categorical_bikeid)


In [184]:
df.columns

Index(['tripduration', 'start_station_id', 'start_station_name', 'start_lat',
       'start_lon', 'end_station_id', 'end_station_name', 'end_lat', 'end_lon',
       'bikeid', 'usertype', 'hour', 'min', 'temp', 'feelslike', 'dew',
       'humidity', 'precip', 'precipprob', 'snow', 'snowdepth', 'windspeed',
       'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'uvindex', 'conditions', 'date', 'dist', 'birthyear',
       'years_old', 'age_group', 'holiday', 'day', 'month', 'seasons',
       'gender'],
      dtype='object')

In [185]:
df.head()

Unnamed: 0,tripduration,start_station_id,start_station_name,start_lat,start_lon,end_station_id,end_station_name,end_lat,end_lon,bikeid,...,date,dist,birthyear,years_old,age_group,holiday,day,month,seasons,gender
0,15.53,ss_3183,Exchange Place,40.716247,-74.033459,es_3199,Newport Pkwy,40.728745,-74.032108,b_31929,...,2018-01-01,1.084267,1992,26,20-29,New Year's Day,Monday,January,winter,male
1,9.17,ss_3183,Exchange Place,40.716247,-74.033459,es_3199,Newport Pkwy,40.728745,-74.032108,b_31845,...,2018-01-01,1.084267,1969,49,40-49,New Year's Day,Monday,January,winter,female
2,8.5,ss_3183,Exchange Place,40.716247,-74.033459,es_3199,Newport Pkwy,40.728745,-74.032108,b_31708,...,2018-01-01,1.084267,1946,72,65+,New Year's Day,Monday,January,winter,male
3,5.9,ss_3183,Exchange Place,40.716247,-74.033459,es_3267,Morris Canal,40.712419,-74.038526,b_31697,...,2018-01-01,0.415696,1994,24,20-29,New Year's Day,Monday,January,winter,male
4,4.17,ss_3183,Exchange Place,40.716247,-74.033459,es_3639,Harborside,40.719252,-74.034234,b_31861,...,2018-01-01,0.240932,1991,27,20-29,New Year's Day,Monday,January,winter,male


In [186]:
df.to_csv("../../dataset/complete_bike_sharing.csv", index=False,)