In [None]:
import pandas as pd
import numpy as np
import datetime

In [None]:
# Read the raw station-status CSV into `df` and preview its first rows.
raw_status_csv = '/content/drive/MyDrive/CSCI 5502 Data Mining/01 Project/Data/Bcycle_data_to_date.csv'
df = pd.read_csv(raw_status_csv)
df.head()

Unnamed: 0,station_last_updated,status_last_reported,station_id,station_name,station_address,station_longitude,station_latitude,station_is_returning,station_is_renting,station_is_installed,station_type,docks_available,bikes_available,electic_bikes_available,smart_bikes_available,classic_bikes_available,datatime_mtd
0,1697507102,1697507104,bcycle_boulder_1855,Folsom & Colorado,SE corner of Folsom & Colorado,-105.26385,40.00811,1,1,1,Kiosk and Station,15,0,0,0,0,10-16-23 19:45:03
1,1697507102,1697507104,bcycle_boulder_1858,15th & Pearl,15th Street & Pearl Street,-105.27584,40.01872,1,1,1,Kiosk and Station,6,9,9,0,0,10-16-23 19:45:03
2,1697507102,1697507104,bcycle_boulder_1859,11th & Pearl,11th Street & Pearl Street,-105.28116,40.01747,1,1,1,Kiosk and Station,14,0,0,0,0,10-16-23 19:45:03
3,1697507102,1697507104,bcycle_boulder_1860,13th & Spruce,13th Street & Spruce Street,-105.2789,40.01909,1,1,1,Kiosk and Station,11,3,3,0,0,10-16-23 19:45:03
4,1697507102,1697507104,bcycle_boulder_1861,UCAR Center Green,3080 Center Green Drive,-105.24611,40.03154,1,1,1,Kiosk and Station,11,0,0,0,0,10-16-23 19:45:03


## 1. Format and roundoff Datetime

In [None]:
# NOTE(review): the original header of this cell mentions combining additional features
# (temperature, CU class and event schedule); nothing in this cell does that yet.

# Parse the snapshot timestamp text (e.g. '10-16-23 19:45:03') into a datetime column.
df['datatime_mtd'] = pd.to_datetime(df['datatime_mtd'], format='%m-%d-%y %H:%M:%S')

# Round every timestamp DOWN to the start of its time interval (3 or 1 min): subtract the
# minutes past the last multiple of the interval and zero the seconds.
# This is plain column arithmetic (no row-by-row `apply`), so it stays fast on large frames.
time_interval_to_round = 3
snapshot_time = df['datatime_mtd']
df['datatime_rnd'] = (
    snapshot_time
    - pd.to_timedelta(snapshot_time.dt.minute % time_interval_to_round, unit='m')
    - pd.to_timedelta(snapshot_time.dt.second, unit='s')
)
# Alternative when the interval divides 60 evenly:
#   df['datatime_rnd'] = df['datatime_mtd'].dt.floor(f'{time_interval_to_round}min')

# Rounding makes several snapshots of one station share the same rounded time (minutes 3, 4
# and 5 all become 3). Keep only the earliest snapshot of every (station, rounded time) pair,
# as it is the closest representation of the rounded instant.
# The stable sort makes "first" mean "earliest" even if the CSV rows are not in chronological
# order; rows that are already ordered keep their relative order.
df = (
    df.sort_values('datatime_mtd', kind='stable')
      .drop_duplicates(subset=['station_id', 'datatime_rnd'], keep='first')
)
# Optional checks:
#   print("Number of duplicates:", df.duplicated(subset=['station_id', 'datatime_rnd']).sum())
#   print(df.groupby(['station_id', 'datatime_rnd']).size().reset_index(name='count_of_duplicates').sort_values(by='count_of_duplicates', ascending=False))

# Split the rounded timestamp into its date and time components.
rounded_parts = df['datatime_rnd'].dt
df['date_rnd'] = rounded_parts.date
df['year_rnd'] = rounded_parts.year
df['month_rnd'] = rounded_parts.month
df['day_rnd'] = rounded_parts.day
df['day_of_week_rnd'] = rounded_parts.day_name()
df['time_rnd'] = rounded_parts.time
df['hour_rnd'] = rounded_parts.hour
df['minute_rnd'] = rounded_parts.minute
df['second_rnd'] = rounded_parts.second

df.head(5)

Unnamed: 0,station_last_updated,status_last_reported,station_id,station_name,station_address,station_longitude,station_latitude,station_is_returning,station_is_renting,station_is_installed,...,datatime_rnd,date_rnd,year_rnd,month_rnd,day_rnd,day_of_week_rnd,time_rnd,hour_rnd,minute_rnd,second_rnd
0,1697507102,1697507104,bcycle_boulder_1855,Folsom & Colorado,SE corner of Folsom & Colorado,-105.26385,40.00811,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
1,1697507102,1697507104,bcycle_boulder_1858,15th & Pearl,15th Street & Pearl Street,-105.27584,40.01872,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
2,1697507102,1697507104,bcycle_boulder_1859,11th & Pearl,11th Street & Pearl Street,-105.28116,40.01747,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
3,1697507102,1697507104,bcycle_boulder_1860,13th & Spruce,13th Street & Spruce Street,-105.2789,40.01909,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
4,1697507102,1697507104,bcycle_boulder_1861,UCAR Center Green,3080 Center Green Drive,-105.24611,40.03154,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0


## 2. Calculate the total station capacity

Station capacity may be highly correlated with the target. For example, a busy station with a low capacity might consistently have fewer bikes available.

In [None]:
# Station capacity for a snapshot = docks available + bikes available in that same row.
df['station_capacity'] = df['docks_available'].add(df['bikes_available'])

## 3 Check for missing dates and manage them

* If only a few rows are missing due to latency in data capture, replace them with the status from the previous datetime.
* If more than a quarter of a day is missing, fill the gap with the values from a similar available date.

## 4. Calculate the number of new cycles and docks available in each station for every time instance

In [None]:
# TODO: check for missing dates by building an ideal (complete) dataframe and handle the gaps (see section 3) - not implemented in this cell

### 4.1 Create a grouped dataframe and calculate new docks and bikes available for every station at every datetime

In [None]:
# Order the snapshots chronologically.
df = df.sort_values(by='datatime_mtd')

# One row per (station, snapshot time) with the docks and bikes available at that time.
# groupby sorts by its keys, so each station's rows end up contiguous and in time order.
grouped_df = (
    df.groupby(['station_id', 'datatime_mtd'])
      .agg({'docks_available': 'sum', 'bikes_available': 'sum'})
      .reset_index()
)

# Newly available docks/bikes = increase since the SAME station's previous snapshot.
# The diff must be taken per station: a plain `.diff()` on the flattened frame would compare
# the first snapshot of each station with the last snapshot of the preceding station.
# A station's first snapshot (no predecessor -> NaN) and any decrease are reported as 0.
grouped_df['new_docks_available'] = (
    grouped_df.groupby('station_id')['docks_available']
              .diff().fillna(0).clip(lower=0).astype(int)
)
grouped_df['new_bikes_available'] = (
    grouped_df.groupby('station_id')['bikes_available']
              .diff().fillna(0).clip(lower=0).astype(int)
)

grouped_df.head(10)

Unnamed: 0,station_id,datatime_mtd,docks_available,bikes_available,new_docks_available,new_bikes_available
0,bcycle_boulder_1855,2023-10-16 19:45:03,15,0,0,0
1,bcycle_boulder_1855,2023-10-16 20:00:03,14,1,0,1
2,bcycle_boulder_1855,2023-10-16 20:16:20,15,0,1,0
3,bcycle_boulder_1855,2023-10-16 20:38:01,15,0,0,0
4,bcycle_boulder_1855,2023-10-16 20:49:14,14,1,0,1
5,bcycle_boulder_1855,2023-10-16 21:03:41,15,0,1,0
6,bcycle_boulder_1855,2023-10-16 21:19:04,15,0,0,0
7,bcycle_boulder_1855,2023-10-16 22:01:03,12,3,0,3
8,bcycle_boulder_1855,2023-10-16 22:18:27,13,2,1,0
9,bcycle_boulder_1855,2023-10-16 22:33:03,15,0,2,0


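SQL query that performs the equivalent operation (note: unlike the pandas code, it additionally keeps only rows where the station is flagged as returning, installed and renting)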

-- For every station and snapshot time: the docks/bikes available, plus the increase since
-- that station's previous snapshot (0 for a station's first snapshot and whenever the
-- count did not grow).
-- PARTITION BY STATION_ID keeps LAG() inside a single station; ordering the window over
-- (STATION_ID, DATATIME_MTD) without a partition compared the first snapshot of a station
-- with the last snapshot of the preceding station.
-- Only rows with STATION_IS_RETURNING, STATION_IS_INSTALLED and STATION_IS_RENTING all
-- equal to 1 are considered.
SELECT
	STATION_ID,
	DATATIME_MTD,
	SUM(DOCKS_AVAILABLE) AS DOCKS_AVAILABLE,
	CASE
		WHEN SUM(DOCKS_AVAILABLE) - LAG(SUM(DOCKS_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD) IS NULL THEN 0
		WHEN SUM(DOCKS_AVAILABLE) - LAG(SUM(DOCKS_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD) <= 0 THEN 0
		ELSE SUM(DOCKS_AVAILABLE) - LAG(SUM(DOCKS_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD)
	END AS NEW_DOCKS_AVAILABILE,
	SUM(BIKES_AVAILABLE) AS BIKES_AVAILABLE,
	CASE
		WHEN SUM(BIKES_AVAILABLE) - LAG(SUM(BIKES_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD) IS NULL THEN 0
		WHEN SUM(BIKES_AVAILABLE) - LAG(SUM(BIKES_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD) <= 0 THEN 0
		ELSE SUM(BIKES_AVAILABLE) - LAG(SUM(BIKES_AVAILABLE)) OVER (PARTITION BY STATION_ID ORDER BY DATATIME_MTD)
	END AS NEW_BIKES_AVAILABILE
FROM
	mastertables."Bcycle_Status_TD"
WHERE
	STATION_IS_RETURNING = 1
	AND STATION_IS_INSTALLED = 1
	AND STATION_IS_RENTING = 1
GROUP BY
	STATION_ID,
	DATATIME_MTD
ORDER BY
	DATATIME_MTD ASC
;

--SELECT * FROM mastertables."Bcycle_Status_TD" LIMIT 100;

### 4.2 Combine the grouped data containing new bikes/docks available with the actual data

In [None]:
# Outer-join the per-station availability deltas back onto the full snapshot table,
# matching rows on station and snapshot time.
join_keys = ['station_id', 'datatime_mtd']
df_updated = df.merge(grouped_df, left_on=join_keys, right_on=join_keys, how='outer')

# Any null anywhere in the merged frame is reported as a failed join.
total_nulls = df_updated.isna().sum().sum()
if total_nulls != 0:
  print("Join Failed: Nulls in the data indicates the grouped data has less number of entries")
else:
  print("Join Successfull!!")

Join Successfull!!


In [None]:
# The merge suffixed the overlapping count columns with _x (left frame) and _y (right frame).
# Restore the original names for the left-hand copies, then keep only the wanted columns
# in their final order (the unselected _y copies are dropped by the selection).
restored_names = {
    'docks_available_x': 'docks_available',
    'bikes_available_x': 'bikes_available',
}
final_columns = [
    # station description
    'station_last_updated', 'status_last_reported', 'station_id', 'station_name',
    'station_address', 'station_longitude', 'station_latitude',
    'station_is_returning', 'station_is_renting', 'station_is_installed', 'station_type',
    # availability counts and derived features
    'docks_available', 'bikes_available', 'electic_bikes_available',
    'smart_bikes_available', 'classic_bikes_available',
    'station_capacity', 'new_docks_available', 'new_bikes_available',
    # original and rounded timestamps with their components
    'datatime_mtd', 'datatime_rnd', 'date_rnd', 'year_rnd', 'month_rnd', 'day_rnd',
    'day_of_week_rnd', 'time_rnd', 'hour_rnd', 'minute_rnd', 'second_rnd',
]
df_updated = df_updated.rename(columns=restored_names)[final_columns]

df_updated.head(10)

Unnamed: 0,station_last_updated,status_last_reported,station_id,station_name,station_address,station_longitude,station_latitude,station_is_returning,station_is_renting,station_is_installed,...,datatime_rnd,date_rnd,year_rnd,month_rnd,day_rnd,day_of_week_rnd,time_rnd,hour_rnd,minute_rnd,second_rnd
0,1697507102,1697507104,bcycle_boulder_1855,Folsom & Colorado,SE corner of Folsom & Colorado,-105.26385,40.00811,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
1,1697507102,1697507104,bcycle_boulder_2763,20th & Pearl,1986 20th St.,-105.26952,40.01988,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
2,1697507102,1697507104,bcycle_boulder_1858,15th & Pearl,15th Street & Pearl Street,-105.27584,40.01872,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
3,1697507102,1697507104,bcycle_boulder_1859,11th & Pearl,11th Street & Pearl Street,-105.28116,40.01747,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
4,1697507102,1697507104,bcycle_boulder_1860,13th & Spruce,13th Street & Spruce Street,-105.2789,40.01909,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
5,1697507102,1697507104,bcycle_boulder_1861,UCAR Center Green,3080 Center Green Drive,-105.24611,40.03154,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
6,1697507102,1697507104,bcycle_boulder_1866,Library @ Arapahoe,1015 Arapahoe,-105.28087,40.01377,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
7,1697507102,1697507104,bcycle_boulder_1867,26th & Pearl,2600 Pearl Street,-105.25984,40.0216,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
8,1697507102,1697507104,bcycle_boulder_1869,Municipal Building,1777 Broadway,-105.27959,40.01508,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0
9,1697507102,1697507104,bcycle_boulder_1871,Broadway & Alpine,1290 Alpine Avenue,-105.28144,40.02543,1,1,1,...,2023-10-16 19:45:00,2023-10-16,2023,10,16,Monday,19:45:00,19,45,0


In [None]:
# Write df_updated to CSV without the index column.
updated_csv_path = '/content/drive/MyDrive/CSCI 5502 Data Mining/01 Project/Data/Bcycle_data_to_date_updated02.csv'
df_updated.to_csv(updated_csv_path, index=False)

In [None]:
# Count the distinct values of each station identifier column (a missing value counts once).
for label, column in (("ID", "station_id"), ("Name", "station_name"), ("Address", "station_address")):
  print(f"Total Number of Stations {label}:", df[column].nunique(dropna=False))

Total Number of Stations ID: 54
Total Number of Stations Name: 54
Total Number of Stations Address: 53


* Total number of unique stations: 54
* However, the total number of unique addresses is only 53
  * This is because station_id 1872 (19th @ Boulder Creek) and station_id 2766 (13th & Arapahoe) share the same address: 'Boulder Creek Path'

In [None]:
# List the stations that share the address 'Boulder Creek Path'.
# Only the last expression of a cell is displayed, so the ids and names are shown together
# in one table instead of two separate expressions (the first of which was discarded).
shares_address = df["station_address"] == 'Boulder Creek Path'
df.loc[shares_address, ['station_id', 'station_name']].drop_duplicates()

array(['bcycle_boulder_1872', 'bcycle_boulder_2766'], dtype=object)

## 5. Calculate wait times

Wait time is calculated for both bikes and docks. The calculation is done by:
1. Creating a new column that gives the time at which the next bike (or dock) becomes available.
2. Subtracting the current time from that column to get the wait time.

In [None]:
# Read the sample data set, order its rows by the rounded timestamp and renumber the index.
sample_csv_path = '/content/drive/MyDrive/CSCI 5502 Data Mining/01 Project/Data/sample_usable_data_for_initial_analysis.csv'
df = (
    pd.read_csv(sample_csv_path)
      .sort_values(by=['datatime_rnd'])
      .reset_index(drop=True)
)
df.head()

Unnamed: 0,station_id,datatime_mtd,datatime_rnd,date_rnd,year_rnd,month_rnd,day_rnd,day_of_week_rnd,time_rnd,hour_rnd,...,station_type,docks_available,bikes_available,electic_bikes_available,smart_bikes_available,classic_bikes_available,station_capacity,bikes_available_y,new_docks_available,new_bikes_available
0,bcycle_boulder_1855,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,Kiosk and Station,15.0,0.0,0.0,0.0,0.0,15.0,0.0,0,0
1,bcycle_boulder_2763,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,Kiosk and Station,5.0,8.0,8.0,0.0,0.0,13.0,8.0,5,0
2,bcycle_boulder_1859,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,Kiosk and Station,14.0,0.0,0.0,0.0,0.0,14.0,0.0,0,0
3,bcycle_boulder_1860,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,Kiosk and Station,13.0,0.0,0.0,0.0,0.0,13.0,0.0,0,0
4,bcycle_boulder_1861,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,Kiosk and Station,9.0,2.0,2.0,0.0,0.0,11.0,2.0,0,1


In [None]:
# Calculate the next available bike and dock time for each station

def cal_next_avl_time(all_times, avl_times):
  """Pair every entry of `all_times` with the next strictly later entry of `avl_times`.

  Both sequences are walked once from the front with a cursor that never moves back,
  so the pairing is only meaningful when both are in ascending order.

  Args:
    all_times: sequence of comparable time values, one per observation.
    avl_times: sequence of comparable time values at which availability occurred.

  Returns:
    A list as long as `all_times`. Position k holds the first remaining element of
    `avl_times` that is strictly greater than `all_times[k]`; once `avl_times` is
    exhausted, every remaining position holds None.
  """
  matched = []
  cursor = 0
  avl_count = len(avl_times)
  for current in all_times:
    # Skip availability times that are not strictly after the current time.
    while cursor < avl_count and not (current < avl_times[cursor]):
      cursor += 1
    if cursor == avl_count:
      # Nothing later is left; this and all following entries get None below.
      break
    matched.append(avl_times[cursor])
  matched.extend([None] * (len(all_times) - len(matched)))
  return matched

# For every station, store for each snapshot the time of that station's next snapshot in
# which new bikes (respectively new docks) became available.
# Each pair is (column counting the newly available items, output column to fill); one
# loop handles both, replacing two copy-pasted loops that differed only in these names.
next_event_columns = [
  ('new_bikes_available', 'next_avl_bike_time'),
  ('new_docks_available', 'next_avl_dock_time'),
]
for event_col, next_time_col in next_event_columns:
  for station in df['station_id'].unique():
    mask = df['station_id'] == station
    station_rows = df[mask]
    # Every snapshot time of this station, and the subset where something became available.
    all_times = station_rows['datatime_rnd'].values
    avl_times = station_rows.loc[station_rows[event_col] > 0, 'datatime_rnd'].values
    # Snapshots after the station's last availability event receive None.
    df.loc[mask, next_time_col] = cal_next_avl_time(all_times, avl_times)

df

Unnamed: 0,station_id,datatime_mtd,datatime_rnd,date_rnd,year_rnd,month_rnd,day_rnd,day_of_week_rnd,time_rnd,hour_rnd,...,bikes_available,electic_bikes_available,smart_bikes_available,classic_bikes_available,station_capacity,bikes_available_y,new_docks_available,new_bikes_available,next_avl_bike_time,next_avl_dock_time
0,bcycle_boulder_1855,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,0.0,0.0,15.0,0.0,0,0,2023-10-31 01:24:00,2023-10-31 06:06:00
1,bcycle_boulder_2763,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,8.0,8.0,0.0,0.0,13.0,8.0,5,0,2023-10-31 10:03:00,2023-10-31 08:39:00
2,bcycle_boulder_1859,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,0.0,0.0,14.0,0.0,0,0,2023-10-31 00:24:00,2023-10-31 14:30:00
3,bcycle_boulder_1860,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,0.0,0.0,13.0,0.0,0,0,2023-10-31 10:06:00,2023-10-31 13:39:00
4,bcycle_boulder_1861,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,2.0,2.0,0.0,0.0,11.0,2.0,0,1,2023-10-31 13:00:00,2023-10-31 13:12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181435,bcycle_boulder_7327,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,4.0,4.0,0.0,0.0,8.0,4.0,0,0,,
181436,bcycle_boulder_7345,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,1.0,1.0,0.0,0.0,6.0,1.0,0,0,,
181437,bcycle_boulder_2764,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,8.0,8.0,0.0,0.0,13.0,8.0,0,0,,
181438,bcycle_boulder_7346,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,3.0,3.0,0.0,0.0,6.0,3.0,0,0,,


In [None]:
# Calculate the wait times (in minutes) until the next bike / dock becomes available.

# Parse the three timestamp columns from text into datetimes so they can be subtracted.
timestamp_format = '%Y-%m-%d %H:%M:%S'
for time_col in ('datatime_rnd', 'next_avl_bike_time', 'next_avl_dock_time'):
  df[time_col] = pd.to_datetime(df[time_col], format=timestamp_format)

# Wait time = next availability time - current rounded time, converted from seconds to minutes.
seconds_per_minute = 60
df['bike_wait_time'] = df['next_avl_bike_time'].sub(df['datatime_rnd']).dt.total_seconds().div(seconds_per_minute)
df['dock_wait_time'] = df['next_avl_dock_time'].sub(df['datatime_rnd']).dt.total_seconds().div(seconds_per_minute)

df

Unnamed: 0,station_id,datatime_mtd,datatime_rnd,date_rnd,year_rnd,month_rnd,day_rnd,day_of_week_rnd,time_rnd,hour_rnd,...,smart_bikes_available,classic_bikes_available,station_capacity,bikes_available_y,new_docks_available,new_bikes_available,next_avl_bike_time,next_avl_dock_time,bike_wait_time,dock_wait_time
0,bcycle_boulder_1855,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,15.0,0.0,0,0,2023-10-31 01:24:00,2023-10-31 06:06:00,84.0,366.0
1,bcycle_boulder_2763,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,13.0,8.0,5,0,2023-10-31 10:03:00,2023-10-31 08:39:00,603.0,519.0
2,bcycle_boulder_1859,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,14.0,0.0,0,0,2023-10-31 00:24:00,2023-10-31 14:30:00,24.0,870.0
3,bcycle_boulder_1860,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,13.0,0.0,0,0,2023-10-31 10:06:00,2023-10-31 13:39:00,606.0,819.0
4,bcycle_boulder_1861,2023-10-31 00:00:20,2023-10-31 00:00:00,2023-10-31,2023.0,10.0,31.0,Tuesday,00:00:00,0.0,...,0.0,0.0,11.0,2.0,0,1,2023-10-31 13:00:00,2023-10-31 13:12:00,780.0,792.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181435,bcycle_boulder_7327,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,0.0,0.0,8.0,4.0,0,0,NaT,NaT,,
181436,bcycle_boulder_7345,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,0.0,0.0,6.0,1.0,0,0,NaT,NaT,,
181437,bcycle_boulder_2764,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,0.0,0.0,13.0,8.0,0,0,NaT,NaT,,
181438,bcycle_boulder_7346,2023-11-06 23:57:04,2023-11-06 23:57:00,2023-11-06,2023.0,11.0,6.0,Monday,23:57:00,23.0,...,0.0,0.0,6.0,3.0,0,0,NaT,NaT,,


In [None]:
# Write df (now including the next-availability and wait-time columns) to CSV without the index.
# NOTE(review): this is the same path the sample data was read from, so the input file is overwritten.
sample_output_csv = '/content/drive/MyDrive/CSCI 5502 Data Mining/01 Project/Data/sample_usable_data_for_initial_analysis.csv'
df.to_csv(sample_output_csv, index=False)