In [None]:
import pandas as pd
from pathlib import Path

csv_url = "https://data.urbansharing.com/oslobysykkel.no/trips/v1/2025/10.csv"
df = pd.read_csv(csv_url)

# Converting the date time in to proper datetime
# Converting seconds to minutes

df['DurationMin'] = df['duration']/ 60 

# ConvertThe string Datetime in to real datetime format
df['StartedAt'] = pd.to_datetime(df['started_at'], format='ISO8601', utc= True)
df['EndedAt'] = pd.to_datetime(df['ended_at'],format='ISO8601' ,utc=True)

#Convert the datetime into local time
df['StartedAtOslo'] = df['StartedAt'].dt.tz_convert('Europe/Oslo')
df['EndedAtOslo'] = df['EndedAt'].dt.tz_convert('Europe/Oslo')

#Local Hours 
df["StartHourOslo"] = df["StartedAtOslo"].dt.hour

#Finding the day of the week which is georgian calender 0=monday...6=Sunday

df["DayOfWeek"] = df["StartedAtOslo"].dt.dayofweek

#For Finding Weekend
df["IsWeekend"] = df["DayOfWeek"].isin([5, 6])

#Renamning some columns
renameMap = {
    "started_at": "StartedAtRaw",
    "ended_at": "EndedAtRaw",
    "start_station_name": "StartStationName",
    "start_station_description": "StartStationDescription",
    "start_station_latitude": "StartStationLatitude",
    "start_station_longitude": "StartStationLongitude",
    "end_station_name": "EndStationName",
    "end_station_description": "EndStationDescription",
    "end_station_latitude": "EndStationLatitude",
    "end_station_longitude": "EndStationLongitude",
    "start_station_id": "StartStationId",
    "end_station_id": "EndStationId",
    "duration": "DurationSec"
}

df.rename(columns=renameMap, inplace=True)

df.columns.tolist()

['StartedAtRaw',
 'EndedAtRaw',
 'DurationSec',
 'StartStationId',
 'StartStationName',
 'StartStationDescription',
 'StartStationLatitude',
 'StartStationLongitude',
 'EndStationId',
 'EndStationName',
 'EndStationDescription',
 'EndStationLatitude',
 'EndStationLongitude',
 'DurationMin',
 'StartedAt',
 'EndedAt',
 'StartedAtOslo',
 'EndedAtOslo',
 'StartHourOslo',
 'DayOfWeek',
 'IsWeekend']

In [None]:
# Checking weekend vs weekdays
df['IsWeekend'].value_counts()
df['IsWeekend'].value_counts(normalize=True)


In [32]:
WeekendSummary = (
    df
    .groupby('IsWeekend')
    .agg(
        TripCount = ('DurationMin', 'size'),
        AverageDurationMin = ('DurationMin', 'mean')
    ).reset_index()
)
WeekendSummary

Unnamed: 0,IsWeekend,TripCount,AverageDurationMin
0,False,88971,10.525631
1,True,16986,12.49579


In [33]:
DaySummary = (
    df
    .groupby('DayOfWeek')
    .agg(
        TripCount = ('DurationMin', 'size'),
        AverageDurationTime = ('DurationMin', 'mean')
    ).reset_index()
)

DaySummary

Unnamed: 0,DayOfWeek,TripCount,AverageDurationTime
0,0,16315,10.70381
1,1,14086,10.472269
2,2,20375,10.45456
3,3,20933,10.491142
4,4,17262,10.526481
5,5,9208,12.06702
6,6,7778,13.00339


In [41]:
# Bussiest hour 


HourlySummary =(
    df
    .groupby('StartHourOslo')
    .agg(
        TripCount =('DurationMin', 'size'),
        AverageDuration = ('DurationMin', 'mean')
    ).reset_index()
     .sort_values('StartHourOslo')
)

HourlySummary



Unnamed: 0,StartHourOslo,TripCount,AverageDuration
0,0,1522,10.820215
1,1,2,4.025
2,5,512,8.851042
3,6,2378,8.999425
4,7,8444,9.400539
5,8,11877,9.603112
6,9,5424,10.085251
7,10,3682,11.748529
8,11,3896,12.091607
9,12,4201,13.184853


In [42]:
DayAndHourlySummary = (
    df
    .groupby(['DayOfWeek', 'StartHourOslo'])
    .agg(
        TripCount = ('DurationMin', 'size')
    ).reset_index()
)
DayAndHourlySummary

Unnamed: 0,DayOfWeek,StartHourOslo,TripCount
0,0,0,80
1,0,5,81
2,0,6,431
3,0,7,1518
4,0,8,2354
...,...,...,...
136,6,19,445
137,6,20,331
138,6,21,276
139,6,22,235


In [44]:
day_name_map = {
    0: "Mon",
    1: "Tue",
    2: "Wed",
    3: "Thu",
    4: "Fri",
    5: "Sat",
    6: "Sun",
}

DayAndHourlySummary["DayName"] = DayAndHourlySummary["DayOfWeek"].map(day_name_map)
DayAndHourlySummary

Unnamed: 0,DayOfWeek,StartHourOslo,TripCount,DayName
0,0,0,80,Mon
1,0,5,81,Mon
2,0,6,431,Mon
3,0,7,1518,Mon
4,0,8,2354,Mon
...,...,...,...,...
136,6,19,445,Sun
137,6,20,331,Sun
138,6,21,276,Sun
139,6,22,235,Sun


In [45]:
DayHourPivot = DayAndHourlySummary.pivot(
    index="DayName",
    columns="StartHourOslo",
    values="TripCount"
)

DayHourPivot

StartHourOslo,0,1,5,6,7,8,9,10,11,12,...,14,15,16,17,18,19,20,21,22,23
DayName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Fri,238.0,,109.0,391.0,1465.0,2140.0,975.0,629.0,623.0,652.0,...,963.0,1506.0,1723.0,1391.0,1072.0,711.0,487.0,344.0,354.0,573.0
Mon,80.0,,81.0,431.0,1518.0,2354.0,870.0,488.0,499.0,463.0,...,771.0,1436.0,2091.0,1509.0,1127.0,696.0,543.0,377.0,276.0,151.0
Sat,461.0,2.0,25.0,56.0,108.0,181.0,400.0,518.0,545.0,708.0,...,757.0,727.0,778.0,808.0,650.0,439.0,422.0,233.0,312.0,455.0
Sun,379.0,,16.0,39.0,84.0,132.0,231.0,404.0,583.0,723.0,...,742.0,707.0,708.0,648.0,562.0,445.0,331.0,276.0,235.0,171.0
Thu,182.0,,105.0,519.0,1895.0,2759.0,1146.0,615.0,673.0,663.0,...,938.0,1623.0,2378.0,1883.0,1402.0,977.0,719.0,630.0,576.0,443.0
Tue,85.0,,83.0,431.0,1426.0,1681.0,689.0,368.0,364.0,349.0,...,616.0,1393.0,1766.0,1263.0,981.0,656.0,522.0,448.0,334.0,221.0
Wed,97.0,,93.0,511.0,1948.0,2630.0,1113.0,660.0,609.0,643.0,...,967.0,1753.0,2396.0,1836.0,1252.0,939.0,657.0,621.0,525.0,391.0


In [50]:
# Adding a column for finding time of the day
def getTimeOfDay(hour):
    if 5 <= hour <= 11:
        return "Morning"
    elif 12 <= hour <= 17:
        return "Afternoon"
    elif 18 <= hour <= 23:
        return "Evening"
    else:
        return "Night"
    
df['TimeOfDay'] = df['StartHourOslo'].apply(getTimeOfDay)
df.columns.to_list()

['StartedAtRaw',
 'EndedAtRaw',
 'DurationSec',
 'StartStationId',
 'StartStationName',
 'StartStationDescription',
 'StartStationLatitude',
 'StartStationLongitude',
 'EndStationId',
 'EndStationName',
 'EndStationDescription',
 'EndStationLatitude',
 'EndStationLongitude',
 'DurationMin',
 'StartedAt',
 'EndedAt',
 'StartedAtOslo',
 'EndedAtOslo',
 'StartHourOslo',
 'DayOfWeek',
 'IsWeekend',
 'TimeOfDay']

In [52]:
df.columns.to_list()

['StartedAtRaw',
 'EndedAtRaw',
 'DurationSec',
 'StartStationId',
 'StartStationName',
 'StartStationDescription',
 'StartStationLatitude',
 'StartStationLongitude',
 'EndStationId',
 'EndStationName',
 'EndStationDescription',
 'EndStationLatitude',
 'EndStationLongitude',
 'DurationMin',
 'StartedAt',
 'EndedAt',
 'StartedAtOslo',
 'EndedAtOslo',
 'StartHourOslo',
 'DayOfWeek',
 'IsWeekend',
 'TimeOfDay']

In [53]:
# Creating a Clean CSV
from pathlib import Path

TripsClean = df[[
    "DurationSec",
    "DurationMin",
    "StartedAtRaw",
    "EndedAtRaw",
    "StartedAtOslo",
    "EndedAtOslo",
    "StartHourOslo",
    "DayOfWeek",
    "IsWeekend",
    "TimeOfDay",
    "StartStationId",
    "EndStationId"
]].copy()

ProcessedDir = Path('../data/processed/')
OutputPath = ProcessedDir / 'TripsOctoberOsloCityBike.csv'
TripsClean.to_csv(OutputPath, index=False)



In [55]:
CheckDf = pd.read_csv('/Users/rifat-mac/Study/Data-Analytics-Project/Oslo-city-bike-analytics/data/processed/TripsOctoberOsloCityBike.csv')
CheckDf.head()

Unnamed: 0,DurationSec,DurationMin,StartedAtRaw,EndedAtRaw,StartedAtOslo,EndedAtOslo,StartHourOslo,DayOfWeek,IsWeekend,TimeOfDay,StartStationId,EndStationId
0,688,11.466667,2025-10-01 03:02:11.567000+00:00,2025-10-01 03:13:40.482000+00:00,2025-10-01 05:02:11.567000+02:00,2025-10-01 05:13:40.482000+02:00,5,2,False,Morning,578,484
1,159,2.65,2025-10-01 03:05:23.240000+00:00,2025-10-01 03:08:02.978000+00:00,2025-10-01 05:05:23.240000+02:00,2025-10-01 05:08:02.978000+02:00,5,2,False,Morning,2339,2328
2,890,14.833333,2025-10-01 03:10:51.967000+00:00,2025-10-01 03:25:42.164000+00:00,2025-10-01 05:10:51.967000+02:00,2025-10-01 05:25:42.164000+02:00,5,2,False,Morning,603,421
3,177,2.95,2025-10-01 03:11:12.509000+00:00,2025-10-01 03:14:09.562000+00:00,2025-10-01 05:11:12.509000+02:00,2025-10-01 05:14:09.562000+02:00,5,2,False,Morning,600,465
4,211,3.516667,2025-10-01 03:13:54.987000+00:00,2025-10-01 03:17:26.588000+00:00,2025-10-01 05:13:54.987000+02:00,2025-10-01 05:17:26.588000+02:00,5,2,False,Morning,499,617
