## Import libraries

In [None]:
import requests
import pandas as pd
import json

In [2]:
# Endpoint that serves the YouBike real-time station data as JSON
url = (
    "https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json"
)

# Echo the endpoint so the cell output records what is about to be requested
print(f"Attempting to fetch data from: {url}\n")

Attempting to fetch data from: https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json



In [3]:
# Send a GET request to the URL.
# An explicit timeout is passed because requests waits indefinitely by
# default; without it an unresponsive server would hang this cell (and any
# "Run All") forever. On expiry requests raises requests.exceptions.Timeout.
response = requests.get(url, timeout=30)

# Fail loudly on 4xx/5xx responses (raises requests.exceptions.HTTPError)
# instead of trying to parse an error page as station data.
response.raise_for_status()

# Decode the JSON body into Python objects
data = response.json()

print("Data fetched successfully! Number of stations received:", len(data))

Data fetched successfully! Number of stations received: 1594


## Data Exploration

In [4]:
# Load the parsed JSON records into a DataFrame for tabular exploration
df = pd.DataFrame(data=data)

# Preview the first five rows (the default for head) as the cell's output
df.head(5)

Unnamed: 0,sno,sna,sarea,mday,ar,sareaen,snaen,aren,act,srcUpdateTime,updateTime,infoTime,infoDate,total,available_rent_bikes,latitude,longitude,available_return_bikes
0,500101001,YouBike2.0_捷運科技大樓站,大安區,2025-06-09 22:00:04,復興南路二段235號前,Daan Dist.,YouBike2.0_MRT Technology Bldg. Sta.,No.235， Sec. 2， Fuxing S. Rd.,1,2025-06-09 22:12:29,2025-06-09 22:12:51,2025-06-09 22:00:04,2025-06-09,28,4,25.02605,121.5436,24
1,500101002,YouBike2.0_復興南路二段273號前,大安區,2025-06-09 22:00:04,復興南路二段273號西側,Daan Dist.,YouBike2.0_No.273， Sec. 2， Fuxing S. Rd.,No.273， Sec. 2， Fuxing S. Rd. (West),1,2025-06-09 22:12:29,2025-06-09 22:12:51,2025-06-09 22:00:04,2025-06-09,21,7,25.02565,121.54357,14
2,500101003,YouBike2.0_國北教大實小東側門,大安區,2025-06-09 22:03:02,和平東路二段96巷7號,Daan Dist.,YouBike2.0_NTUE Experiment Elementary School (...,No. 7， Ln. 96， Sec. 2， Heping E. Rd,1,2025-06-09 22:12:29,2025-06-09 22:12:51,2025-06-09 22:03:02,2025-06-09,28,19,25.02429,121.54124,9
3,500101004,YouBike2.0_和平公園東側,大安區,2025-06-09 22:03:02,和平東路二段118巷33號,Daan Dist.,YouBike2.0_Heping Park (East),No. 33， Ln. 118， Sec. 2， Heping E. Rd,1,2025-06-09 22:12:29,2025-06-09 22:12:51,2025-06-09 22:03:02,2025-06-09,11,5,25.02351,121.54282,6
4,500101005,YouBike2.0_辛亥復興路口西北側,大安區,2025-06-09 22:00:04,復興南路二段368號,Daan Dist.,YouBike2.0_Xinhai Fuxing Rd. Intersection (Nor...,No. 368， Sec. 2， Fuxing S. Rd.,1,2025-06-09 22:12:29,2025-06-09 22:12:51,2025-06-09 22:00:04,2025-06-09,16,8,25.02153,121.54299,8


## DataFrame Info

In [5]:
# Print a concise summary of the frame: index range, and for every column
# its non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1594 entries, 0 to 1593
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   sno                     1594 non-null   object 
 1   sna                     1594 non-null   object 
 2   sarea                   1594 non-null   object 
 3   mday                    1594 non-null   object 
 4   ar                      1594 non-null   object 
 5   sareaen                 1594 non-null   object 
 6   snaen                   1594 non-null   object 
 7   aren                    1594 non-null   object 
 8   act                     1594 non-null   object 
 9   srcUpdateTime           1594 non-null   object 
 10  updateTime              1594 non-null   object 
 11  infoTime                1594 non-null   object 
 12  infoDate                1594 non-null   object 
 13  total                   1594 non-null   int64  
 14  available_rent_bikes    1594 non-null   

### Get descriptive statistics for numerical columns

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns only. The DataFrame is left as the cell's last expression rather
# than passed to print(): Jupyter then renders it as one rich table instead
# of a plain-text dump that wraps wide frames into several chunks.
df.describe(include='number')

             total  available_rent_bikes     latitude    longitude  \
count  1594.000000           1594.000000  1594.000000  1594.000000   
mean     26.925972             10.550816    25.052868   121.544913   
std      13.487044              9.120880     0.033285     0.031795   
min       5.000000              0.000000    24.976190   121.462280   
25%      17.000000              4.000000    25.029930   121.522255   
50%      24.000000              9.000000    25.049945   121.541120   
75%      32.000000             15.000000    25.072722   121.566055   
max      99.000000             69.000000    25.145820   121.623060   

       available_return_bikes  
count             1594.000000  
mean                15.732120  
std                 12.317586  
min                  0.000000  
25%                  7.000000  
50%                 13.000000  
75%                 21.000000  
max                 89.000000  


### Check for missing values in each column

In [7]:
# Count the missing entries in each column (isna flags them, sum tallies per column)
print(df.isna().sum())

sno                       0
sna                       0
sarea                     0
mday                      0
ar                        0
sareaen                   0
snaen                     0
aren                      0
act                       0
srcUpdateTime             0
updateTime                0
infoTime                  0
infoDate                  0
total                     0
available_rent_bikes      0
latitude                  0
longitude                 0
available_return_bikes    0
dtype: int64


### Check unique values for some key categorical columns

In [8]:
# List the distinct values of each column of interest, printing the label
# and the values on separate lines.
print("Unique values for 'sarea' (District):", df['sarea'].unique(), sep="\n")

print("Unique values for 'act' (Active Status):", df['act'].unique(), sep="\n")

Unique values for 'sarea' (District):
['大安區' '大同區' '士林區' '文山區' '中正區' '中山區' '內湖區' '北投區' '松山區' '南港區' '信義區' '萬華區'
 '臺大公館校區']
Unique values for 'act' (Active Status):
['1' '0']


### Check the data types of 'mday', 'srcUpdateTime', 'updateTime', 'infoTime', 'infoDate'

In [9]:
# Look up the stored dtype of each date/time column by name
print(df.dtypes.loc[['mday', 'srcUpdateTime', 'updateTime', 'infoTime', 'infoDate']])

mday             object
srcUpdateTime    object
updateTime       object
infoTime         object
infoDate         object
dtype: object
