In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('traveller-report-daily.csv')
df.head()

Unnamed: 0,Date,Port of Entry,Region,Mode,Sum of Volume
0,2018-01-01,2060 - Moncton,Atlantic Region,Air,119.0
1,2018-01-01,2110 - St. Stephen: Traffic,Atlantic Region,Land,843.0
2,2018-01-01,2114 - St. Stephen: 3rd Bridge,Atlantic Region,Land,364.0
3,2018-01-01,2115 - St. Stephen: 3rd Bridge Commercial,Atlantic Region,Land,37.0
4,2018-01-01,2120 - Woodstock: Traffic,Atlantic Region,Land,868.0


# Task 1

## clean_volume

In [2]:
df.rename(columns = {'Sum of Volume' : 'VOLUME'}, inplace = True)
df['VOLUME'] = df['VOLUME'].astype('Int64')
df.head()

Unnamed: 0,Date,Port of Entry,Region,Mode,VOLUME
0,2018-01-01,2060 - Moncton,Atlantic Region,Air,119
1,2018-01-01,2110 - St. Stephen: Traffic,Atlantic Region,Land,843
2,2018-01-01,2114 - St. Stephen: 3rd Bridge,Atlantic Region,Land,364
3,2018-01-01,2115 - St. Stephen: 3rd Bridge Commercial,Atlantic Region,Land,37
4,2018-01-01,2120 - Woodstock: Traffic,Atlantic Region,Land,868


## clean_region

In [4]:
df['Region'] = df['Region'].replace({'QuÈbec Region': 'Québec Region','Quebec Region': 'Québec Region'})
df['Region'] = df['Region'].str.replace('Region','')

## clean_port

In [16]:
# AI: Debugging, 
# At first, I tried pl = df['Port of Entry'].str.split('-')
# But I got an error: AttributeError: 'list' object has no attribute 'str'
# so I asked AI for debugging and realized that I need expand = True

spl = df['Port of Entry'].str.split('-', expand=True)
df['PORT_ID'] = spl[0].str.strip().astype(int)
df['PORT_NAME'] = spl[1].str.strip()
df = df.drop(columns = 'Port of Entry')

## clean_data

In [26]:
df['Date'] = pd.to_datetime(df['Date'])

## fill_missing_volumes

In [28]:
df_copy = df.copy()

In [30]:
df_copy.isnull().sum()

Date              0
Region            0
Mode              0
VOLUME       145794
PORT_ID           0
PORT_NAME         0
dtype: int64

In [43]:
def fill_missing_volumes(df: pd.DataFrame, strategy: str) -> None:
    """ Modify df by filling missing values in the VOLUME column using the specified strategy. """
    if strategy == 'MEAN':
        df['VOLUME'].fillna(df['VOLUME'].mean())
    elif strategy == 'MEDIAN':
        df['VOLUME'].fillna(df['VOLUME'].median())
    else:
        df['VOLUME'].fillna(0)

In [44]:
fill_missing_volumes(df, 'MEAN')

In [45]:
df.isnull().sum()

Date         0
Region       0
Mode         0
VOLUME       0
PORT_ID      0
PORT_NAME    0
dtype: int64

# Task 2

## filter_with_volume

In [61]:
fil = (df['Region'] == 'Québec') & (df['VOLUME'] >= 100)
result = df[fil]
result

Unnamed: 0,Date,Region,Mode,VOLUME,PORT_ID,PORT_NAME


## find_port_name

In [67]:
row = df[df['PORT_ID'] == 2060]
row['PORT_NAME'][0]

'Moncton'

## get_mean_volume_by

In [71]:
result = df.groupby('Region')['VOLUME'].mean()
result

Region
Atlantic                  419.293232
Greater Toronto Area     3317.243461
Northern Ontario          811.334589
Pacific                  1106.541528
Prairie                   464.312154
Québec                     782.17457
Southern Ontario         2190.017605
Name: VOLUME, dtype: Float64

## get_top_n_by_volume

In [78]:
total = df.groupby('Region')['VOLUME'].sum()
sort = total.sort_values(ascending = False)
sort.head(3)

Region
Pacific                  162923855
Southern Ontario         159225230
Greater Toronto Area     106918074
Name: VOLUME, dtype: Int64

## compute_volume_by_time

In [85]:
year = df['Date'].dt.year
month = df['Date'].dt.month

f1 = (df[df['Date'].dt.year == 2023]) & (df[df['Date'].dt.month == 1])
result = f1.groupby('Region')['VOLUME'].sum()
result

TypeError: unsupported operand type(s) for &: 'DatetimeArray' and 'DatetimeArray'

## calculate_volume_change

In [72]:
df.head()

Unnamed: 0,Date,Region,Mode,VOLUME,PORT_ID,PORT_NAME
0,2018-01-01,Atlantic,Air,119,2060,Moncton
1,2018-01-01,Atlantic,Land,843,2110,St. Stephen: Traffic
2,2018-01-01,Atlantic,Land,364,2114,St. Stephen: 3rd Bridge
3,2018-01-01,Atlantic,Land,37,2115,St. Stephen: 3rd Bridge Commercial
4,2018-01-01,Atlantic,Land,868,2120,Woodstock: Traffic
