In [13]:
from datetime import datetime, timedelta
import numpy as np
import pandas as pd

In [2]:
# Load the raw travel register. Passing `names` supplies the column labels, so
# the first line of the file is read as data rather than as a header row.
# Dates are parsed with the day / four-digit year / abbreviated month-name
# layout given by the format string, then sorted newest first with a fresh
# 0..n-1 index.
reg = (
    pd.read_csv('travel-history-2022.csv',
                names=['date', 'type'],
                skipinitialspace=True)
    .assign(date=lambda frame: pd.to_datetime(frame['date'],
                                              format='%d-%Y-%b'))
    .sort_values(by='date', ascending=False, ignore_index=True)
)
reg

Unnamed: 0,date,type
0,2022-03-07,DEPARTURE
1,2022-02-11,ARRIVAL
2,2022-01-09,DEPARTURE
3,2021-12-19,ARRIVAL
4,2021-10-20,DEPARTURE
5,2020-08-01,ARRIVAL
6,2020-05-10,DEPARTURE
7,2020-01-05,ARRIVAL


In [3]:
# Build one row per stay by pairing the k-th most recent departure with the
# k-th most recent arrival. `reg` is sorted newest first, so after resetting
# both indexes the rows line up positionally.
arrivals = reg.loc[reg['type'] == 'ARRIVAL', 'date'].reset_index(drop=True)
departures = reg.loc[reg['type'] == 'DEPARTURE', 'date'].reset_index(drop=True)

# Positional pairing is only meaningful when every arrival has its own
# departure. An unmatched record (e.g. a stay that is still ongoing) would
# shift every pair by one and silently produce negative or missing durations,
# so stop here with an explicit error instead.
if len(arrivals) != len(departures):
    raise ValueError(
        f'Unbalanced travel history: {len(arrivals)} arrival(s) vs '
        f'{len(departures)} departure(s); every arrival needs a matching departure.'
    )

trip_cols = [departures, arrivals]
trips = (
    pd.concat(trip_cols, axis=1, ignore_index=True)
    .rename(columns={0: 'departure', 1: 'arrival'})
)

# A departure dated before its paired arrival means the records do not
# alternate ARRIVAL/DEPARTURE as the pairing assumes.
if (trips['departure'] < trips['arrival']).any():
    raise ValueError(
        'Travel history is not a clean ARRIVAL/DEPARTURE alternation: '
        'at least one departure precedes its paired arrival.'
    )

trips

Unnamed: 0,departure,arrival
0,2022-03-07,2022-02-11
1,2022-01-09,2021-12-19
2,2021-10-20,2020-08-01
3,2020-05-10,2020-01-05


In [4]:
# Add one zero-initialised duration column per year, in the order
# '2022', '2021', '2020'; the per-year day counts are written into them later.
no_days = timedelta(days=0)
for i in reversed(range(3)):
    trips[f'202{i}'] = no_days
trips

Unnamed: 0,departure,arrival,2022,2021,2020
0,2022-03-07,2022-02-11,0 days,0 days,0 days
1,2022-01-09,2021-12-19,0 days,0 days,0 days
2,2021-10-20,2020-08-01,0 days,0 days,0 days
3,2020-05-10,2020-01-05,0 days,0 days,0 days


In [15]:
def days_present_by_year(arrival, departure):
    """Split one stay into the number of days it covers in each calendar year.

    Both the arrival day and the departure day are counted (hence the one-day
    offset). Every calendar year from the arrival year to the departure year
    is reported, so a stay crossing several year boundaries also credits the
    full years in between.

    Parameters
    ----------
    arrival, departure : pandas.Timestamp
        First and last day of the stay.

    Returns
    -------
    dict
        Maps each calendar year (int) to a pandas.Timedelta of days present.
    """
    one_day = timedelta(days=1)
    per_year = {}
    for year in range(arrival.year, departure.year + 1):
        # Clamp the stay to the boundaries of this calendar year.
        first_day = max(arrival, pd.Timestamp(year, 1, 1))
        last_day = min(departure, pd.Timestamp(year, 12, 31))
        per_year[year] = last_day - first_day + one_day
    return per_year


for idx, arrival, departure in zip(trips.index, trips['arrival'], trips['departure']):
    for year, days in days_present_by_year(arrival, departure).items():
        column = f'{year}'
        # Create the year column on demand so that writing a single cell never
        # enlarges the frame with missing values.
        if column not in trips.columns:
            trips[column] = timedelta(days=0)
        trips.at[idx, column] = days

trips

Unnamed: 0,departure,arrival,2022,2021,2020
0,2022-03-07,2022-02-11,25 days,0 days,0 days
1,2022-01-09,2021-12-19,9 days,13 days,0 days
2,2021-10-20,2020-08-01,0 days,293 days,153 days
3,2020-05-10,2020-01-05,0 days,0 days,127 days


In [28]:
# Weighted day count over the current year and the two years before it:
# the current year counts in full, the previous year is divided by 3 and
# the year before that by 6.
current_year = 2022
total_presence = 0

for years_back in range(3):
    year = current_year - years_back

    # Whole days spent in that year, summed over all trips.
    year_total = trips[f'{year}'].sum().days

    # 0 for the current year (no scaling), then 3 and 6.
    weight = 3 * years_back
    if weight == 0:
        print(f'{year}: {year_total:.2f}')
    else:
        print(f'{year}: {year_total:.2f} -> {year_total/weight:.2f}')
        year_total = year_total/weight

    total_presence += year_total

print(f'Total presence days: {total_presence:.2f} -> {int(total_presence)}')

2022: 34.00
2021: 306.00 -> 102.00
2020: 280.00 -> 46.67
Total presence days: 182.67 -> 182


In [29]:
# Two conditions must both hold: at least 31 days in the current year and a
# weighted three-year total of at least 183 days.
enough_days_this_year = trips[f'{current_year}'].sum().days >= 31
passes_presence_test = enough_days_this_year and total_presence >= 183

if not passes_presence_test:
    print('You are not a US resident for tax purposes.')
else:
    print('You are a US resident for tax purposes.')

You are not a US resident for tax purposes.
