In [4]:
import pandas as pd
import sys


class VisitorsAnalyticsUtils:
    """Load a visitor CSV and slice it down to one year period and one region.

    Attributes:
        period: Key into ``PERIODS`` ('1'..'4') selecting the year range.
        region: Key into ``REGIONS`` ('1'..'3') selecting the column group.
        path: Location of the CSV file handed to ``pandas.read_csv``.
    """

    # Inclusive (first_year, last_year) bounds per period key. The bounds are
    # strings because parseData compares them against the string year token
    # extracted from the first CSV column.
    PERIODS = {
        '1': ('1978', '1987'),
        '2': ('1988', '1997'),
        '3': ('1998', '2007'),
        '4': ('2008', '2017'),
    }

    # Column headers per region key ('1' Asia, '2' Europe, '3' others).
    # NOTE(review): the surrounding spaces are part of each header string;
    # presumably they mirror the CSV header row exactly -- confirm before
    # normalising them.
    REGIONS = {
        '1': [" Brunei Darussalam ", " Indonesia ", " Malaysia ", " Philippines ", " Thailand ", " Viet Nam ", " Myanmar ", " Japan ", " Hong Kong ", " China ", " Taiwan ", " Korea, Republic Of ", " India ", " Pakistan ", " Sri Lanka ", " Saudi Arabia ", " Kuwait ", " UAE "],
        '2': [' United Kingdom ', ' Germany ', ' France ', ' Italy ', ' Netherlands ', ' Greece ', ' Belgium & Luxembourg ', ' Switzerland ', ' Austria ', ' Scandinavia ', ' CIS & Eastern Europe '],
        '3': [' USA ', ' Canada ', ' Australia ', ' New Zealand ', ' Africa '],
    }

    def __init__(self, period, region, path):
        """Store the period key, region key and CSV path for later use."""
        self.period = period
        self.region = region
        self.path = path

    def loadDataFile(self):
        """Read the CSV at ``self.path`` and return it as a DataFrame."""
        return pd.read_csv(self.path)

    def parseData(self):
        """Return the rows of the selected period restricted to the region columns.

        The year of each row is the first whitespace-separated token of the
        first CSV column. Rows containing any missing value are dropped
        before the period filter is applied.

        Returns:
            A new DataFrame holding only the columns listed for
            ``self.region`` and only the rows whose year falls inside the
            inclusive range configured for ``self.period``.

        Raises:
            SystemExit: with status 1 (after printing a message) when
                ``self.period`` or ``self.region`` is not a known key.
        """
        # Validate before touching the file so a bad key fails fast, and exit
        # with a non-zero status so the failure is not reported as success.
        if self.period not in self.PERIODS:
            print("Period is not in range. Exiting...")
            sys.exit(1)
        if self.region not in self.REGIONS:
            print("Region is not in range. Exiting...")
            sys.exit(1)

        data = self.loadDataFile()
        data['Year'] = data.iloc[:, 0].str.split().str[0]
        data = data.dropna()

        # Getting data from the year period. Comparing as strings is safe
        # only because every bound is a four-character year.
        first_year, last_year = self.PERIODS[self.period]
        period_data = data.loc[data['Year'].between(first_year, last_year)]
        period_data = period_data.drop('Year', axis=1)

        # Getting the region data for the year period
        return period_data[self.REGIONS[self.region]].copy()



Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [5]:
import unittest

class TestVisitorsAnalyticsUtils(unittest.TestCase):
    """Checks VisitorsAnalyticsUtils against the CSV at ./Int_Monthly_Visitor.csv."""

    def setUp(self):
        # Every test inspects the same slice: period '1' combined with region '1'.
        self.utils = VisitorsAnalyticsUtils('1', '1', './Int_Monthly_Visitor.csv')

    def test_loadDataFile(self):
        # The raw loader must hand back a DataFrame.
        loaded = self.utils.loadDataFile()
        self.assertIsInstance(loaded, pd.DataFrame)

    def test_parseData(self):
        # The parsed slice must also be a DataFrame.
        parsed = self.utils.parseData()
        self.assertIsInstance(parsed, pd.DataFrame)

    def test_parseData_period(self):
        # Row count is an assumption about the contents of the data file.
        parsed = self.utils.parseData()
        self.assertEqual(len(parsed), 120)

    def test_parseData_region(self):
        # Column count is an assumption about the data file; 18 is the
        # number of column names listed for region '1'.
        parsed = self.utils.parseData()
        self.assertEqual(len(parsed.columns), 18)

# Run the tests inside the notebook: argv=[''] keeps unittest from parsing the
# kernel's own command-line arguments, and exit=False stops it from calling
# sys.exit() once the run finishes.
unittest.main(argv=[''], exit=False)


....
----------------------------------------------------------------------
Ran 4 tests in 0.092s

OK


<unittest.main.TestProgram at 0x1e66860bb10>