In [1]:
# Pie chart visualization comparing the number of crew arrivals in each country.

# This pie chart compares crew arrivals in China (2019) with crew arrivals in South Korea (2021).

In [2]:
import pandas as pd
import plotly.express as px

In [3]:
# Read both source CSV files into DataFrames.
# Paths are relative, so the files are looked up in the current working directory.
china_visitors = pd.read_csv(
    filepath_or_buffer='China_cleaned_Visitor_Arrivals_2019_Data.csv',
)
south_korea_age = pd.read_csv(
    filepath_or_buffer='South_Korea_visitor_Arrivalsby_age___by_nationality.csv',
)

In [4]:
# Inspect the columns and the first few rows of each dataset.
print("China Visitors Data Columns:")
print(china_visitors.columns)

print("\nChina Visitors Data Sample:")
print(china_visitors.head())

print("\nSouth Korea Visitors by Age Data Columns:")
print(south_korea_age.columns)

print("\nSouth Korea Visitors by Age Data Sample:")
print(south_korea_age.head())

# Check for missing values and data types.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line
# after each report.
print("China Visitors Data Info:")
china_visitors.info()

print("\nSouth Korea Visitors by Age Data Info:")
south_korea_age.info()

China Visitors Data Columns:
Index(['Item', '2019'], dtype='object')

China Visitors Data Sample:
           Item     2019
0         Total  4911.36
1          Male  2881.29
2        Female  2030.07
3  14 and under   184.92
4         15-24   686.20

South Korea Visitors by Age Data Columns:
Index(['Year', '2021', '2021.1', '2021.2', '2021.3', '2021.4', '2021.5',
       '2021.6', '2021.7'],
      dtype='object')

South Korea Visitors by Age Data Sample:
                Year   2021 2021.1 2021.2 2021.3 2021.4 2021.5       2021.6  \
0  By nationality(2)  Total   0-20  21-30  31-40  41-50  51-60  61 and Over   
1              Total  90150   5677  14043  12866   8002   6135         3456   
2              China  11691    341   1437   1902   1184   1323          669   
3              Japan   1007     86    235    173    184    123           58   
4             Taiwan    290      7     55     47     28     12           10   

  2021.7  
0   Crew  
1  39971  
2   4835  
3    148  
4    131  
Chi

In [5]:
# Ensure the China data has a crew row, then extract it.
#
# Labels in the 'Item' column are normalised before comparison (trimmed,
# lower-cased, every run of non-alphanumeric characters collapsed to "_"), so
# that "Worker and Crew" and "worker_and_crew" are treated as the same label.
# Without this, the placeholder row appended below would never match the
# lookup key: the extraction would come back empty and every re-run of the
# cell would append one more placeholder row.
def _item_keys(frame):
    """Return the 'Item' labels of `frame` as normalised comparison keys."""
    return (
        frame['Item']
        .astype(str)  # missing labels become the string 'nan' instead of NaN
        .str.strip()
        .str.lower()
        .str.replace(r'[^a-z0-9]+', '_', regex=True)
    )


# Add a zero-valued placeholder only when no crew row exists at all.
# NOTE(review): a placeholder of 0 hides the fact that the figure is missing
# from the source file - confirm this fallback is intended.
if not _item_keys(china_visitors).eq('worker_and_crew').any():
    china_visitors = pd.concat(
        [china_visitors, pd.DataFrame({'Item': ['Worker and Crew'], '2019': [0]})],
        ignore_index=True,
    )

# Extract the crew data for China (real row if present, otherwise the placeholder).
china_crew_data = china_visitors[_item_keys(china_visitors) == 'worker_and_crew']

In [6]:
# Build the South Korea crew frame from the row whose 'Year' cell is 'Total'.
# Only the '2021.7' column is kept; the sample printed earlier in this notebook
# shows 'Crew' as that column's sub-header in the first data row.
# The selected value is taken as-is (no dtype conversion happens here).
south_korea_crew_data = (
    south_korea_age
    .loc[south_korea_age['Year'] == 'Total', ['2021.7']]
    .rename(columns={'2021.7': 'Crew_Arrivals'})
    .assign(Country='South Korea')
)

In [7]:
# Assemble the two-row summary table that feeds the pie chart.
# The figures are the provided values, entered directly.
# NOTE(review): they are typed in literally rather than read from
# china_crew_data / south_korea_crew_data - confirm they match the source files
# and that both countries are reported in the same unit.
crew_data = pd.DataFrame(
    [
        {'Country': 'China', 'Crew_Arrivals': 714.01},
        {'Country': 'South Korea', 'Crew_Arrivals': 39971},
    ]
)

# Make sure the arrivals column is numeric before plotting.
crew_data['Crew_Arrivals'] = pd.to_numeric(crew_data['Crew_Arrivals'])

# Debug: show the assembled summary.
print("\nCrew Data Summary:", crew_data, sep="\n")


Crew Data Summary:
       Country  Crew_Arrivals
0        China         714.01
1  South Korea       39971.00


In [8]:
# Draw the pie chart: one slice per country, sized by its crew-arrival figure.
fig = px.pie(
    crew_data,
    names='Country',
    values='Crew_Arrivals',
    labels=dict(Crew_Arrivals='Number of Crew Arrivals'),
    title='Comparison of Crew Arrivals in China (2019) and South Korea (2021)',
)

# Render the figure in the notebook output.
fig.show()