In [32]:
# Load every JSON file from the data folder, read the keys "country", "people" and "duration" into a data frame, and make sure to free intermediate memory after loading the data.
import os
import pandas as pd
import json

# Folder holding the input JSON files, and the keys extracted from each file.
DATA_DIR = 'data'
FIELDS = ('country', 'people', 'duration')


def load_records(data_dir=DATA_DIR, fields=FIELDS):
    """Read the requested keys from every ``*.json`` file in ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (non-recursive). Entries not ending in ``.json``
        are ignored.
    fields : iterable of str
        Keys that every JSON document must provide.

    Returns
    -------
    list of dict
        One ``{field: value}`` dict per successfully loaded file, ordered by
        file name.

    Notes
    -----
    A file that cannot be read, is not valid JSON, or lacks any of ``fields``
    is reported on stdout and skipped as a whole; it never contributes a
    partial record. Per-file temporaries are locals, so they are released as
    soon as the function returns.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    for file in sorted(os.listdir(data_dir)):
        if not file.endswith('.json'):
            continue
        text = None
        try:
            # Read the raw text first (UTF-8) so it is still available for the
            # error report below; after json.load(f) the stream would be at
            # EOF and f.read() would return an empty string.
            with open(os.path.join(data_dir, file), mode='r', encoding='utf-8') as f:
                text = f.read()
            loaded = json.loads(text)
            # Build the complete record before appending: a missing key must
            # skip the whole file rather than leave columns of unequal length.
            records.append({field: loaded[field] for field in fields})
        except Exception as e:
            # Deliberately broad: loading is best-effort, and one bad file
            # must not abort the whole run.
            print(f'Error loading file {file}: {e}')
            if text is not None:
                print(f'File content: {text}')
    return records


# ensure data folder exists
os.makedirs(DATA_DIR, exist_ok=True)

# The record list is passed straight to the constructor, so no intermediate
# container stays referenced at notebook level after the frame is built.
df = pd.DataFrame(load_records(), columns=list(FIELDS))

print(df)

           country  people  duration
0           Mexico       6    26.140
1          Denmark       5    21.849
2      New Zealand       8    70.364
3           Canada       9    29.935
4     South Africa       6    18.440
...            ...     ...       ...
2545      Bulgaria       9    67.209
2546        Sweden       8    16.163
2547       Albania       9    91.478
2548         Ghana       5    84.680
2549       Ireland       9    17.658

[2550 rows x 3 columns]


In [36]:
# Tally rows per country once and reuse the result for both printouts.
country_counts = df['country'].value_counts()

# Number of distinct countries present in the data.
print(len(country_counts))

# One "<country>: <count>" line per country, most frequent first.
summary_lines = [f'{name}: {total}' for name, total in country_counts.items()]
print('\n'.join(summary_lines))

90
Denmark: 62
Ukraine: 58
Russia: 52
Türkiye: 52
Switzerland: 50
New Zealand: 49
Spain: 48
Indonesia: 48
Chile: 47
Poland: 47
Hungary: 47
Sweden: 46
Mexico: 46
Australia: 46
United States: 45
United Kingdom: 45
Philippines: 45
Peru: 44
Portugal: 42
Norway: 42
Greece: 42
Czech Republic: 41
Germany: 41
Colombia: 41
Italy: 40
Ireland: 40
Finland: 40
Japan: 40
South Africa: 40
Bulgaria: 39
Thailand: 39
Austria: 38
India: 37
Netherlands: 36
France: 32
Argentina: 32
Belgium: 32
Romania: 31
Canada: 31
Croatia: 29
Latvia: 29
Luxembourg: 29
Brazil: 28
Malaysia: 26
Iceland: 25
Bangladesh: 24
Albania: 24
Nigeria: 24
Bolivia: 23
Tunisia: 23
Singapore: 23
Cambodia: 22
Mongolia: 22
South Korea: 22
United Arab Emirates: 22
Botswana: 22
Lesotho: 21
Ecuador: 21
Lithuania: 21
Senegal: 21
Guatemala: 20
Slovakia: 20
Uruguay: 19
Estonia: 19
North Macedonia: 19
Serbia: 18
Israel: 18
Ghana: 17
Montenegro: 17
Eswatini: 16
Slovenia: 15
Kenya: 14
Jordan: 14
Kyrgyz Republic: 14
Sri Lanka: 14
China: 13
Malta: 12

In [35]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of df; the recorded output below covers `people` and `duration`.
df.describe()

Unnamed: 0,people,duration
count,2550.0,2550.0
mean,7.779608,50.569091
std,1.541137,30.490846
min,3.0,6.91
25%,7.0,18.9485
50%,8.5,46.076
75%,9.0,84.70325
max,9.0,184.335
