In [1]:
import pandas as pd # pandas handles panel data
import json # Allows automatic conversion to json data format
from pymongo import MongoClient #We just want the MongoClient part today
from os.path import join

# # Structure the Swimming dataset into documents
 This uses Python to convert a source file csv into a 1:few structure for MongoDB.
 This program reads the full csv file into a dataframe called df.
 
https://data.world/romanian-data/swimming-dataset-top-200-world-times/workspace/data-dictionary
 The csv has been uploaded into your module in Brightspace.

## Understanding your data

There is a 'data dictionary' of sorts on the original website, but it doesn't tell us enough.

In [2]:
# Investigate the dataset that you are using.  First, load the dataset.

df = pd.read_csv(join('../../Datasets/','Swimming Database 2.csv'), encoding='latin-1')
# First, let's check the columns we have and make sure the names 
# are okay and we want all of them.
print("Columns in data frame")
print(df.columns)
# Then print the shape - in this case, the number of rows and columns.
print('DataFrame shape ',df.shape)

Columns in data frame
Index(['Event Name', 'Swim time', 'Swim date', 'Event description',
       'Team Code', 'Team Name', 'Athlete Full Name', 'Gender',
       'Athlete birth date', 'Rank_Order', 'City', 'Country Code',
       'Duration (hh:mm:ss:ff)'],
      dtype='object')
DataFrame shape  (5200, 13)


#### In this case, the column names have spaces, so we won't be able to use dot notation.  We'll replace the spaces by "_"

In [3]:
df.columns = list(map(lambda x: x.replace(" ", "_"), df.columns))


### Make sure each attribute has the correct data  type

In [4]:
df.dtypes

Event_Name                object
Swim_time                 object
Swim_date                 object
Event_description         object
Team_Code                 object
Team_Name                 object
Athlete_Full_Name         object
Gender                    object
Athlete_birth_date        object
Rank_Order                 int64
City                      object
Country_Code              object
Duration_(hh:mm:ss:ff)    object
dtype: object

In [5]:
df[['Event_Name',  'Event_description', 'Team_Code', 'Team_Name', 'Athlete_Full_Name', 'Gender','City', 'Country_Code' ]] =\
df[['Event_Name',  'Event_description', 'Team_Code', 'Team_Name', 'Athlete_Full_Name', 'Gender','City', 'Country_Code' ]].astype("string")

#### We still have two dates and two times - are the times the same?
'Swim_time', 'Swim_date','Athlete_birth_date','Duration_(hh:mm:ss:ff)'

In [6]:
df[['Swim_time','Duration_(hh:mm:ss:ff)']]

Unnamed: 0,Swim_time,Duration_(hh:mm:ss:ff)
0,46.86,0:0:46:86
1,46.91,0:0:46:91
2,46.94,0:0:46:94
3,46.96,0:0:46:96
4,46.98,0:0:46:98
...,...,...
5195,16:02.10,0:16:02:10
5196,16:02.15,0:16:02:15
5197,16:02.21,0:16:02:21
5198,16:02.26,0:16:02:26


#### In this case, they're the same.  Normally we could check programmatically, but not today.
Rename Duration_(hh:mm:ss:ff) to Duration and drop Swim_time.

In [7]:

df = df.rename(columns={"Duration_(hh:mm:ss:ff)": "Duration"})
df = df.drop(columns=['Swim_time'])

In [8]:
df.columns

Index(['Event_Name', 'Swim_date', 'Event_description', 'Team_Code',
       'Team_Name', 'Athlete_Full_Name', 'Gender', 'Athlete_birth_date',
       'Rank_Order', 'City', 'Country_Code', 'Duration'],
      dtype='object')

## Dates and times
Json conversion expects strings for dates and times, but the format needs to be correct.  If it is not, convert it to datetime, then back to string.


In [9]:
df['Athlete_birth_date'].head()

0    9/15/2004
1    1/10/1987
2     5/1/1983
3    8/16/1996
4    9/15/2004
Name: Athlete_birth_date, dtype: object

In [10]:
df['Athlete_birth_date']=pd.to_datetime(df['Athlete_birth_date'],format='%m/%d/%Y').dt.date
df['Athlete_birth_date']=df['Athlete_birth_date'].astype("string")

In [11]:
df['Athlete_birth_date'].head()

0    2004-09-15
1    1987-01-10
2    1983-05-01
3    1996-08-16
4    2004-09-15
Name: Athlete_birth_date, dtype: string

In [12]:
df['Swim_date']=pd.to_datetime(df['Swim_date'],format='%m/%d/%Y').dt.date
df['Swim_date']=df['Swim_date'].astype("string")

In [13]:
df.tail(2)

Unnamed: 0,Event_Name,Swim_date,Event_description,Team_Code,Team_Name,Athlete_Full_Name,Gender,Athlete_birth_date,Rank_Order,City,Country_Code,Duration
5198,Olympic Games Tokyo 2020,2021-07-26,Women 1500 Freestyle LCM Female,HUN,Hungary,"MIHALYVARI-FARKAS, Viktoria",F,2003-11-26,199,Tokyo,JPN,0:16:02:26
5199,European Championships 2010,2010-08-14,Women 1500 Freestyle LCM Female,IRL,Ireland,"MURPHY, Grainne",F,1993-03-26,200,Budapest,HUN,0:16:02:29


### Exploring
 - Let's look to see how many unique values there are in each column

In [14]:
print('Unique values')
print(df.nunique())
# Print the top five rows
print('Top 5 rows')
print(df.head(2))
print(df.tail(2))

Unique values
Event_Name             476
Swim_date             1001
Event_description       26
Team_Code               58
Team_Name               58
Athlete_Full_Name      716
Gender                   2
Athlete_birth_date     683
Rank_Order             200
City                   204
Country_Code            50
Duration              2964
dtype: int64
Top 5 rows
                           Event_Name   Swim_date           Event_description  \
0         European Championships 2022  2022-08-13  Men 100 Freestyle LCM Male   
1  13th FINA World Championships 2009  2009-07-30  Men 100 Freestyle LCM Male   

  Team_Code Team_Name   Athlete_Full_Name Gender Athlete_birth_date  \
0       ROU   Romania     POPOVICI, David      M         2004-09-15   
1       BRA    Brazil  CIELO FILHO, Cesar      M         1987-01-10   

   Rank_Order  City Country_Code   Duration  
0           1  Rome          ITA  0:0:46:86  
1           2  Rome          ITA  0:0:46:91  
                       Event_Name   Swim_d

### Promising relationships:
 - It looks like we could consider grouping under any of the following:
        - Team Code and Team Name (we should check that they have a 1:1 matching)
        - Event Name (how does the Event_description correspond to this? Are they related?)
        - Country and City?  
        - Athletes and their swims?

In [15]:
# |Isthere a 1:1 matching between team_code and team_name?
for i in  sorted(df.Team_Code.unique()):
    if len(df[df.Team_Code==i].Team_Name.unique())!=1:
        print(i)

#### Explore the relationship between event description and event name

In [16]:
print(df.Event_description.unique())

<StringArray>
[       'Men 100 Freestyle LCM Male',        'Men 100 Butterfly LCM Male',
       'Men 100 Backstroke LCM Male',    'Women 100 Freestyle LCM Female',
    'Women 100 Butterfly LCM Female',     'Men 100 Breaststroke LCM Male',
   'Women 100 Backstroke LCM Female', 'Women 100 Breaststroke LCM Female',
        'Men 200 Freestyle LCM Male',        'Men 200 Butterfly LCM Male',
       'Men 200 Backstroke LCM Male',    'Women 200 Freestyle LCM Female',
           'Men 200 Medley LCM Male',    'Women 200 Butterfly LCM Female',
   'Women 200 Backstroke LCM Female',     'Men 200 Breaststroke LCM Male',
       'Women 200 Medley LCM Female', 'Women 200 Breaststroke LCM Female',
        'Men 400 Freestyle LCM Male',    'Women 400 Freestyle LCM Female',
           'Men 400 Medley LCM Male',       'Women 400 Medley LCM Female',
        'Men 800 Freestyle LCM Male',    'Women 800 Freestyle LCM Female',
       'Men 1500 Freestyle LCM Male',   'Women 1500 Freestyle LCM Female']
Length: 26,

In [17]:
for i in sorted(df.Event_description.unique()):
       if len(df[df.Event_description==i].Event_Name.unique())!=1:
        print(i)

Men 100 Backstroke LCM Male
Men 100 Breaststroke LCM Male
Men 100 Butterfly LCM Male
Men 100 Freestyle LCM Male
Men 1500 Freestyle LCM Male
Men 200 Backstroke LCM Male
Men 200 Breaststroke LCM Male
Men 200 Butterfly LCM Male
Men 200 Freestyle LCM Male
Men 200 Medley LCM Male
Men 400 Freestyle LCM Male
Men 400 Medley LCM Male
Men 800 Freestyle LCM Male
Women 100 Backstroke LCM Female
Women 100 Breaststroke LCM Female
Women 100 Butterfly LCM Female
Women 100 Freestyle LCM Female
Women 1500 Freestyle LCM Female
Women 200 Backstroke LCM Female
Women 200 Breaststroke LCM Female
Women 200 Butterfly LCM Female
Women 200 Freestyle LCM Female
Women 200 Medley LCM Female
Women 400 Freestyle LCM Female
Women 400 Medley LCM Female
Women 800 Freestyle LCM Female


#### Okay, they aren't 1:1.  Let's see what event descriptions are there for each event name

In [18]:
for i in sorted(df.Event_Name.unique()):
    print('\n', i, '\n', df[df.Event_Name==i].Event_description.unique())


 - 
 <StringArray>
['Women 100 Butterfly LCM Female', 'Women 200 Butterfly LCM Female',
  'Men 200 Breaststroke LCM Male',        'Men 400 Medley LCM Male']
Length: 4, dtype: string

 10e Open de France de Natation Vichy Val d'Allier 
 <StringArray>
['Women 200 Medley LCM Female', 'Men 400 Medley LCM Male']
Length: 2, dtype: string

 10th FINA World Championships 2003 
 <StringArray>
[     'Men 100 Butterfly LCM Male',      'Men 200 Freestyle LCM Male',
      'Men 200 Butterfly LCM Male',         'Men 200 Medley LCM Male',
      'Men 400 Freestyle LCM Male',         'Men 400 Medley LCM Male',
      'Men 800 Freestyle LCM Male',     'Men 1500 Freestyle LCM Male',
 'Women 1500 Freestyle LCM Female']
Length: 9, dtype: string

 10th Pan Pacific Championships 2006 
 <StringArray>
[     'Men 200 Butterfly LCM Male',     'Men 200 Backstroke LCM Male',
         'Men 200 Medley LCM Male',  'Women 200 Butterfly LCM Female',
         'Men 400 Medley LCM Male', 'Women 1500 Freestyle LCM Female']



 52nd International Trophy Settecolli 
 <StringArray>
[   'Women 100 Butterfly LCM Female', 'Women 100 Breaststroke LCM Female',
    'Women 200 Freestyle LCM Female']
Length: 3, dtype: string

 53th International Trophy Settecolli 
 <StringArray>
['Women 100 Freestyle LCM Female', 'Women 200 Freestyle LCM Female',
     'Men 200 Butterfly LCM Male',     'Men 400 Freestyle LCM Male',
 'Women 400 Freestyle LCM Female',     'Men 800 Freestyle LCM Male']
Length: 6, dtype: string

 54th International Trophy Settecolli 
 <StringArray>
['Men 100 Breaststroke LCM Male',    'Men 200 Butterfly LCM Male',
    'Men 400 Freestyle LCM Male',       'Men 400 Medley LCM Male',
   'Men 1500 Freestyle LCM Male']
Length: 5, dtype: string

 55th International Trophy Settecolli 
 <StringArray>
[   'Women 100 Butterfly LCM Female',     'Men 100 Breaststroke LCM Male',
 'Women 100 Breaststroke LCM Female',   'Women 200 Backstroke LCM Female',
 'Women 200 Breaststroke LCM Female',       'Women 400 Medl


 GHF Australian Age Championships 2018 
 <StringArray>
['Women 100 Freestyle LCM Female']
Length: 1, dtype: string

 German National Championships (50m) 
 <StringArray>
[    'Men 100 Butterfly LCM Male',     'Men 200 Freestyle LCM Male',
        'Men 200 Medley LCM Male', 'Women 200 Butterfly LCM Female',
  'Men 200 Breaststroke LCM Male',     'Men 400 Freestyle LCM Male',
     'Men 800 Freestyle LCM Male',    'Men 1500 Freestyle LCM Male']
Length: 8, dtype: string

 German Open 
 <StringArray>
['Women 200 Butterfly LCM Female']
Length: 1, dtype: string

 German Open  
 <StringArray>
['Women 200 Butterfly LCM Female']
Length: 1, dtype: string

 German Trials 2021 
 <StringArray>
[ 'Women 100 Freestyle LCM Female',      'Men 400 Freestyle LCM Male',
     'Men 1500 Freestyle LCM Male', 'Women 1500 Freestyle LCM Female']
Length: 4, dtype: string

 Hamamatsu Championships (50m) 
 <StringArray>
[    'Men 200 Medley LCM Male', 'Women 200 Medley LCM Female',
     'Men 400 Medley LCM Male']
L


 Meeting Open de la Mediterranee Circuit FFN - Gold 
 <StringArray>
[ 'Women 200 Freestyle LCM Female', 'Women 200 Backstroke LCM Female',
     'Women 200 Medley LCM Female',     'Women 400 Medley LCM Female']
Length: 4, dtype: string

 Mel Zajac Jr International 2012 
 <StringArray>
['Men 400 Freestyle LCM Male']
Length: 1, dtype: string

 Mel Zajac Jr. International 2019 
 <StringArray>
['Men 200 Butterfly LCM Male']
Length: 1, dtype: string

 Mesa Grand Prix 2014 
 <StringArray>
['Women 800 Freestyle LCM Female']
Length: 1, dtype: string

 Mesa PSS 2016 
 <StringArray>
['Women 100 Butterfly LCM Female', 'Women 200 Freestyle LCM Female',
 'Women 400 Freestyle LCM Female',    'Women 400 Medley LCM Female',
 'Women 800 Freestyle LCM Female']
Length: 5, dtype: string

 Mission Viejo Swim Meet of Champions 
 <StringArray>
[   'Men 100 Butterfly LCM Male',   'Men 100 Backstroke LCM Male',
 'Men 100 Breaststroke LCM Male']
Length: 3, dtype: string

 Missouri Grand Prix 2008 
 <StringArray


 Open Cup of Belarus 
 <StringArray>
['Women 100 Freestyle LCM Female', 'Women 100 Butterfly LCM Female']
Length: 2, dtype: string

 Open Dutch Championships 
 <StringArray>
['Women 400 Medley LCM Female']
Length: 1, dtype: string

 Open National Swimming Championships 
 <StringArray>
['Men 800 Freestyle LCM Male', 'Men 1500 Freestyle LCM Male']
Length: 2, dtype: string

 Open Tournament 2015 
 <StringArray>
['Women 100 Freestyle LCM Female']
Length: 1, dtype: string

 Open de France 2017 
 <StringArray>
['Women 100 Breaststroke LCM Female', 'Women 200 Breaststroke LCM Female',
       'Women 400 Medley LCM Female']
Length: 3, dtype: string

 PV Senior Championships 2014 (50m) 
 <StringArray>
['Women 200 Freestyle LCM Female']
Length: 1, dtype: string

 Pan Pacs Trials 2018 
 <StringArray>
[ 'Women 100 Butterfly LCM Female',     'Men 200 Backstroke LCM Male',
  'Women 400 Freestyle LCM Female', 'Women 1500 Freestyle LCM Female']
Length: 4, dtype: string

 Philipps 66 USA National Champ

## Take one of the groupings and design a collection around it.  

In [19]:

for i in sorted(df.Athlete_Full_Name.unique()):
     if len(df[df.Athlete_Full_Name==i].Athlete_birth_date.unique())!=1:
        print(i,"Not 1 birth date")
     if len(df[df.Athlete_Full_Name==i].Team_Code.unique())!=1:
        print(i,"Not 1 Team Code", len(df[df.Athlete_Full_Name==i].Team_Code.unique()))
     if len(df[df.Athlete_Full_Name==i].Gender.unique())!=1:
        print(i,"Not 1 Gender")

CHIKUNOVA, Evgeniia Not 1 Team Code 2
CHUPKOV, Anton Not 1 Team Code 2
EFIMOVA, Yuliya Not 1 Team Code 2
GANDY, Ellen Not 1 Team Code 2
KIRPICHNIKOVA, Anastasiia Not 1 Team Code 2
KOLESNIKOV, Kliment Not 1 Team Code 2
MALYUTIN, Martin Not 1 Team Code 2
MINAKOV, Andrei Not 1 Team Code 2
RYLOV, Evgeny Not 1 Team Code 2


### Design

We will have a 1:few between Athlete and Swim.
The columns are 'Event_Name', 'Swim_time', 'Swim_date', 'Event_description', 'Team_Code', 'Team_Name', 'Athlete_Full_Name', 'Gender', 'Athlete_birth_date', 'Rank_Order', 'City', 'Country_Code', 'Duration_(hh:mm:ss:ff)'

If we design one document per athlete, let's take the unique athlete information from:
    'Athlete_Full_Name', 'Gender', 'Athlete_birth_date'

We  can then have an embedded array of the athlete's swims, from:
'Event_Name', 'Event_description', 'Swim_time', 'Swim_date', 'Team_Code','Team_Name' 'Rank_Order', 'City', 'Country_Code', 'Duration_(hh:mm:ss:ff)'


#### Json serializable data can't be dates or times.  Let's use the string version of our time.

In [21]:
print('Event_Name', df.Event_Name.isnull().sum())
print('Event_description', df.Event_description.isnull().sum())
print('Swim_date', df.Swim_date.isnull().sum())
print('Rank_Order', df.Rank_Order.isnull().sum())
print('City', df.City.isnull().sum())
print('Duration', df.Duration.isnull().sum())

Event_Name 0
Event_description 0
Swim_date 0
Rank_Order 0
City 2
Duration 0


In [22]:
df[df.City.isnull()]

Unnamed: 0,Event_Name,Swim_date,Event_description,Team_Code,Team_Name,Athlete_Full_Name,Gender,Athlete_birth_date,Rank_Order,City,Country_Code,Duration
982,2012 AUS Invite,2013-12-16,Women 100 Butterfly LCM Female,AUS,Australia,"GANDY, Ellen",F,1991-08-15,182,,AUS,0:0:56:94
4257,Italy Invite,2016-04-30,Women 400 Medley LCM Female,HUN,Hungary,"HOSSZU, Katinka",F,1989-05-03,58,,ITA,0:04:32:68


In [23]:
df["City"].fillna("Not Specified", inplace = True)

In [24]:
df[['Event_Name', 
                                                                  'Event_description', 
                                                                  'Swim_date', 
                                                                  'Rank_Order', 
                                                                  'City', 
                                                                  'Country_Code', 
                                                                  'Duration']].isnull().values.any()

False

### Extracting the documents

#### Set up an athlete's dataframe

In [25]:
adf = df[['Athlete_Full_Name', 'Gender', 'Athlete_birth_date']].drop_duplicates()

In [26]:
print(adf.Athlete_Full_Name)

0                   POPOVICI, David
1                CIELO FILHO, Cesar
2                    BERNARD, Alain
3                   DRESSEL, Caeleb
6                   MCEVOY, Cameron
                   ...             
5184            NORDIN, Emma Ayasha
5187                GORMAN, Melissa
5191                    SHAO, Yiwen
5196    MIHALYVARI-FARKAS, Viktoria
5199                MURPHY, Grainne
Name: Athlete_Full_Name, Length: 716, dtype: string


In [27]:
adf.describe(include='all')

Unnamed: 0,Athlete_Full_Name,Gender,Athlete_birth_date
count,716,716,716
unique,716,2,683
top,"POPOVICI, David",F,1992-05-20
freq,1,409,3


In [28]:
adf.isnull().values.any()

False

### Connect to MongoDB and make a collection

In [29]:
# #### Set up the database and collection you will use.
uri = 'mongodb://admin:Sp00ky!@localhost:27017/?AuthSource=admin'
client = MongoClient(uri)

mydb = client["Swimming"]
mycol = mydb["Athlete"]
mycol.drop()

#

#### Loop through each athlete to create a document

In [30]:
for row in adf.itertuples():
    print(row.Athlete_Full_Name, type(row))
    theirswims = df[df.Athlete_Full_Name==row.Athlete_Full_Name][['Event_Name', 
                                                                  'Event_description', 
                                                                  'Swim_date', 
                                                                  'Team_Code',
                                                                  'Team_Name',
                                                                  'Rank_Order', 
                                                                  'City', 
                                                                  'Country_Code', 
                                                                  'Duration']]
    entries = json.dumps({"Name": row.Athlete_Full_Name,
                          "Birth_Date": row.Athlete_birth_date,
                          "Gender":row.Gender,
                          "Swims": theirswims.to_dict('records')
                             })
    x = mycol.insert_one(json.loads(entries))

POPOVICI, David <class 'pandas.core.frame.Pandas'>
CIELO FILHO, Cesar <class 'pandas.core.frame.Pandas'>
BERNARD, Alain <class 'pandas.core.frame.Pandas'>
DRESSEL, Caeleb <class 'pandas.core.frame.Pandas'>
MCEVOY, Cameron <class 'pandas.core.frame.Pandas'>
SULLIVAN, Eamon <class 'pandas.core.frame.Pandas'>
CHALMERS, Kyle <class 'pandas.core.frame.Pandas'>
MAGNUSSEN, James <class 'pandas.core.frame.Pandas'>
KOLESNIKOV, Kliment <class 'pandas.core.frame.Pandas'>
BOUSQUET, Frederick <class 'pandas.core.frame.Pandas'>
HAYDEN, Brent <class 'pandas.core.frame.Pandas'>
WALTERS, David <class 'pandas.core.frame.Pandas'>
NYSTRAND, Stefan <class 'pandas.core.frame.Pandas'>
HELD, Ryan <class 'pandas.core.frame.Pandas'>
GRINEV, Vladislav <class 'pandas.core.frame.Pandas'>
MIRESSI, Alessandro <class 'pandas.core.frame.Pandas'>
MILAK, Kristof <class 'pandas.core.frame.Pandas'>
PHELPS, Michael <class 'pandas.core.frame.Pandas'>
GROUSSET, Maxime <class 'pandas.core.frame.Pandas'>
ADRIAN, Nathan <class 

WASICK, Katarzyna <class 'pandas.core.frame.Pandas'>
GALLAGHER, Erin <class 'pandas.core.frame.Pandas'>
ANTONIOU, Kalia <class 'pandas.core.frame.Pandas'>
SMIT, Julia <class 'pandas.core.frame.Pandas'>
MEDEIROS, Etiene <class 'pandas.core.frame.Pandas'>
HANSSON, Louise <class 'pandas.core.frame.Pandas'>
SHIRAI, Rio <class 'pandas.core.frame.Pandas'>
POON, Victoria <class 'pandas.core.frame.Pandas'>
APOSTALON, Anika <class 'pandas.core.frame.Pandas'>
SEGEL, Janja <class 'pandas.core.frame.Pandas'>
MCKEOWN, Kaylee <class 'pandas.core.frame.Pandas'>
CASEY, Hannah <class 'pandas.core.frame.Pandas'>
FERRAIOLI, Erika <class 'pandas.core.frame.Pandas'>
O'LEARY, Mia Patricia <class 'pandas.core.frame.Pandas'>
MILLS, Alice <class 'pandas.core.frame.Pandas'>
SAKAI, Natsumi <class 'pandas.core.frame.Pandas'>
MUNOZ DEL CAMPO, Lidon <class 'pandas.core.frame.Pandas'>
SHKURDAI, Anastasiya <class 'pandas.core.frame.Pandas'>
MATSUMOTO, Yayoi <class 'pandas.core.frame.Pandas'>
GORBENKO, Anastasia <clas

BERENS, Richard <class 'pandas.core.frame.Pandas'>
KIBLER, Drew <class 'pandas.core.frame.Pandas'>
HAAS, Townley <class 'pandas.core.frame.Pandas'>
FRASER-HOLMES, Thomas <class 'pandas.core.frame.Pandas'>
LOBINTSEV, Nikita <class 'pandas.core.frame.Pandas'>
AUBOECK, Felix <class 'pandas.core.frame.Pandas'>
VANDERKAAY, Peter <class 'pandas.core.frame.Pandas'>
GRAHAM, Alexander <class 'pandas.core.frame.Pandas'>
KRASNYKH, Aleksandr <class 'pandas.core.frame.Pandas'>
DWYER, Conor <class 'pandas.core.frame.Pandas'>
UCHIDA, Sho <class 'pandas.core.frame.Pandas'>
DJAKOVIC, Antonio <class 'pandas.core.frame.Pandas'>
MARTENS, Lukas <class 'pandas.core.frame.Pandas'>
MONK, Kenrick <class 'pandas.core.frame.Pandas'>
JI, Xinjie <class 'pandas.core.frame.Pandas'>
GIREV, Ivan <class 'pandas.core.frame.Pandas'>
WINNINGTON, Elijah <class 'pandas.core.frame.Pandas'>
KOZMA, Dominik <class 'pandas.core.frame.Pandas'>
DOVGALYUK, Mikhail <class 'pandas.core.frame.Pandas'>
FOSTER, Carson <class 'pandas.cor

SHI, Jinglin <class 'pandas.core.frame.Pandas'>
JUKIC, Mirna <class 'pandas.core.frame.Pandas'>
ESCOBEDO, Emily <class 'pandas.core.frame.Pandas'>
MAMIE, Lisa <class 'pandas.core.frame.Pandas'>
CORBETT, Kaylene <class 'pandas.core.frame.Pandas'>
ULYETT, Jocelyn Kate <class 'pandas.core.frame.Pandas'>
STRAUCH, Jenna <class 'pandas.core.frame.Pandas'>
SMITH, Kierra <class 'pandas.core.frame.Pandas'>
HOSTMAN, Joline <class 'pandas.core.frame.Pandas'>
TUTTON, Chloe <class 'pandas.core.frame.Pandas'>
BEARD, Amanda <class 'pandas.core.frame.Pandas'>
VALL MONTERO, Jessica <class 'pandas.core.frame.Pandas'>
MELLOULI, Ous <class 'pandas.core.frame.Pandas'>
ZHANG, Lin <class 'pandas.core.frame.Pandas'>
HORTON, Mack <class 'pandas.core.frame.Pandas'>
HACKETT, Grant <class 'pandas.core.frame.Pandas'>
JENSEN, Larsen <class 'pandas.core.frame.Pandas'>
DETTI, Gabriele <class 'pandas.core.frame.Pandas'>
MCLOUGHLIN, Jack Alan <class 'pandas.core.frame.Pandas'>
COSTA, Guilherme <class 'pandas.core.frame

In [32]:
# #### Close the MongoDB connection

client.close()