In [1]:
import pandas as pd

### Pass a URL to the pd.read_csv Method

Fetch the latest version of the dataset from the internet automatically(we don't need to update manually or download dataset locally).

In [3]:
url = 'https://data.cityofnewyork.us/resource/25th-nujf.csv'
baby_names = pd.read_csv(url)
baby_names

Unnamed: 0,brth_yr,gndr,ethcty,nm,cnt,rnk
0,2019,FEMALE,ASIAN AND PACIFIC ISLANDER,Chloe,131,1
1,2019,FEMALE,ASIAN AND PACIFIC ISLANDER,Olivia,109,2
2,2019,FEMALE,ASIAN AND PACIFIC ISLANDER,Mia,88,3
3,2019,FEMALE,ASIAN AND PACIFIC ISLANDER,Sophia,71,4
4,2019,FEMALE,ASIAN AND PACIFIC ISLANDER,Emma,71,4
...,...,...,...,...,...,...
995,2019,MALE,ASIAN AND PACIFIC ISLANDER,Andrew,39,26
996,2019,MALE,ASIAN AND PACIFIC ISLANDER,Ibrahim,39,26
997,2019,MALE,ASIAN AND PACIFIC ISLANDER,Adam,38,27
998,2019,MALE,ASIAN AND PACIFIC ISLANDER,Abdullah,38,27


### Quick Object Conversions

In [6]:
baby_names['nm'].to_frame() # converts series to df

Unnamed: 0,nm
0,Chloe
1,Olivia
2,Mia
3,Sophia
4,Emma
...,...
995,Andrew
996,Ibrahim
997,Adam
998,Abdullah


In [8]:
baby_names['nm'].to_list() # converts to a list
baby_names['nm'].to_dict() # converts to dict, make sure that indexes in the df are unique before converting to dict

{0: 'Chloe',
 1: 'Olivia',
 2: 'Mia',
 3: 'Sophia',
 4: 'Emma',
 5: 'Amelia',
 6: 'Charlotte',
 7: 'Emily',
 8: 'Grace',
 9: 'Isabella',
 10: 'Ella',
 11: 'Evelyn',
 12: 'Ava',
 13: 'Hannah',
 14: 'Bella',
 15: 'Aria',
 16: 'Sarah',
 17: 'Hailey',
 18: 'Arya',
 19: 'Claire',
 20: 'Fatima',
 21: 'Ellie',
 22: 'Maryam',
 23: 'Kayla',
 24: 'Zoe',
 25: 'Audrey',
 26: 'Avery',
 27: 'Abigail',
 28: 'Anna',
 29: 'Maya',
 30: 'Alyssa',
 31: 'Alice',
 32: 'Sofia',
 33: 'Riley',
 34: 'Jasmine',
 35: 'Zoey',
 36: 'Celine',
 37: 'Emilia',
 38: 'Tenzin',
 39: 'Inaaya',
 40: 'Joanna',
 41: 'Fiona',
 42: 'Alina',
 43: 'Sophie',
 44: 'Leah',
 45: 'Anaya',
 46: 'Ayesha',
 47: 'Luna',
 48: 'Zainab',
 49: 'Eva',
 50: 'Annie',
 51: 'Amaya',
 52: 'Mila',
 53: 'Angela',
 54: 'Elena',
 55: 'Inaya',
 56: 'Anabia',
 57: 'Ariana',
 58: 'Manha',
 59: 'Penelope',
 60: 'Zara',
 61: 'Nina',
 62: 'Katherine',
 63: 'Aliza',
 64: 'Scarlett',
 65: 'Zahra',
 66: 'Layla',
 67: 'Natalie',
 68: 'Eleanor',
 69: 'Harper',
 7

In [9]:
# Join all the names with comma delimiter, proper capitalisation, remove duplicates, sort the first names -> 1000 entries
','.join(baby_names['nm'].str.title().drop_duplicates().sort_values()) 

'Aaliyah,Aaron,Abby,Abdullah,Abigail,Ada,Adam,Addison,Adelaide,Adele,Adeline,Adelynn,Adina,Adriana,Ahuva,Aicha,Aiden,Ailani,Aileen,Aisha,Aiza,Alahia,Alaia,Alana,Alani,Alanna,Alayna,Aleah,Aleena,Alessandra,Alessia,Alexa,Alexander,Alexandra,Alexis,Alia,Alice,Alicia,Alina,Alisha,Alison,Aliya,Aliyah,Aliza,Allison,Alma,Alondra,Alyssa,Amaia,Amalia,Amanda,Amani,Amara,Amaya,Amayah,Amber,Amelia,Amelie,Amina,Aminata,Amira,Amirah,Amiyah,Amora,Amy,Ana,Anabia,Analia,Anastasia,Anaya,Andrea,Andrew,Angela,Angelica,Angelina,Angie,Aniyah,Anna,Annabel,Annabelle,Annalise,Annie,Antonella,Anya,April,Arham,Aria,Ariah,Ariana,Arianna,Ariel,Ariella,Arielle,Ariyah,Arlette,Arya,Ashley,Astrid,Athena,Aubree,Aubrey,Audrey,Aurelia,Aurora,Austin,Autumn,Ava,Avery,Aviva,Aya,Ayaan,Ayala,Ayat,Ayesha,Ayla,Ayra,Baila,Batsheva,Beatrice,Bella,Bianca,Bina,Bintou,Blair,Blake,Blima,Blimy,Bracha,Breindy,Briana,Brianna,Brielle,Brooke,Brooklyn,Bruchy,Cali,Camila,Camilla,Camille,Caroline,Cataleya,Catalina,Catherine,Cecilia,Celeste,C

### Export CSV file with the to_csv Method

In [8]:
# Method used to export a csv file (check project root in jupyter notebook for file)

baby_names.to_csv('NYC_Baby_Names.csv') # exports with default settings
baby_names.to_csv('NYC_Baby_Names.csv', index = False, columns = ['gndr', 'nm'], encoding = 'utf-8') 
# exports without index, with select columns, utf-8 encoding -> overwrites prev export

### Import Excel File into pandas

In [3]:
df = pd.read_excel('Data - Single Worksheet.xlsx') # importing an excel file into pandas -> Single worksheet
df

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [5]:
pd.read_excel('Data - Multiple Worksheets.xlsx') # importing an excel file into pandas -> Multiple worksheet
# Default is to import first worksheet from excel file

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [6]:
pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = 1) # importing second worksheet from excel file ('Data 2')

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


In [11]:
data = pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = ['Data 1','Data 2']) # worksheet names
data
# Importing 2 worksheets from excel file into dict

{'Data 1':   First Name Last Name           City Gender
 0    Brandon     James          Miami      M
 1       Sean   Hawkins         Denver      M
 2       Judy       Day    Los Angeles      F
 3     Ashley      Ruiz  San Francisco      F
 4  Stephanie     Gomez       Portland      F,
 'Data 2':   First Name Last Name           City Gender
 0     Parker     Power        Raleigh      F
 1    Preston  Prescott   Philadelphia      F
 2    Ronaldo   Donaldo         Bangor      M
 3      Megan   Stiller  San Francisco      M
 4     Bustin    Jieber         Austin      F}

In [9]:
type(data) # returns dict

dict

In [12]:
data['Data 1'] # we get the first worksheet from dict

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [13]:
data['Data 2'] # we get the second worksheet from dict

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


In [14]:
data = pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = None) # none -> imports all the worksheets from file
data

{'Data 1':   First Name Last Name           City Gender
 0    Brandon     James          Miami      M
 1       Sean   Hawkins         Denver      M
 2       Judy       Day    Los Angeles      F
 3     Ashley      Ruiz  San Francisco      F
 4  Stephanie     Gomez       Portland      F,
 'Data 2':   First Name Last Name           City Gender
 0     Parker     Power        Raleigh      F
 1    Preston  Prescott   Philadelphia      F
 2    Ronaldo   Donaldo         Bangor      M
 3      Megan   Stiller  San Francisco      M
 4     Bustin    Jieber         Austin      F}

### Export to Excel File

Use the to_excel() Method to write pandas dfs to an Excel file.

In [4]:
girls = baby_names[baby_names['gndr'] == 'FEMALE'] # specifying the dfs that will be written to the excel file
boys = baby_names[baby_names['gndr'] == 'MALE']
boys.head()

Unnamed: 0,brth_yr,gndr,ethcty,nm,cnt,rnk
966,2019,MALE,ASIAN AND PACIFIC ISLANDER,Ethan,154,1
967,2019,MALE,ASIAN AND PACIFIC ISLANDER,Lucas,153,2
968,2019,MALE,ASIAN AND PACIFIC ISLANDER,Aiden,147,3
969,2019,MALE,ASIAN AND PACIFIC ISLANDER,Muhammad,143,4
970,2019,MALE,ASIAN AND PACIFIC ISLANDER,Jayden,118,5


In [5]:
excel_file = pd.ExcelWriter('Baby_names.xlsx') # specifying the excel file to write to, ExcelWriter obj

In [6]:
girls.to_excel(excel_file, sheet_name = 'Girls', index = False, encoding = 'utf-8') # worksheet 1- defined sheet_name, no index, all columns
boys.to_excel(excel_file, sheet_name = 'Boys', index = False, columns = ['gndr', 'nm', 'cnt'], encoding = 'utf-8') # ws2 - defined sheet_name, spec. columns

  return func(*args, **kwargs)


In [7]:
excel_file.save() # creating the file, saving contents

  excel_file.save() # creating the file, saving contents
