# Chapter 12 Input and Output in Pandas

## Pass a URL to the pd.read_csv Method

In [3]:
import pandas as pd

In [41]:
# The URL is an ordinary string literal, which is why it is wrapped in quotes.
# pd.read_csv accepts it in place of a local file path.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
# Keep only the first 20 rows for the examples that follow.
baby_names = pd.read_csv(url).head(20)
baby_names

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53
5,2011,FEMALE,HISPANIC,GUADALUPE,26,62
6,2011,FEMALE,HISPANIC,HAILEY,126,8
7,2011,FEMALE,HISPANIC,HALEY,14,74
8,2011,FEMALE,HISPANIC,HANNAH,17,71
9,2011,FEMALE,HISPANIC,HAYLEE,17,71


## Quick Object Conversions
to_frame(), to_dict(), tolist()

In [32]:
# The column name contains an apostrophe, so the string is delimited with double quotes.
baby_names["Child's First Name"]

0     GERALDINE
1           GIA
2        GIANNA
3       GISELLE
4         GRACE
5     GUADALUPE
6        HAILEY
7         HALEY
8        HANNAH
9        HAYLEE
10       HAYLEY
11        HAZEL
12       HEAVEN
13        HEIDI
14        HEIDY
15        HELEN
16        IMANI
17       INGRID
18        IRENE
19         IRIS
Name: Child's First Name, dtype: object

In [33]:
# to_frame() turns the Series into a single-column DataFrame.
baby_names["Child's First Name"].to_frame()

Unnamed: 0,Child's First Name
0,GERALDINE
1,GIA
2,GIANNA
3,GISELLE
4,GRACE
5,GUADALUPE
6,HAILEY
7,HALEY
8,HANNAH
9,HAYLEE


In [34]:
# tolist() returns the Series values as a plain Python list.
baby_names["Child's First Name"].tolist()

['GERALDINE',
 'GIA',
 'GIANNA',
 'GISELLE',
 'GRACE',
 'GUADALUPE',
 'HAILEY',
 'HALEY',
 'HANNAH',
 'HAYLEE',
 'HAYLEY',
 'HAZEL',
 'HEAVEN',
 'HEIDI',
 'HEIDY',
 'HELEN',
 'IMANI',
 'INGRID',
 'IRENE',
 'IRIS']

In [35]:
# to_dict() maps each index label (the key) to its value; dict keys are always unique.
baby_names["Child's First Name"].to_dict()

{0: 'GERALDINE',
 1: 'GIA',
 2: 'GIANNA',
 3: 'GISELLE',
 4: 'GRACE',
 5: 'GUADALUPE',
 6: 'HAILEY',
 7: 'HALEY',
 8: 'HANNAH',
 9: 'HAYLEE',
 10: 'HAYLEY',
 11: 'HAZEL',
 12: 'HEAVEN',
 13: 'HEIDI',
 14: 'HEIDY',
 15: 'HELEN',
 16: 'IMANI',
 17: 'INGRID',
 18: 'IRENE',
 19: 'IRIS'}

In [25]:
# str.join concatenates the list items, placing the separator string between them.
'!'.join(['a','b','c'])

'a!b!c'

In [36]:
# Build one comma-separated string from the names: title-case them,
# drop repeats, and sort alphabetically before joining.
','.join(
    baby_names["Child's First Name"]
    .str.title()
    .drop_duplicates()
    .sort_values()
)

'Geraldine,Gia,Gianna,Giselle,Grace,Guadalupe,Hailey,Haley,Hannah,Haylee,Hayley,Hazel,Heaven,Heidi,Heidy,Helen,Imani,Ingrid,Irene,Iris'

## Export CSV File with the to_csv Method
to_csv(path, index = False, columns = [...], encoding = 'utf-8')

In [43]:
# The URL is an ordinary string literal, which is why it is wrapped in quotes.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
# Load the full dataset this time (no row limit).
baby_names = pd.read_csv(url)
baby_names.head()

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53


In [44]:
# Good practice: no spaces in the file name. The path is relative, so the file
# should land in the current working directory (typically the notebook's folder).
# No index argument is given here, so the row index is exported along with the data.
baby_names.to_csv('NYC_Baby_Names.csv')

In [45]:
# Read the exported file back in.
baby_names = pd.read_csv('NYC_Baby_Names.csv')
# Notice the extra "Unnamed: 0" column: it is the row index that to_csv wrote out.
baby_names.head()

Unnamed: 0.1,Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,1,2011,FEMALE,HISPANIC,GIA,21,67
2,2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,4,2011,FEMALE,HISPANIC,GRACE,36,53


In [53]:
# The URL is an ordinary string literal, which is why it is wrapped in quotes.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
baby_names = pd.read_csv(url)
# index = False leaves the row index out of the exported file.
baby_names.to_csv('NYC_Baby_Names_New.csv', index = False) 

In [54]:
# Read the file back to confirm that no extra index column was written.
baby_names_new = pd.read_csv('NYC_Baby_Names_New.csv')
baby_names_new.head() 

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53


In [55]:
# Export only a subset of the columns, then read the file back to check the result.
# The URL is an ordinary string literal, which is why it is wrapped in quotes.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
baby_names = pd.read_csv(url)
baby_names.to_csv(
    'NYC_Baby_Names_LimitedCol.csv',
    columns = ["Gender", "Ethnicity", "Child's First Name"],  # only these columns are written
    index = False,  # leave the row index out of the file
)
baby_names_limitedcol = pd.read_csv('NYC_Baby_Names_LimitedCol.csv')
baby_names_limitedcol.head()

Unnamed: 0,Gender,Ethnicity,Child's First Name
0,FEMALE,HISPANIC,GERALDINE
1,FEMALE,HISPANIC,GIA
2,FEMALE,HISPANIC,GIANNA
3,FEMALE,HISPANIC,GISELLE
4,FEMALE,HISPANIC,GRACE


In [56]:
# An encoding error can occur; a workaround is to pass the argument encoding = 'utf-8'.
# Otherwise, look up the correct encoding for the file.

## Import Excel File into pandas
read_excel(path, sheet_name = None / 1 / 'Name' / [0, 1])

In [59]:
# Read a workbook that has a single worksheet.
pd.read_excel('Data - Single Worksheet.xlsx')

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [60]:
# Without sheet_name, only the first worksheet is read.
pd.read_excel('Data - Multiple Worksheets.xlsx')

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [62]:
# sheet_name accepts either a zero-based position (1 = second worksheet) or the worksheet's name.
pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = 1)

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


In [67]:
# Passing a list to sheet_name returns a dict of DataFrames, keyed by the list entries.
data = pd.read_excel(
    'Data - Multiple Worksheets.xlsx',
    sheet_name = [0, 1],
)
# Show the dict together with its type.
(data, type(data))

({0:   First Name Last Name           City Gender
  0    Brandon     James          Miami      M
  1       Sean   Hawkins         Denver      M
  2       Judy       Day    Los Angeles      F
  3     Ashley      Ruiz  San Francisco      F
  4  Stephanie     Gomez       Portland      F,
  1:   First Name Last Name           City Gender
  0     Parker     Power        Raleigh      F
  1    Preston  Prescott   Philadelphia      F
  2    Ronaldo   Donaldo         Bangor      M
  3      Megan   Stiller  San Francisco      M
  4     Bustin    Jieber         Austin      F},
 dict)

In [69]:
# The dict keys match the requested sheets: key 0 is the first worksheet, key 1 the second.
data[1]

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


In [72]:
# Worksheet names can be listed instead of positions; the resulting dict is keyed by those names.
data = pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = ['Data 1','Data 2'])
data['Data 1']

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [73]:
# sheet_name = None imports every worksheet, returned as a dict keyed by worksheet name.
pd.read_excel('Data - Multiple Worksheets.xlsx', sheet_name = None)

{'Data 1':   First Name Last Name           City Gender
 0    Brandon     James          Miami      M
 1       Sean   Hawkins         Denver      M
 2       Judy       Day    Los Angeles      F
 3     Ashley      Ruiz  San Francisco      F
 4  Stephanie     Gomez       Portland      F,
 'Data 2':   First Name Last Name           City Gender
 0     Parker     Power        Raleigh      F
 1    Preston  Prescott   Philadelphia      F
 2    Ronaldo   Donaldo         Bangor      M
 3      Megan   Stiller  San Francisco      M
 4     Bustin    Jieber         Austin      F}

## Export Excel File
.to_excel(ExcelWriter, sheet_name = 'Name', index = False, columns = [...])

In [75]:
# The URL is an ordinary string literal, which is why it is wrapped in quotes.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
# Load the full dataset to use as the source for the Excel export.
baby_names = pd.read_csv(url)
baby_names.head()

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53


In [78]:
# Split the rows into two DataFrames according to the value in the Gender column.
girls = baby_names.loc[baby_names["Gender"] == 'FEMALE']
boys = baby_names.loc[baby_names["Gender"] == 'MALE']

In [80]:
# ExcelWriter is the workbook object that each to_excel call adds a worksheet to.
# The workbook is only written to disk when the writer is closed; the `with`
# block closes it automatically (even if an export raises), so no separate
# save()/close() call is needed afterwards.
with pd.ExcelWriter("baby_names.xlsx") as excel_file:
    # First worksheet: all columns for the girls.
    girls.to_excel(excel_file, sheet_name = 'girls', index = False)
    # Second worksheet: only a subset of the columns for the boys.
    boys.to_excel(
        excel_file,
        sheet_name = 'boys',
        index = False,
        columns = ["Year of Birth", "Gender", "Ethnicity"],
    )