### This notebook quickly demonstrates how to read and save different file types (JSON, CSV, XML) in Python.

In [211]:
import json
import xml.etree.ElementTree as etree  # for reading xml
from urllib.request import urlopen  # for fetching JSON over HTTP

import numpy as np
import pandas as pd

### READING DIFFERENT FILE FORMATS IN PYTHON

#### Reading a JSON file from local disk. Note that `with open(...)` automatically closes the file when the block exits.

In [212]:
# Parse the local JSON file into a Python object. The `with` block closes the file as
# soon as the block exits. JSON text is UTF-8, so the encoding is given explicitly
# instead of relying on the platform default (which is not UTF-8 on every system).
with open(r"C:/Users/Datascience/Desktop/apple.json", encoding="utf-8") as json_file:
    apple_info = json.load(json_file)

In [213]:
# Display the parsed JSON object to inspect its keys and value types.
apple_info

{'zip': '95014',
 'sector': 'Technology',
 'fullTimeEmployees': 100000,
 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. In addition, the company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; AirPods Max, an over-ear wireless headphone; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, HomePod, and iPod touch. Further, it provides AppleCare support services; cloud services store services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. Additionally, the company offers various services, such as Apple Arcade, a game subscription service; Apple Music, which offers users a curated listening experience wit

In [214]:
# One record -> one row: wrapping the parsed object in a list makes pandas treat it
# as a single row whose columns are the record's keys.
df_json = pd.DataFrame(data=[apple_info])

In [215]:
# Preview the frame built from the JSON record.
df_json.head()

Unnamed: 0,zip,sector,fullTimeEmployees,longBusinessSummary,city,phone,state,country,companyOfficers,website,...,fiveYearAvgDividendYield,fiftyTwoWeekLow,bid,tradeable,dividendYield,bidSize,dayHigh,regularMarketPrice,preMarketPrice,logo_url
0,95014,Technology,100000,"Apple Inc. designs, manufactures, and markets ...",Cupertino,408 996 1010,CA,United States,[],https://www.apple.com,...,1.13,122.25,178.4,False,0.005,3200,179.61,177.77,178.38,https://logo.clearbit.com/apple.com


In [216]:
#### Reading Json file from url

In [217]:
# Remote location of the JSON document used in this section.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/data/apple.json"

In [218]:
# Download and parse the JSON document at `url` (json.load accepts the binary HTTP
# response directly), then wrap the record in a list so it becomes a one-row frame.
# The response is closed when the `with` block exits.
with urlopen(url) as response:
    apple_info_url = json.load(response)
df_json_url = pd.DataFrame([apple_info_url])

In [219]:
# Preview the frame stored in df_json_url.
df_json_url.head()

Unnamed: 0,zip,sector,fullTimeEmployees,longBusinessSummary,city,phone,state,country,companyOfficers,website,...,fiveYearAvgDividendYield,fiftyTwoWeekLow,bid,tradeable,dividendYield,bidSize,dayHigh,regularMarketPrice,preMarketPrice,logo_url
0,95014,Technology,100000,"Apple Inc. designs, manufactures, and markets ...",Cupertino,408 996 1010,CA,United States,[],https://www.apple.com,...,1.13,122.25,178.4,False,0.005,3200,179.61,177.77,178.38,https://logo.clearbit.com/apple.com


#### Reading csv file from local 

In [220]:
# header=None tells pandas not to treat the first line as column names, so the
# columns get integer labels until they are named explicitly.
df_csv = pd.read_csv(r"C:/Users/Datascience/Desktop/addresses.csv", header=None)

In [221]:
# Name the six columns. Labels are kept free of stray whitespace so that lookups
# such as df_csv["Location"] work and the header written by to_csv is clean.
df_csv.columns = ['First Name', 'Last Name', 'Location', 'City', 'State', 'Area Code']

In [222]:
# Preview the first rows with the newly assigned column names.
df_csv.head()

Unnamed: 0,First Name,Last Name,Location,City,State,Area Code
0,John,Doe,120 jefferson st.,Riverside,NJ,8075
1,Jack,McGinnis,220 hobo Av.,Phila,PA,9119
2,"John ""Da Man""",Repici,120 Jefferson St.,Riverside,NJ,8075
3,Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD,91234
4,,Blankman,,SomeTown,SD,298


In [223]:
# Label-based lookup: the row whose index label is 0. The result is a Series
# keyed by column name.
df_csv.loc[0]

First Name                 John
Last Name                   Doe
Location      120 jefferson st.
City                  Riverside
State                        NJ
Area Code                  8075
Name: 0, dtype: object

In [224]:
# Label-based selection with .loc: rows labelled 0, 1 and 2 of the "First Name" column.
df_csv.loc[[0,1,2], "First Name" ]

0             John
1             Jack
2    John "Da Man"
Name: First Name, dtype: object

In [225]:
# Position-based selection with .iloc: rows at positions 0, 1 and 2 of the column
# at position 0 (i.e. "First Name").
df_csv.iloc[[0,1,2], 0]

0             John
1             Jack
2    John "Da Man"
Name: First Name, dtype: object

In [226]:
#### Reading csv file from url

In [227]:
# Load the Chicago census CSV directly from its URL into a DataFrame. The long URL
# is split into adjacent string literals, which Python joins into one string.
census = pd.read_csv(
    "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/"
    "IBMDeveloperSkillsNetwork-DB0201EN-SkillsNetwork/labs/FinalModule_Coursera_V5/data/"
    "ChicagoCensusData.csv"
    "?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ"
    "&utm_term=10006555"
    "&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkDB0201ENSkillsNetwork20127838-2021-01-01"
)

In [228]:
# Preview the first rows of the census data.
census.head()

Unnamed: 0,COMMUNITY_AREA_NUMBER,COMMUNITY_AREA_NAME,PERCENT_OF_HOUSING_CROWDED,PERCENT_HOUSEHOLDS_BELOW_POVERTY,PERCENT_AGED_16__UNEMPLOYED,PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA,PERCENT_AGED_UNDER_18_OR_OVER_64,PER_CAPITA_INCOME,HARDSHIP_INDEX
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0
3,4.0,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17.0
4,5.0,North Center,0.3,7.5,5.2,4.5,26.2,57123,6.0


#### Creating a dataframe from a numpy array

In [229]:
# Build a 3x3 integer grid holding 1..9 (filled row by row) and label its
# columns a, b and c.
df_np = pd.DataFrame(np.arange(1, 10).reshape(3, 3), columns=list("abc"))
df_np

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [230]:
# Shift every value up by 10 with DataFrame.transform. Note that df_np is rebound to
# the shifted frame, so running this cell again adds another 10.
df_np = df_np.transform(lambda column: column + 10)
df_np

Unnamed: 0,a,b,c
0,11,12,13
1,14,15,16
2,17,18,19


In [231]:
# Square root of every value, written as a power of 0.5. The outcome is stored under
# a new name, leaving df_np itself unchanged.
result = df_np.transform(lambda column: column ** 0.5)

In [232]:
# Show the square-rooted frame.
result

Unnamed: 0,a,b,c
0,3.316625,3.464102,3.605551
1,3.741657,3.872983,4.0
2,4.123106,4.242641,4.358899


#### Reading xml file from local using etree

In [233]:
# Parse the XML file from disk into an ElementTree object.
tree = etree.parse(r"C:/Users/Datascience/Desktop/Sample.xml")

In [234]:
# The repr only confirms that an ElementTree object was created.
tree

<xml.etree.ElementTree.ElementTree at 0x1ee883cc450>

In [235]:
# getroot

In [236]:
# The root element is the entry point for walking the document's children.
root = tree.getroot()

In [237]:
# Show the root element; its repr includes the tag name.
root

<Element 'catalog' at 0x000001EE883C35B0>

In [238]:
# Field names of a book record; used as the DataFrame's column labels.
columns = ["author", "title", "genre", "price", "publish_date","description"]

In [239]:
# Empty frame that only illustrates the target layout. df_tree is reassigned later
# from the parsed records, so nothing is appended to this instance.
df_tree = pd.DataFrame(columns = columns)

In [240]:
# Show the empty frame: column headers only, no rows.
df_tree

Unnamed: 0,author,title,genre,price,publish_date,description


In [241]:
# Turn every child element of the root into a dict holding the text of each
# sub-element named in `columns`, and collect those dicts in `users`
# (one dict per child, keys in `columns` order).
users = []
for node in root:
    record = {}
    for tag in columns:
        child = node.find(tag)
        # find() returns None when the sub-element is absent; store None for that
        # field instead of failing on `.text`, so one incomplete entry does not
        # abort the whole import. `is not None` is required because an Element
        # without children is falsy.
        record[tag] = child.text if child is not None else None
    users.append(record)


In [242]:
# Build the frame in one step from the list of dicts: each dict becomes a row and
# its keys become the columns. Values are the raw element texts (strings), so
# numeric-looking columns such as price are not converted here.
df_tree = pd.DataFrame(users)

In [243]:
# Show the frame assembled from the parsed XML records.
df_tree

Unnamed: 0,author,title,genre,price,publish_date,description
0,"Gambardella, Matthew",XML Developer's Guide,Computer,44.95,2000-10-01,An in-depth look at creating applications \n ...
1,"Ralls, Kim",Midnight Rain,Fantasy,5.95,2000-12-16,"A former architect battles corporate zombies, ..."
2,"Corets, Eva",Maeve Ascendant,Fantasy,5.95,2000-11-17,After the collapse of a nanotechnology \n ...
3,"Corets, Eva",Oberon's Legacy,Fantasy,5.95,2001-03-10,"In post-apocalypse England, the mysterious \n ..."
4,"Corets, Eva",The Sundered Grail,Fantasy,5.95,2001-09-10,"The two daughters of Maeve, half-sisters, \n ..."
5,"Randall, Cynthia",Lover Birds,Romance,4.95,2000-09-02,When Carla meets Paul at an ornithology \n ...
6,"Thurman, Paula",Splish Splash,Romance,4.95,2000-11-02,A deep sea diver finds true love twenty \n ...
7,"Knorr, Stefan",Creepy Crawlies,Horror,4.95,2000-12-06,"An anthology of horror stories about roaches,\..."
8,"Kress, Peter",Paradox Lost,Science Fiction,6.95,2000-11-02,After an inadvertant trip through a Heisenberg...
9,"O'Brien, Tim",Microsoft .NET: The Programming Bible,Computer,36.95,2000-12-09,Microsoft's .NET initiative is explored in \n ...


#### Reading xml file from local using pandas

In [244]:
# Read the same XML file with pandas alone: the xpath selects the <book> elements
# under <catalog>, and each selected element becomes one row.
df_xml = pd.read_xml(r"C:/Users/Datascience/Desktop/Sample.xml", xpath="/catalog/book")

In [245]:
# Show the frame produced by pd.read_xml.
df_xml

Unnamed: 0,id,author,title,genre,price,publish_date,description
0,bk101,"Gambardella, Matthew",XML Developer's Guide,Computer,44.95,2000-10-01,An in-depth look at creating applications \n ...
1,bk102,"Ralls, Kim",Midnight Rain,Fantasy,5.95,2000-12-16,"A former architect battles corporate zombies, ..."
2,bk103,"Corets, Eva",Maeve Ascendant,Fantasy,5.95,2000-11-17,After the collapse of a nanotechnology \n ...
3,bk104,"Corets, Eva",Oberon's Legacy,Fantasy,5.95,2001-03-10,"In post-apocalypse England, the mysterious \n ..."
4,bk105,"Corets, Eva",The Sundered Grail,Fantasy,5.95,2001-09-10,"The two daughters of Maeve, half-sisters, \n ..."
5,bk106,"Randall, Cynthia",Lover Birds,Romance,4.95,2000-09-02,When Carla meets Paul at an ornithology \n ...
6,bk107,"Thurman, Paula",Splish Splash,Romance,4.95,2000-11-02,A deep sea diver finds true love twenty \n ...
7,bk108,"Knorr, Stefan",Creepy Crawlies,Horror,4.95,2000-12-06,"An anthology of horror stories about roaches,\..."
8,bk109,"Kress, Peter",Paradox Lost,Science Fiction,6.95,2000-11-02,After an inadvertant trip through a Heisenberg...
9,bk110,"O'Brien, Tim",Microsoft .NET: The Programming Bible,Computer,36.95,2000-12-09,Microsoft's .NET initiative is explored in \n ...


### SAVING DIFFERENT FILE FORMATS IN PYTHON. 

In [246]:
# all files are saved on the local desktop folder. 

In [247]:
# Write the frame as a JSON array of row objects. orient="records" leaves the index
# out by design. `index=False` is not passed: it is not supported together with the
# default 'columns' orient (depending on the pandas version it raises ValueError or
# has no effect).
df_json.to_json(r"C:/Users/Datascience/Desktop/json1.json", orient="records")

In [248]:
# Write the CSV frame to disk; index=False leaves the row index out of the file.
df_csv.to_csv(r"C:/Users/Datascience/Desktop/csv1.csv", index = False)

In [249]:
# Write the XML frame to disk; index=False leaves the row index out of the file.
df_xml.to_xml(r"C:/Users/Datascience/Desktop/xml1.xml", index = False)

### Author: Paul John Julongbayan