# 02 - Load and Save

## Objectives:
- Importing data
- Exporting data

## Import Package

In [1]:
# pandas is a third-party package; it is not part of the Python standard library.
# Run `pip install pandas` if it is not installed yet, or follow the setup guide.

import pandas as pd
import matplotlib.pyplot as plt

# for creating file directory
import os

## Importing Data

In [2]:
# pandas ships reader functions (read_csv, read_excel, read_json, ...) that
# load a file -- from a local path or a URL -- straight into a DataFrame.
csv_df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/pandas-dev/pandas/master/doc/data/titanic.csv"
)
# Preview the first rows of the loaded table.
csv_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Excel workbooks are loaded with read_excel; this one is an .xls file
# hosted on GitHub.
excel_df = pd.read_excel(
    io="https://github.com/KianYang-Lee/pandas-tutorial/blob/main/datasets/airline.xls?raw=true"
)
# Preview the first rows of the loaded table.
excel_df.head()

Unnamed: 0,YEAR,Y,W,R,L,K
0,1948,1.214,0.243,0.1454,1.415,0.612
1,1949,1.354,0.26,0.2181,1.384,0.559
2,1950,1.569,0.278,0.3157,1.388,0.573
3,1951,1.948,0.297,0.394,1.55,0.564
4,1952,2.265,0.31,0.3559,1.802,0.574


In [4]:
# lines=True makes pandas read the file as line-delimited JSON,
# i.e. one JSON object per line.
json_df = pd.read_json(
    path_or_buf="https://raw.githubusercontent.com/KianYang-Lee/pandas-tutorial/main/datasets/ct.json",
    lines=True,
)
# Preview the first rows of the loaded table.
json_df.head()

Unnamed: 0,state,postcode,street,district,unit,location,region,number,city
0,CT,6457,Country Club Rd,,,"{'type': 'Point', 'coordinates': [-72.7277847,...",Middlesex,1111,Middletown
1,CT,6037,Parish Dr,,,"{'type': 'Point', 'coordinates': [-72.7738706,...",Hartford,51,Berlin
2,CT,6037,Stockings Brook Rd,,,"{'type': 'Point', 'coordinates': [-72.8102478,...",Hartford,90,Berlin
3,CT,6037,Lamentation Dr,,,"{'type': 'Point', 'coordinates': [-72.7450054,...",Hartford,99,Berlin
4,CT,6037,Lamentation Dr,,,"{'type': 'Point', 'coordinates': [-72.7406975,...",Hartford,207,Berlin


## Exporting data

In [5]:
# Export the Titanic DataFrame to a CSV file inside the local "exports/" folder.
#
# The folder is created up front rather than in reaction to a failed write:
# recent pandas versions raise a plain OSError (not FileNotFoundError) when the
# parent directory is missing, so an `except FileNotFoundError` handler would
# not catch it. exist_ok=True makes this a no-op when the folder already
# exists, which keeps the cell safe to re-run.
os.makedirs("exports", exist_ok=True)
csv_df.to_csv("exports/Titanic_extracted.csv")

In [6]:
# Write the Titanic data to an Excel workbook, on a sheet named "passengers".
# index=False leaves the DataFrame's row index out of the sheet.
csv_df.to_excel(
    excel_writer="./exports/Titanic_extracted.xlsx",
    sheet_name="passengers",
    index=False,
)

In [7]:
# Save the airline data (originally loaded from an .xls file) as CSV.
excel_df.to_csv(path_or_buf="./exports/airlines_extracted.csv")

In [8]:
# Save the data that was loaded from ct.json back out as a JSON file.
json_df.to_json(path_or_buf="./exports/ct_extracted.json")

## Intricacies

In [9]:
# Read back the .xlsx workbook exported earlier, selecting its
# "passengers" sheet by name.
excelx_df = pd.read_excel(
    io="./exports/Titanic_extracted.xlsx",
    sheet_name="passengers",
)

## Practice Section

1. Export the `DataFrame` that you created in Chapter 1 to the `exports/` folder as a `csv` file.
2. Import the `csv` file created in Practice 1 into this environment.

## Useful resources
- https://www.kdnuggets.com/datasets/index.html
- https://github.com/awesomedata/awesome-public-datasets
- ct.json dataset source: https://www.bmc.com/blogs/pandas-read-json-csv-files/
- airline.xls dataset source: http://www.principlesofeconometrics.com/excel.htm
- Pokemon.csv dataset source: https://www.kaggle.com/abcsds/pokemon
- Titanic.csv dataset source: https://github.com/pandas-dev/pandas/blob/master/doc/data/titanic.csv

**Copyright (C) 2021  Lee Kian Yang**

This program is licensed under MIT license.