###  Reading and Writing Text Files

In [None]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

In [None]:
# Load the CSV into a DataFrame; with the defaults, the file's first row supplies the column labels.
sample = pd.read_csv('sample_data/sample.csv')
# Bare last expression so the notebook renders the frame.
sample

Unnamed: 0,country,designation,points,province
0,Italy,Vulkˆ Bianco,87,Sicily & Sardinia
1,Portugal,Avidagos,87,Douro
2,US,,87,Oregon
3,US,Reserve Late Harvest,87,Michigan
4,US,Vintner's Reserve Wild Child Block,87,Oregon


In [None]:
# header=None: do not use the first row as column labels; every row (including the
# header line) is read as data and the columns are labelled with integers instead.
# NOTE: this rebinds `sample`, so later cells that use `sample` see integer column labels.
sample = pd.read_csv('sample_data/sample.csv',header=None)
sample

Unnamed: 0,0,1,2,3
0,country,designation,points,province
1,Italy,Vulkˆ Bianco,87,Sicily & Sardinia
2,Portugal,Avidagos,87,Douro
3,US,,87,Oregon
4,US,Reserve Late Harvest,87,Michigan
5,US,Vintner's Reserve Wild Child Block,87,Oregon


In [None]:
# nrows=2 stops after two rows; because header=None is also set, the file's header
# line counts as one of those two rows.
pd.read_csv('sample_data/sample.csv',header=None, nrows=2)

Unnamed: 0,0,1,2,3
0,country,designation,points,province
1,Italy,Vulkˆ Bianco,87,Sicily & Sardinia


In [None]:
# Write the DataFrame out to a CSV file (comma-separated by default).
sample.to_csv('sample_data/sample_output.csv')

In [None]:
import sys

In [None]:
# Passing sys.stdout instead of a path prints the CSV text in the cell output rather
# than saving a file; sep='_' replaces the default comma delimiter.
sample.to_csv(sys.stdout,sep='_')

_0_1_2_3
0_country_designation_points_province
1_Italy_Vulkˆ Bianco_87_Sicily & Sardinia
2_Portugal_Avidagos_87_Douro
3_US__87_Oregon
4_US_Reserve Late Harvest_87_Michigan
5_US_Vintner's Reserve Wild Child Block_87_Oregon


In [None]:
# `sep` accepts another single-character delimiter in place of the comma — here '/'.
sample.to_csv(sys.stdout,sep='/')

/0/1/2/3
0/country/designation/points/province
1/Italy/Vulkˆ Bianco/87/Sicily & Sardinia
2/Portugal/Avidagos/87/Douro
3/US//87/Oregon
4/US/Reserve Late Harvest/87/Michigan
5/US/Vintner's Reserve Wild Child Block/87/Oregon


In [None]:
# Write only selected columns by passing their labels via `columns`.
# `sample` was last re-read with header=None, so its columns are integer labels and
# selecting by name would raise a KeyError on a fresh top-to-bottom run. Reload the
# file with its header row so the named columns exist.
sample_with_header = pd.read_csv('sample_data/sample.csv')
sample_with_header.to_csv(sys.stdout,columns=['country','designation','points'])

,country,designation,points
0,Italy,Vulkˆ Bianco,87
1,Portugal,Avidagos,87
2,US,,87
3,US,Reserve Late Harvest,87
4,US,Vintner's Reserve Wild Child Block,87


### Read from Google Drive

In [None]:
import pandas as pd
from google.colab import drive # Import the drive module to access Google Drive in Colab


In [None]:
# Mount Google Drive at /content/drive so files stored in Drive can be read by path.
# `drive` comes from google.colab, so this cell only works inside Colab.

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the CSV inside the mounted Drive (the mount point is /content/drive).
path = '/content/drive/MyDrive/sample_data/sales_data.csv'

# Read the CSV into a DataFrame and preview its first rows.
sales = pd.read_csv(path)
sales.head()


Unnamed: 0,Product,Quantity Sold
0,Widget A,50
1,Widget B,30
2,Widget C,20
3,Widget D,40
4,Widget E,60


## **Extra:**

### Excel with Python

In [None]:
!pip install openpyxl



In [None]:
import pandas as pd

In [None]:
# Open the workbook as an ExcelFile object; individual sheets are parsed from it afterwards.
xlsfile = pd.ExcelFile('sample_data/sample.xlsx')

In [None]:
# Parse the worksheet named 'sample' into a DataFrame
# (presumably this is the workbook's first sheet — confirm against the file).
excel_df = xlsfile.parse('sample')
excel_df

Unnamed: 0,country,designation,points,province
0,Italy,Vulkˆ Bianco,87,Sicily & Sardinia
1,Portugal,Avidagos,87,Douro
2,US,,87,Oregon
3,US,Reserve Late Harvest,87,Michigan
4,US,Vintner's Reserve Wild Child Block,87,Oregon


### HTML with Python

In [None]:
from pandas import read_html
import pandas as pd
from pandas import Series, DataFrame

In [None]:
# Wikipedia page on mobile country codes; used as the HTML source whose tables are read.
url_mcc = "https://en.wikipedia.org/wiki/Mobile_country_code"

In [None]:
# Fetch the page and parse its HTML tables; the result is a list of DataFrames.
# Call the public top-level pd.read_html rather than reaching into the
# pandas.io.html submodule path.
webpage_list = pd.read_html(url_mcc)

In [None]:
# read_html returned a list of DataFrames; take the first one and preview its first rows.
webpage_df = webpage_list[0]
webpage_df.head()

Unnamed: 0,MCC,MNC,Brand,Operator,Status,Bands (MHz),References and notes
0,1,1,TEST,Test network,Operational,any,
1,1,1,TEST,Test network,Operational,any,
2,999,99,,Internal use,Operational,any,"Internal use in private networks, no roaming[4]"
3,999,999,,Internal use,Operational,any,"Internal use in private networks, no roaming[4]"


In [None]:
# Inspect the column labels of the parsed table.
webpage_df.columns

Index(['MCC', 'MNC', 'Brand', 'Operator', 'Status', 'Bands (MHz)',
       'References and notes'],
      dtype='object')

### JSON with Python

In [None]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [None]:
# Two product records in the shape of a JSON array of objects.
# The square brackets make this an explicit list; without them the `}, {` between
# the two dict literals silently builds a tuple of dicts instead.
json_data = [
  {
    "_id": {
      "$oid": "5968dd23fc13ae04d9000001"
    },
    "product_name": "sildenafil citrate",
    "supplier": "Wisozk Inc",
    "quantity": 261,
    "unit_cost": "$10.47"
  },
  {
    "_id": {
      "$oid": "5968dd23fc13ae04d9000002"
    },
    "product_name": "Mountain Juniperus ashei",
    "supplier": "Keebler-Hilpert",
    "quantity": 292,
    "unit_cost": "$8.74"
  },
]

In [None]:
import json

In [None]:
# Serialize the Python data to a JSON-formatted string (the result is a str, not an object).
json.dumps(json_data)

'[{"_id": {"$oid": "5968dd23fc13ae04d9000001"}, "product_name": "sildenafil citrate", "supplier": "Wisozk Inc", "quantity": 261, "unit_cost": "$10.47"}, {"_id": {"$oid": "5968dd23fc13ae04d9000002"}, "product_name": "Mountain Juniperus ashei", "supplier": "Keebler-Hilpert", "quantity": 292, "unit_cost": "$8.74"}]'

In [None]:
# Build a DataFrame with one row per record and one column per top-level key.
# The nested `_id` dict is not flattened; it is kept as a dict value in a single column.
json_df = DataFrame(json_data)
json_df

Unnamed: 0,_id,product_name,supplier,quantity,unit_cost
0,{'$oid': '5968dd23fc13ae04d9000001'},sildenafil citrate,Wisozk Inc,261,$10.47
1,{'$oid': '5968dd23fc13ae04d9000002'},Mountain Juniperus ashei,Keebler-Hilpert,292,$8.74
